From ea93850975ffc8baf5c43ebbce220430ff3809ab Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Thu, 12 Dec 2019 10:19:59 +0100 Subject: [PATCH] Correct typos (#614) * Correct typos * Add spell check when available * Update changelog * Fix space * Fix HER link --- docs/common/schedules.rst | 4 +- docs/conf.py | 11 +++ docs/guide/install.rst | 2 +- docs/guide/pretrain.rst | 2 +- docs/guide/rl_tips.rst | 6 +- docs/misc/changelog.rst | 4 +- docs/modules/her.rst | 2 +- docs/spelling_wordlist.txt | 103 ++++++++++++++++++++++ stable_baselines/acer/acer_simple.py | 6 +- stable_baselines/acktr/acktr.py | 2 +- stable_baselines/acktr/kfac.py | 2 +- stable_baselines/common/atari_wrappers.py | 2 +- stable_baselines/common/base_class.py | 10 +-- stable_baselines/common/cmd_util.py | 8 +- stable_baselines/common/distributions.py | 40 ++++----- stable_baselines/common/evaluation.py | 2 +- stable_baselines/common/math_util.py | 2 +- stable_baselines/common/noise.py | 4 +- stable_baselines/common/policies.py | 6 +- stable_baselines/common/schedules.py | 4 +- stable_baselines/common/tf_util.py | 8 +- stable_baselines/ddpg/ddpg.py | 8 +- stable_baselines/ddpg/policies.py | 4 +- stable_baselines/deepq/build_graph.py | 4 +- stable_baselines/deepq/policies.py | 12 +-- stable_baselines/gail/adversary.py | 2 +- stable_baselines/gail/dataset/dataset.py | 2 +- stable_baselines/ppo2/ppo2.py | 4 +- stable_baselines/sac/policies.py | 10 +-- stable_baselines/sac/sac.py | 6 +- stable_baselines/td3/policies.py | 4 +- stable_baselines/td3/td3.py | 2 +- 32 files changed, 202 insertions(+), 86 deletions(-) create mode 100644 docs/spelling_wordlist.txt diff --git a/docs/common/schedules.rst b/docs/common/schedules.rst index dc545ae0a9..968a067601 100644 --- a/docs/common/schedules.rst +++ b/docs/common/schedules.rst @@ -3,8 +3,8 @@ Schedules ========= -Schedules are used as hyperparameter for most of the algortihms, -in order to change value of a parameter over time (usuallly the learning rate). +Schedules are used as hyperparameter for most of the algorithms, +in order to change value of a parameter over time (usually the learning rate). .. automodule:: stable_baselines.common.schedules diff --git a/docs/conf.py b/docs/conf.py index dfadbcc85e..f4768de100 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,6 +16,14 @@ import sys from unittest.mock import MagicMock +# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support +# PyEnchant. +try: + import sphinxcontrib.spelling + enable_spell_check = True +except ImportError: + enable_spell_check = False + # source code directory, relative to this file, for sphinx-autobuild sys.path.insert(0, os.path.abspath('..')) @@ -69,6 +77,9 @@ def __getattr__(cls, name): 'sphinx.ext.viewcode', ] +if enable_spell_check: + extensions.append('sphinxcontrib.spelling') + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/docs/guide/install.rst b/docs/guide/install.rst index 39326daf27..134f4a7fe2 100644 --- a/docs/guide/install.rst +++ b/docs/guide/install.rst @@ -169,7 +169,7 @@ Explanation of the docker command: - ``--ipc=host`` Use the host system’s IPC namespace. IPC (POSIX/SysV IPC) namespace provides separation of named shared memory segments, semaphores and message queues. 
-- ``--name test`` give explicitely the name ``test`` to the container, +- ``--name test`` give explicitly the name ``test`` to the container, otherwise it will be assigned a random name - ``--mount src=...`` give access of the local directory (``pwd`` command) to the container (it will be map to ``/root/code/stable-baselines``), so diff --git a/docs/guide/pretrain.rst b/docs/guide/pretrain.rst index b38e7d6fdc..788f91dbc6 100644 --- a/docs/guide/pretrain.rst +++ b/docs/guide/pretrain.rst @@ -80,7 +80,7 @@ The idea is that this callable can be a PID controller, asking a human player, . return env.action_space.sample() # Data will be saved in a numpy archive named `expert_cartpole.npz` # when using something different than an RL expert, - # you must pass the environment object explicitely + # you must pass the environment object explicitly generate_expert_traj(dummy_expert, 'dummy_expert_cartpole', env, n_episodes=10) diff --git a/docs/guide/rl_tips.rst b/docs/guide/rl_tips.rst index 465a15a4af..1970fb2c62 100644 --- a/docs/guide/rl_tips.rst +++ b/docs/guide/rl_tips.rst @@ -33,7 +33,7 @@ bad trajectories. This factor, among others, explains that results in RL may vary from one run to another (i.e., when only the seed of the pseudo-random generator changes). For this reason, you should always do several runs to have quantitative results. -Good results in RL are generally dependent on finding appropriate hyperparameters. Recent alogrithms (PPO, SAC, TD3) normally require little hyperparameter tuning, +Good results in RL are generally dependent on finding appropriate hyperparameters. Recent algorithms (PPO, SAC, TD3) normally require little hyperparameter tuning, however, *don't expect the default ones to work* on any environment. Therefore, we *highly recommend you* to take a look at the `RL zoo `_ (or the original papers) for tuned hyperparameters. @@ -93,7 +93,7 @@ or continuous actions (ex: go to a certain speed)? Some algorithms are only tailored for one or the other domain: `DQN` only supports discrete actions, where `SAC` is restricted to continuous actions. The second difference that will help you choose is whether you can parallelize your training or not, and how you can do it (with or without MPI?). -If what matters is the wall clock training time, then you should lean towards `À2C` and its derivates (PPO, ACER, ACKTR, ...). +If what matters is the wall clock training time, then you should lean towards `A2C` and its derivatives (PPO, ACER, ACKTR, ...). Take a look at the `Vectorized Environments `_ to learn more about training with multiple workers. To sum it up: @@ -146,7 +146,7 @@ If you can use MPI, then you can choose between PPO1, TRPO and DDPG. Goal Environment ----------------- -If your environment follows the `GoalEnv` interface (cf `HER `_), then you should use +If your environment follows the `GoalEnv` interface (cf `HER <../modules/her.html>`_), then you should use HER + (SAC/TD3/DDPG/DQN) depending on the action space. 
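As a point of reference for the GoalEnv advice in the rl_tips.rst hunk above, here is a minimal, hedged sketch of HER + SAC. It assumes the stable-baselines 2.x ``HER(policy, env, model_class, ...)`` wrapper and the toy ``BitFlippingEnv`` from ``stable_baselines.common.bit_flipping_env`` (any environment implementing the ``GoalEnv`` interface would do); parameter values are illustrative only.

.. code-block:: python

    from stable_baselines import HER, SAC
    from stable_baselines.common.bit_flipping_env import BitFlippingEnv

    N_BITS = 15
    # Toy GoalEnv; continuous=True gives a Box action space suitable for SAC
    env = BitFlippingEnv(N_BITS, continuous=True, max_steps=N_BITS)

    # HER wraps an off-policy model class; 'future' is the usual goal selection strategy
    model = HER('MlpPolicy', env, SAC, n_sampled_goal=4,
                goal_selection_strategy='future', verbose=1)
    model.learn(total_timesteps=5000)
    model.save('her_sac_bit_flipping')

For discrete-action goal environments the same pattern applies with ``DQN`` in place of ``SAC``, as the text above suggests.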
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index ec623957a4..b0d7c5b66a 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -73,6 +73,8 @@ Documentation: - Update custom env documentation to reflect new gym API for the `close()` method (@justinkterry) - Update custom env documentation to clarify what step and reset return (@justinkterry) - Add RL tips and tricks for doing RL experiments +- Corrected lots of typos +- Add spell check to documentation if available Release 2.8.0 (2019-09-29) @@ -388,7 +390,7 @@ Release 2.1.1 (2018-10-20) -------------------------- - fixed MpiAdam synchronization issue in PPO1 (thanks to @brendenpetersen) issue #50 -- fixed dependency issues (new mujoco-py requires a mujoco licence + gym broke MultiDiscrete space shape) +- fixed dependency issues (new mujoco-py requires a mujoco license + gym broke MultiDiscrete space shape) Release 2.1.0 (2018-10-2) diff --git a/docs/modules/her.rst b/docs/modules/her.rst index 8539dfaf9f..e64cd7eda6 100644 --- a/docs/modules/her.rst +++ b/docs/modules/her.rst @@ -93,7 +93,7 @@ Goal Selection Strategies :undoc-members: -Gaol Env Wrapper +Goal Env Wrapper ---------------- .. autoclass:: HERGoalEnvWrapper diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt new file mode 100644 index 0000000000..ca6b916007 --- /dev/null +++ b/docs/spelling_wordlist.txt @@ -0,0 +1,103 @@ +py +env +atari +argparse +Argparse +TensorFlow +feedforward +envs +VecEnv +pretrain +petrained +tf +np +mujoco +cpu +ndarray +ndarrays +timestep +timesteps +stepsize +dataset +adam +fn +normalisation +Kullback +Leibler +boolean +deserialized +pretrained +minibatch +subprocesses +ArgumentParser +Tensorflow +Gaussian +approximator +minibatches +hyperparameters +hyperparameter +vectorized +rl +colab +dataloader +npz +datasets +vf +logits +num +Utils +backpropagate +prepend +NaN +preprocessing +Cloudpickle +async +multiprocess +tensorflow +mlp +cnn +neglogp +tanh +coef +repo +Huber +params +ppo +arxiv +Arxiv +func +DQN +Uhlenbeck +Ornstein +multithread +cancelled +Tensorboard +parallelize +customising +serializable +Multiprocessed +cartpole +toolset +lstm +rescale +ffmpeg +avconv +unnormalized +Github +pre +preprocess +backend +attr +preprocess +Antonin +Raffin +araffin +Homebrew +Numpy +Theano +rollout +kfac +Piecewise +csv +nvidia +visdom diff --git a/stable_baselines/acer/acer_simple.py b/stable_baselines/acer/acer_simple.py index 53519b845b..043f353b84 100644 --- a/stable_baselines/acer/acer_simple.py +++ b/stable_baselines/acer/acer_simple.py @@ -75,7 +75,7 @@ class ACER(ActorCriticRLModel): Use `n_cpu_tf_sess` instead. 
:param q_coef: (float) The weight for the loss on the Q value - :param ent_coef: (float) The weight for the entropic loss + :param ent_coef: (float) The weight for the entropy loss :param max_grad_norm: (float) The clipping value for the maximum gradient :param learning_rate: (float) The initial learning rate for the RMS prop optimizer :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant', @@ -390,13 +390,13 @@ def custom_getter(getter, name, *args, **kwargs): tf.summary.scalar('rewards', tf.reduce_mean(self.reward_ph)) tf.summary.scalar('learning_rate', tf.reduce_mean(self.learning_rate)) tf.summary.scalar('advantage', tf.reduce_mean(adv)) - tf.summary.scalar('action_probabilty', tf.reduce_mean(self.mu_ph)) + tf.summary.scalar('action_probability', tf.reduce_mean(self.mu_ph)) if self.full_tensorboard_log: tf.summary.histogram('rewards', self.reward_ph) tf.summary.histogram('learning_rate', self.learning_rate) tf.summary.histogram('advantage', adv) - tf.summary.histogram('action_probabilty', self.mu_ph) + tf.summary.histogram('action_probability', self.mu_ph) if tf_util.is_image(self.observation_space): tf.summary.image('observation', train_model.obs_ph) else: diff --git a/stable_baselines/acktr/acktr.py b/stable_baselines/acktr/acktr.py index 8f447472e3..3499c3f47d 100644 --- a/stable_baselines/acktr/acktr.py +++ b/stable_baselines/acktr/acktr.py @@ -30,7 +30,7 @@ class ACKTR(ActorCriticRLModel): Use `n_cpu_tf_sess` instead. :param n_steps: (int) The number of steps to run for each environment - :param ent_coef: (float) The weight for the entropic loss + :param ent_coef: (float) The weight for the entropy loss :param vf_coef: (float) The weight for the loss on the value function :param vf_fisher_coef: (float) The weight for the fisher loss on the value function :param learning_rate: (float) The initial learning rate for the RMS prop optimizer diff --git a/stable_baselines/acktr/kfac.py b/stable_baselines/acktr/kfac.py index 4984b1dba1..4ab208056e 100644 --- a/stable_baselines/acktr/kfac.py +++ b/stable_baselines/acktr/kfac.py @@ -25,7 +25,7 @@ def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfac_update=2 :param clip_kl: (float) gradient clipping for Kullback-Leibler :param kfac_update: (int) update kfac after kfac_update steps :param stats_accum_iter: (int) how may steps to accumulate stats - :param full_stats_init: (bool) whether or not to fully initalize stats + :param full_stats_init: (bool) whether or not to fully initialize stats :param cold_iter: (int) Cold start learning rate for how many steps :param cold_lr: (float) Cold start learning rate :param async_eigen_decomp: (bool) Use async eigen decomposition diff --git a/stable_baselines/common/atari_wrappers.py b/stable_baselines/common/atari_wrappers.py index 97f59bb4e0..ee8579b25e 100644 --- a/stable_baselines/common/atari_wrappers.py +++ b/stable_baselines/common/atari_wrappers.py @@ -276,7 +276,7 @@ def __getitem__(self, i): def make_atari(env_id): """ - Create a wrapped atari envrionment + Create a wrapped atari Environment :param env_id: (str) the environment ID :return: (Gym Environment) the wrapped atari environment diff --git a/stable_baselines/common/base_class.py b/stable_baselines/common/base_class.py index e90bf9a0b1..1f34c68d0b 100644 --- a/stable_baselines/common/base_class.py +++ b/stable_baselines/common/base_class.py @@ -238,9 +238,9 @@ def _get_pretrain_placeholders(self): """ Return the placeholders needed for the pretraining: - obs_ph: observation 
placeholder - - actions_ph will be population with an action from the environement + - actions_ph will be population with an action from the environment (from the expert dataset) - - deterministic_actions_ph: e.g., in the case of a gaussian policy, + - deterministic_actions_ph: e.g., in the case of a Gaussian policy, the mean. :return: ((tf.placeholder)) (obs_ph, actions_ph, deterministic_actions_ph) @@ -474,7 +474,7 @@ def load(cls, load_path, env=None, custom_objects=None, **kwargs): Load the model from file :param load_path: (str or file-like) the saved parameter location - :param env: (Gym Envrionment) the new environment to run the loaded model on + :param env: (Gym Environment) the new environment to run the loaded model on (can be None if you only need prediction from a trained model) :param custom_objects: (dict) Dictionary of objects to replace upon loading. If a variable is present in this dictionary as a @@ -862,7 +862,7 @@ def load(cls, load_path, env=None, custom_objects=None, **kwargs): Load the model from file :param load_path: (str or file-like) the saved parameter location - :param env: (Gym Envrionment) the new environment to run the loaded model on + :param env: (Gym Environment) the new environment to run the loaded model on (can be None if you only need prediction from a trained model) :param custom_objects: (dict) Dictionary of objects to replace upon loading. If a variable is present in this dictionary as a @@ -945,7 +945,7 @@ def load(cls, load_path, env=None, custom_objects=None, **kwargs): Load the model from file :param load_path: (str or file-like) the saved parameter location - :param env: (Gym Envrionment) the new environment to run the loaded model on + :param env: (Gym Environment) the new environment to run the loaded model on (can be None if you only need prediction from a trained model) :param custom_objects: (dict) Dictionary of objects to replace upon loading. If a variable is present in this dictionary as a diff --git a/stable_baselines/common/cmd_util.py b/stable_baselines/common/cmd_util.py index 2883821019..c5ff663391 100644 --- a/stable_baselines/common/cmd_util.py +++ b/stable_baselines/common/cmd_util.py @@ -25,7 +25,7 @@ def make_vec_env(env_id, n_envs=1, seed=None, start_index=0, :param env_id: (str or Type[gym.Env]) the environment ID or the environment class :param n_envs: (int) the number of environments you wish to have in parallel - :param seed: (int) the inital seed for the random number generator + :param seed: (int) the initial seed for the random number generator :param start_index: (int) start rank index :param monitor_dir: (str) Path to a folder where the monitor files will be saved. If None, no file will be written, however, the env will still be wrapped @@ -80,7 +80,7 @@ def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, :param env_id: (str) the environment ID :param num_env: (int) the number of environment you wish to have in subprocesses - :param seed: (int) the inital seed for RNG + :param seed: (int) the initial seed for RNG :param wrapper_kwargs: (dict) the parameters for wrap_deepmind function :param start_index: (int) start rank index :param allow_early_resets: (bool) allows early reset of the environment @@ -116,7 +116,7 @@ def make_mujoco_env(env_id, seed, allow_early_resets=True): Create a wrapped, monitored gym.Env for MuJoCo. 
:param env_id: (str) the environment ID - :param seed: (int) the inital seed for RNG + :param seed: (int) the initial seed for RNG :param allow_early_resets: (bool) allows early reset of the environment :return: (Gym Environment) The mujoco environment """ @@ -132,7 +132,7 @@ def make_robotics_env(env_id, seed, rank=0, allow_early_resets=True): Create a wrapped, monitored gym.Env for MuJoCo. :param env_id: (str) the environment ID - :param seed: (int) the inital seed for RNG + :param seed: (int) the initial seed for RNG :param rank: (int) the rank of the environment (for logging) :param allow_early_resets: (bool) allows early reset of the environment :return: (Gym Environment) The robotic environment diff --git a/stable_baselines/common/distributions.py b/stable_baselines/common/distributions.py index 786c6624e4..b64e0a5b5c 100644 --- a/stable_baselines/common/distributions.py +++ b/stable_baselines/common/distributions.py @@ -17,7 +17,7 @@ def flatparam(self): """ Return the direct probabilities - :return: ([float]) the probabilites + :return: ([float]) the probabilities """ raise NotImplementedError @@ -41,7 +41,7 @@ def neglogp(self, x): def kl(self, other): """ - Calculates the Kullback-Leibler divergence from the given probabilty distribution + Calculates the Kullback-Leibler divergence from the given probability distribution :param other: ([float]) the distribution to compare with :return: (float) the KL divergence of the two distributions @@ -50,7 +50,7 @@ def kl(self, other): def entropy(self): """ - Returns shannon's entropy of the probability + Returns Shannon's entropy of the probability :return: (float) the entropy """ @@ -58,7 +58,7 @@ def entropy(self): def sample(self): """ - returns a sample from the probabilty distribution + returns a sample from the probability distribution :return: (Tensorflow Tensor) the stochastic action """ @@ -103,8 +103,8 @@ def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector, ini :param pi_latent_vector: ([float]) the latent pi values :param vf_latent_vector: ([float]) the latent vf values - :param init_scale: (float) the inital scale of the distribution - :param init_bias: (float) the inital bias of the distribution + :param init_scale: (float) the initial scale of the distribution + :param init_bias: (float) the initial bias of the distribution :return: (ProbabilityDistribution) the instance of the ProbabilityDistribution associated """ raise NotImplementedError @@ -217,7 +217,7 @@ def sample_dtype(self): class DiagGaussianProbabilityDistributionType(ProbabilityDistributionType): def __init__(self, size): """ - The probability distribution type for multivariate gaussian input + The probability distribution type for multivariate Gaussian input :param size: (int) the number of dimensions of the multivariate gaussian """ @@ -255,9 +255,9 @@ def sample_dtype(self): class BernoulliProbabilityDistributionType(ProbabilityDistributionType): def __init__(self, size): """ - The probability distribution type for bernoulli input + The probability distribution type for Bernoulli input - :param size: (int) the number of dimensions of the bernoulli distribution + :param size: (int) the number of dimensions of the Bernoulli distribution """ self.size = size @@ -381,9 +381,9 @@ def fromflat(cls, flat): class DiagGaussianProbabilityDistribution(ProbabilityDistribution): def __init__(self, flat): """ - Probability distributions from multivariate gaussian input + Probability distributions from multivariate Gaussian input - :param flat: 
([float]) the multivariate gaussian input data + :param flat: ([float]) the multivariate Gaussian input data """ self.flat = flat mean, logstd = tf.split(axis=len(flat.shape) - 1, num_or_size_splits=2, value=flat) @@ -421,10 +421,10 @@ def sample(self): @classmethod def fromflat(cls, flat): """ - Create an instance of this from new multivariate gaussian input + Create an instance of this from new multivariate Gaussian input - :param flat: ([float]) the multivariate gaussian input data - :return: (ProbabilityDistribution) the instance from the given multivariate gaussian input data + :param flat: ([float]) the multivariate Gaussian input data + :return: (ProbabilityDistribution) the instance from the given multivariate Gaussian input data """ return cls(flat) @@ -432,9 +432,9 @@ def fromflat(cls, flat): class BernoulliProbabilityDistribution(ProbabilityDistribution): def __init__(self, logits): """ - Probability distributions from bernoulli input + Probability distributions from Bernoulli input - :param logits: ([float]) the bernoulli input data + :param logits: ([float]) the Bernoulli input data """ self.logits = logits self.probabilities = tf.sigmoid(logits) @@ -468,10 +468,10 @@ def sample(self): @classmethod def fromflat(cls, flat): """ - Create an instance of this from new bernoulli input + Create an instance of this from new Bernoulli input - :param flat: ([float]) the bernoulli input data - :return: (ProbabilityDistribution) the instance from the given bernoulli input data + :param flat: ([float]) the Bernoulli input data + :return: (ProbabilityDistribution) the instance from the given Bernoulli input data """ return cls(flat) @@ -481,7 +481,7 @@ def make_proba_dist_type(ac_space): return an instance of ProbabilityDistributionType for the correct type of action space :param ac_space: (Gym Space) the input action space - :return: (ProbabilityDistributionType) the approriate instance of a ProbabilityDistributionType + :return: (ProbabilityDistributionType) the appropriate instance of a ProbabilityDistributionType """ if isinstance(ac_space, spaces.Box): assert len(ac_space.shape) == 1, "Error: the action space must be a vector" diff --git a/stable_baselines/common/evaluation.py b/stable_baselines/common/evaluation.py index a8fb7887a3..67e10d06c5 100644 --- a/stable_baselines/common/evaluation.py +++ b/stable_baselines/common/evaluation.py @@ -15,7 +15,7 @@ def evaluate_policy(model, env, n_eval_episodes=10, deterministic=True, this must contain only one environment. :param n_eval_episodes: (int) Number of episode to evaluate the agent :param deterministic: (bool) Whether to use deterministic or stochastic actions - :param render: (bool) Whether to render the environement or not + :param render: (bool) Whether to render the environment or not :param callback: (callable) callback function to do additional checks, called after each step. 
:param reward_threshold: (float) Minimum expected reward per episode, diff --git a/stable_baselines/common/math_util.py b/stable_baselines/common/math_util.py index 8ec0659ccb..05c4cd72a5 100644 --- a/stable_baselines/common/math_util.py +++ b/stable_baselines/common/math_util.py @@ -61,7 +61,7 @@ def flatten_arrays(arrs): flattens a list of arrays down to 1D :param arrs: ([np.ndarray]) arrays - :return: (np.ndarray) 1D flattend array + :return: (np.ndarray) 1D flattened array """ return np.concatenate([arr.flat for arr in arrs]) diff --git a/stable_baselines/common/noise.py b/stable_baselines/common/noise.py index 446aced590..caecc55afb 100644 --- a/stable_baselines/common/noise.py +++ b/stable_baselines/common/noise.py @@ -55,7 +55,7 @@ def reset(self): class NormalActionNoise(ActionNoise): """ - A gaussian action noise + A Gaussian action noise :param mean: (float) the mean value of the noise :param sigma: (float) the scale of the noise (std here) @@ -73,7 +73,7 @@ def __repr__(self): class OrnsteinUhlenbeckActionNoise(ActionNoise): """ - A Ornstein Uhlenbeck action noise, this is designed to aproximate brownian motion with friction. + A Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction. Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab diff --git a/stable_baselines/common/policies.py b/stable_baselines/common/policies.py index d9e16cd092..eced062f64 100644 --- a/stable_baselines/common/policies.py +++ b/stable_baselines/common/policies.py @@ -101,7 +101,7 @@ class BasePolicy(ABC): :param reuse: (bool) If the policy is reusable or not :param scale: (bool) whether or not to scale the input :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed observation placeholder respectivly + and the processed observation placeholder respectively :param add_action_ph: (bool) whether or not to create an action placeholder """ @@ -171,9 +171,9 @@ def _kwargs_check(feature_extraction, kwargs): # When using policy_kwargs parameter on model creation, # all keywords arguments must be consumed by the policy constructor except # the ones for the cnn_extractor network (cf nature_cnn()), where the keywords arguments - # are not passed explicitely (using **kwargs to forward the arguments) + # are not passed explicitly (using **kwargs to forward the arguments) # that's why there should be not kwargs left when using the mlp_extractor - # (in that case the keywords arguments are passed explicitely) + # (in that case the keywords arguments are passed explicitly) if feature_extraction == 'mlp' and len(kwargs) > 0: raise ValueError("Unknown keywords for policy: {}".format(kwargs)) diff --git a/stable_baselines/common/schedules.py b/stable_baselines/common/schedules.py index 57f4013fb8..f20b7887a3 100644 --- a/stable_baselines/common/schedules.py +++ b/stable_baselines/common/schedules.py @@ -53,7 +53,7 @@ class PiecewiseSchedule(Schedule): Piecewise schedule. :param endpoints: ([(int, int)]) - list of pairs `(time, value)` meanining that schedule should output + list of pairs `(time, value)` meaning that schedule should output `value` when `t==time`. All the values for time must be sorted in an increasing order. When t is between two times, e.g. `(time_a, value_a)` and `(time_b, value_b)`, such that `time_a <= t < time_b` then value outputs @@ -64,7 +64,7 @@ class PiecewiseSchedule(Schedule): to the `endpoints`. 
Alpha is the fraction of distance from left endpoint to right endpoint that t has covered. See linear_interpolation for example. :param outside_value: (float) - if the value is requested outside of all the intervals sepecified in + if the value is requested outside of all the intervals specified in `endpoints` this value is returned. If None then AssertionError is raised when outside value is requested. """ diff --git a/stable_baselines/common/tf_util.py b/stable_baselines/common/tf_util.py index 6ec362b140..e737fa4a83 100644 --- a/stable_baselines/common/tf_util.py +++ b/stable_baselines/common/tf_util.py @@ -30,8 +30,8 @@ def huber_loss(tensor, delta=1.0): Reference: https://en.wikipedia.org/wiki/Huber_loss :param tensor: (TensorFlow Tensor) the input value - :param delta: (float) huber loss delta value - :return: (TensorFlow Tensor) huber loss output + :param delta: (float) Huber loss delta value + :return: (TensorFlow Tensor) Huber loss output """ return tf.where( tf.abs(tensor) < delta, @@ -80,7 +80,7 @@ def single_threaded_session(make_default=False, graph=None): def in_session(func): """ - wrappes a function so that it is in a TensorFlow Session + Wraps a function so that it is in a TensorFlow Session :param func: (function) the function to wrap :return: (function) @@ -241,7 +241,7 @@ def flatgrad(loss, var_list, clip_norm=None): :param loss: (float) the loss value :param var_list: ([TensorFlow Tensor]) the variables :param clip_norm: (float) clip the gradients (disabled if None) - :return: ([TensorFlow Tensor]) flattend gradient + :return: ([TensorFlow Tensor]) flattened gradient """ grads = tf.gradients(loss, var_list) if clip_norm is not None: diff --git a/stable_baselines/ddpg/ddpg.py b/stable_baselines/ddpg/ddpg.py index 64be5e2e83..94896d2faf 100644 --- a/stable_baselines/ddpg/ddpg.py +++ b/stable_baselines/ddpg/ddpg.py @@ -128,7 +128,7 @@ def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev, verb if var in get_perturbable_vars(actor): if verbose >= 2: logger.info(' {} <- {} + noise'.format(perturbed_var.name, var.name)) - # Add gaussian noise to the parameter + # Add Gaussian noise to the parameter updates.append(tf.assign(perturbed_var, var + tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev))) else: @@ -157,7 +157,7 @@ class DDPG(OffPolicyRLModel): :param eval_env: (Gym Environment) the evaluation environment (can be None) :param nb_train_steps: (int) the number of training steps :param nb_rollout_steps: (int) the number of rollout steps - :param nb_eval_steps: (int) the number of evalutation steps + :param nb_eval_steps: (int) the number of evaluation steps :param param_noise: (AdaptiveParamNoiseSpec) the parameter noise type (can be None) :param action_noise: (ActionNoise) the action noise type (can be None) :param param_noise_adaption_interval: (int) apply param noise every N steps @@ -175,7 +175,7 @@ class DDPG(OffPolicyRLModel): :param clip_norm: (float) clip the gradients (disabled if None) :param reward_scale: (float) the value the reward should be scaled by :param render: (bool) enable rendering of the environment - :param render_eval: (bool) enable rendering of the evalution environment + :param render_eval: (bool) enable rendering of the evaluation environment :param memory_limit: (int) the max number of transitions to store, size of the replay buffer .. 
deprecated:: 2.6.0 @@ -870,7 +870,7 @@ def learn(self, total_timesteps, callback=None, log_interval=100, tb_log_name="D self.env.render() # Randomly sample actions from a uniform distribution - # with a probabilty self.random_exploration (used in HER + DDPG) + # with a probability self.random_exploration (used in HER + DDPG) if np.random.rand() < self.random_exploration: # actions sampled from action space are from range specific to the environment # but algorithm operates on tanh-squashed actions therefore simple scaling is used diff --git a/stable_baselines/ddpg/policies.py b/stable_baselines/ddpg/policies.py index f0ac63087b..37e0e26e8b 100644 --- a/stable_baselines/ddpg/policies.py +++ b/stable_baselines/ddpg/policies.py @@ -31,7 +31,7 @@ def make_actor(self, obs=None, reuse=False, scope="pi"): creates an actor object :param obs: (TensorFlow Tensor) The observation placeholder (can be None for default placeholder) - :param reuse: (bool) whether or not to resue parameters + :param reuse: (bool) whether or not to reuse parameters :param scope: (str) the scope name of the actor :return: (TensorFlow Tensor) the output tensor """ @@ -43,7 +43,7 @@ def make_critic(self, obs=None, action=None, reuse=False, scope="qf"): :param obs: (TensorFlow Tensor) The observation placeholder (can be None for default placeholder) :param action: (TensorFlow Tensor) The action placeholder (can be None for default placeholder) - :param reuse: (bool) whether or not to resue parameters + :param reuse: (bool) whether or not to reuse parameters :param scope: (str) the scope name of the critic :return: (TensorFlow Tensor) the output tensor """ diff --git a/stable_baselines/deepq/build_graph.py b/stable_baselines/deepq/build_graph.py index b6a9d39589..51453ec6e5 100644 --- a/stable_baselines/deepq/build_graph.py +++ b/stable_baselines/deepq/build_graph.py @@ -134,7 +134,7 @@ def build_act(q_func, ob_space, ac_space, stochastic_ph, update_eps_ph, sess): :param sess: (TensorFlow session) The current TensorFlow session :return: (function (TensorFlow Tensor, bool, float): TensorFlow Tensor, (TensorFlow Tensor, TensorFlow Tensor) act function to select and action given observation (See the top of the file for details), - A tuple containing the observation placeholder and the processed observation placeholder respectivly. + A tuple containing the observation placeholder and the processed observation placeholder respectively. """ eps = tf.get_variable("eps", (), initializer=tf.constant_initializer(0)) @@ -177,7 +177,7 @@ def build_act_with_param_noise(q_func, ob_space, ac_space, stochastic_ph, update is used by default. :return: (function (TensorFlow Tensor, bool, float): TensorFlow Tensor, (TensorFlow Tensor, TensorFlow Tensor) act function to select and action given observation (See the top of the file for details), - A tuple containing the observation placeholder and the processed observation placeholder respectivly. + A tuple containing the observation placeholder and the processed observation placeholder respectively. 
""" if param_noise_filter_func is None: param_noise_filter_func = default_param_noise_filter diff --git a/stable_baselines/deepq/policies.py b/stable_baselines/deepq/policies.py index 5128f5467e..3a2dfec16d 100644 --- a/stable_baselines/deepq/policies.py +++ b/stable_baselines/deepq/policies.py @@ -19,7 +19,7 @@ class DQNPolicy(BasePolicy): :param reuse: (bool) If the policy is reusable or not :param scale: (bool) whether or not to scale the input :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed observation placeholder respectivly + and the processed observation placeholder respectively :param dueling: (bool) if true double the output MLP to compute a baseline for action scores """ @@ -81,7 +81,7 @@ class FeedForwardPolicy(DQNPolicy): :param cnn_extractor: (function (TensorFlow Tensor, ``**kwargs``): (TensorFlow Tensor)) the CNN feature extraction :param feature_extraction: (str) The feature extraction type ("cnn" or "mlp") :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed observation placeholder respectivly + and the processed observation placeholder respectively :param layer_norm: (bool) enable layer normalisation :param dueling: (bool) if true double the output MLP to compute a baseline for action scores :param act_fun: (tf.func) the activation function to use in the neural network. @@ -164,7 +164,7 @@ class CnnPolicy(FeedForwardPolicy): :param n_batch: (int) The number of batch to run (n_envs * n_steps) :param reuse: (bool) If the policy is reusable or not :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed observation placeholder respectivly + and the processed observation placeholder respectively :param dueling: (bool) if true double the output MLP to compute a baseline for action scores :param _kwargs: (dict) Extra keyword arguments for the nature CNN feature extraction """ @@ -188,7 +188,7 @@ class LnCnnPolicy(FeedForwardPolicy): :param n_batch: (int) The number of batch to run (n_envs * n_steps) :param reuse: (bool) If the policy is reusable or not :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed observation placeholder respectivly + and the processed observation placeholder respectively :param dueling: (bool) if true double the output MLP to compute a baseline for action scores :param _kwargs: (dict) Extra keyword arguments for the nature CNN feature extraction """ @@ -212,7 +212,7 @@ class MlpPolicy(FeedForwardPolicy): :param n_batch: (int) The number of batch to run (n_envs * n_steps) :param reuse: (bool) If the policy is reusable or not :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed observation placeholder respectivly + and the processed observation placeholder respectively :param dueling: (bool) if true double the output MLP to compute a baseline for action scores :param _kwargs: (dict) Extra keyword arguments for the nature CNN feature extraction """ @@ -236,7 +236,7 @@ class LnMlpPolicy(FeedForwardPolicy): :param n_batch: (int) The number of batch to run (n_envs * n_steps) :param reuse: (bool) If the policy is reusable or not :param obs_phs: (TensorFlow Tensor, TensorFlow Tensor) a tuple containing an override for observation placeholder - and the processed 
observation placeholder respectivly + and the processed observation placeholder respectively :param dueling: (bool) if true double the output MLP to compute a baseline for action scores :param _kwargs: (dict) Extra keyword arguments for the nature CNN feature extraction """ diff --git a/stable_baselines/gail/adversary.py b/stable_baselines/gail/adversary.py index ade1d977c1..7c6cb63c68 100644 --- a/stable_baselines/gail/adversary.py +++ b/stable_baselines/gail/adversary.py @@ -26,7 +26,7 @@ def logit_bernoulli_entropy(logits): https://github.com/openai/imitation/blob/99fbccf3e060b6e6c739bdf209758620fcdefd3c/policyopt/thutil.py#L48-L51 :param logits: (tf.Tensor) the logits - :return: (tf.Tensor) the bernoulli entropy + :return: (tf.Tensor) the Bernoulli entropy """ ent = (1. - tf.nn.sigmoid(logits)) * logits - logsigmoid(logits) return ent diff --git a/stable_baselines/gail/dataset/dataset.py b/stable_baselines/gail/dataset/dataset.py index b64236154e..6b8aa035da 100644 --- a/stable_baselines/gail/dataset/dataset.py +++ b/stable_baselines/gail/dataset/dataset.py @@ -181,7 +181,7 @@ class DataLoader(object): :param actions: (np.ndarray) actions :param batch_size: (int) Number of samples per minibatch :param n_workers: (int) number of preprocessing worker (for loading the images) - :param infinite_loop: (bool) whether to have an iterator that can be resetted + :param infinite_loop: (bool) whether to have an iterator that can be reset :param max_queue_len: (int) Max number of minibatches that can be preprocessed at the same time :param shuffle: (bool) Shuffle the minibatch after each epoch :param start_process: (bool) Start the preprocessing process (default: True) diff --git a/stable_baselines/ppo2/ppo2.py b/stable_baselines/ppo2/ppo2.py index 6d998d2d18..47deaa4a51 100644 --- a/stable_baselines/ppo2/ppo2.py +++ b/stable_baselines/ppo2/ppo2.py @@ -220,7 +220,7 @@ def setup_model(self): if self.clip_range_vf_ph is not None: tf.summary.scalar('clip_range_vf', tf.reduce_mean(self.clip_range_vf_ph)) - tf.summary.scalar('old_neglog_action_probabilty', tf.reduce_mean(self.old_neglog_pac_ph)) + tf.summary.scalar('old_neglog_action_probability', tf.reduce_mean(self.old_neglog_pac_ph)) tf.summary.scalar('old_value_pred', tf.reduce_mean(self.old_vpred_ph)) if self.full_tensorboard_log: @@ -228,7 +228,7 @@ def setup_model(self): tf.summary.histogram('learning_rate', self.learning_rate_ph) tf.summary.histogram('advantage', self.advs_ph) tf.summary.histogram('clip_range', self.clip_range_ph) - tf.summary.histogram('old_neglog_action_probabilty', self.old_neglog_pac_ph) + tf.summary.histogram('old_neglog_action_probability', self.old_neglog_pac_ph) tf.summary.histogram('old_value_pred', self.old_vpred_ph) if tf_util.is_image(self.observation_space): tf.summary.image('observation', train_model.obs_ph) diff --git a/stable_baselines/sac/policies.py b/stable_baselines/sac/policies.py index 0240507e7b..b9337e98a6 100644 --- a/stable_baselines/sac/policies.py +++ b/stable_baselines/sac/policies.py @@ -26,7 +26,7 @@ def gaussian_likelihood(input_, mu_, log_std): def gaussian_entropy(log_std): """ - Compute the entropy for a diagonal gaussian distribution. + Compute the entropy for a diagonal Gaussian distribution. 
:param log_std: (tf.Tensor) Log of the standard deviation :return: (tf.Tensor) @@ -61,7 +61,7 @@ def clip_but_pass_gradient(input_, lower=-1., upper=1.): def apply_squashing_func(mu_, pi_, logp_pi): """ - Squash the ouput of the gaussian distribution + Squash the output of the Gaussian distribution and account for that in the log probability The squashed mean is also returned for using deterministic actions. @@ -113,7 +113,7 @@ def make_actor(self, obs=None, reuse=False, scope="pi"): Creates an actor object :param obs: (TensorFlow Tensor) The observation placeholder (can be None for default placeholder) - :param reuse: (bool) whether or not to resue parameters + :param reuse: (bool) whether or not to reuse parameters :param scope: (str) the scope name of the actor :return: (TensorFlow Tensor) the output tensor """ @@ -126,7 +126,7 @@ def make_critics(self, obs=None, action=None, reuse=False, :param obs: (TensorFlow Tensor) The observation placeholder (can be None for default placeholder) :param action: (TensorFlow Tensor) The action placeholder - :param reuse: (bool) whether or not to resue parameters + :param reuse: (bool) whether or not to reuse parameters :param scope: (str) the scope name :param create_vf: (bool) Whether to create Value fn or not :param create_qf: (bool) Whether to create Q-Values fn or not @@ -235,7 +235,7 @@ def make_actor(self, obs=None, reuse=False, scope="pi"): logp_pi = gaussian_likelihood(pi_, mu_, log_std) self.entropy = gaussian_entropy(log_std) # MISSING: reg params for log and mu - # Apply squashing and account for it in the probabilty + # Apply squashing and account for it in the probability deterministic_policy, policy, logp_pi = apply_squashing_func(mu_, pi_, logp_pi) self.policy = policy self.deterministic_policy = deterministic_policy diff --git a/stable_baselines/sac/sac.py b/stable_baselines/sac/sac.py index 91f2a36839..33ef511249 100644 --- a/stable_baselines/sac/sac.py +++ b/stable_baselines/sac/sac.py @@ -176,7 +176,7 @@ def setup_model(self): # Create the policy # first return value corresponds to deterministic actions # policy_out corresponds to stochastic actions, used for training - # logp_pi is the log probabilty of actions taken by the policy + # logp_pi is the log probability of actions taken by the policy self.deterministic_action, policy_out, logp_pi = self.policy_tf.make_actor(self.processed_obs_ph) # Monitor the entropy of the policy, # this is not used for training @@ -250,7 +250,7 @@ def setup_model(self): policy_kl_loss = tf.reduce_mean(self.ent_coef * logp_pi - qf1_pi) # NOTE: in the original implementation, they have an additional - # regularization loss for the gaussian parameters + # regularization loss for the Gaussian parameters # this is not used for now # policy_loss = (policy_kl_loss + policy_regularization_loss) policy_loss = policy_kl_loss @@ -510,7 +510,7 @@ def action_probability(self, observation, state=None, mask=None, actions=None, l raise ValueError("Error: SAC does not have action probabilities.") warnings.warn("Even though SAC has a Gaussian policy, it cannot return a distribution as it " - "is squashed by a tanh before being scaled and ouputed.") + "is squashed by a tanh before being scaled and outputed.") return None diff --git a/stable_baselines/td3/policies.py b/stable_baselines/td3/policies.py index 2860291974..d1b42ba142 100644 --- a/stable_baselines/td3/policies.py +++ b/stable_baselines/td3/policies.py @@ -33,7 +33,7 @@ def make_actor(self, obs=None, reuse=False, scope="pi"): Creates an actor object :param 
obs: (TensorFlow Tensor) The observation placeholder (can be None for default placeholder) - :param reuse: (bool) whether or not to resue parameters + :param reuse: (bool) whether or not to reuse parameters :param scope: (str) the scope name of the actor :return: (TensorFlow Tensor) the output tensor """ @@ -46,7 +46,7 @@ def make_critics(self, obs=None, action=None, reuse=False, :param obs: (TensorFlow Tensor) The observation placeholder (can be None for default placeholder) :param action: (TensorFlow Tensor) The action placeholder - :param reuse: (bool) whether or not to resue parameters + :param reuse: (bool) whether or not to reuse parameters :param scope: (str) the scope name :return: ([tf.Tensor]) Mean, action and log probability """ diff --git a/stable_baselines/td3/td3.py b/stable_baselines/td3/td3.py index 989d9df604..2f66a44082 100644 --- a/stable_baselines/td3/td3.py +++ b/stable_baselines/td3/td3.py @@ -38,7 +38,7 @@ class TD3(OffPolicyRLModel): :param policy_delay: (int) Policy and target networks will only be updated once every policy_delay steps per training steps. The Q values will be updated policy_delay more often (update every training step). :param action_noise: (ActionNoise) the action noise type. Cf DDPG for the different action noise type. - :param target_policy_noise: (float) Standard deviation of gaussian noise added to target policy + :param target_policy_noise: (float) Standard deviation of Gaussian noise added to target policy (smoothing noise) :param target_noise_clip: (float) Limit for absolute value of target policy smoothing noise. :param train_freq: (int) Update the model every `train_freq` steps.
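For context on the ``action_noise`` and target-smoothing parameters documented in the TD3 hunk above, a minimal usage sketch follows. It assumes the stable-baselines 2.x ``TD3`` constructor and ``NormalActionNoise`` from ``stable_baselines.common.noise`` (whose docstring is also touched in this patch); ``Pendulum-v0`` and the noise scale are illustrative choices, not part of the patch.

.. code-block:: python

    import gym
    import numpy as np

    from stable_baselines import TD3
    from stable_baselines.common.noise import NormalActionNoise

    env = gym.make('Pendulum-v0')
    n_actions = env.action_space.shape[-1]
    # Gaussian exploration noise added to the actions at rollout time
    action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                     sigma=0.1 * np.ones(n_actions))

    # target_policy_noise / target_noise_clip control target policy smoothing;
    # policy_delay updates the actor and targets less often than the critics
    model = TD3('MlpPolicy', env, action_noise=action_noise,
                target_policy_noise=0.2, target_noise_clip=0.5,
                train_freq=100, policy_delay=2, verbose=1)
    model.learn(total_timesteps=10000)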