From 6d5ae91846b102325bdcb188194648cd738bf924 Mon Sep 17 00:00:00 2001 From: KelvinYang0320 Date: Wed, 8 Dec 2021 21:37:57 +0800 Subject: [PATCH 1/3] GoalEnv for Robot-Supervisor scheme --- .../controllers/robot_supervisor.py | 23 +++- .../supervisor/controllers/supervisor_env.py | 120 ++++++++++++------ 2 files changed, 100 insertions(+), 43 deletions(-) diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py index 50675ca..7565d18 100644 --- a/deepbots/supervisor/controllers/robot_supervisor.py +++ b/deepbots/supervisor/controllers/robot_supervisor.py @@ -1,5 +1,5 @@ from warnings import warn, simplefilter -from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv +from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv, SupervisorGoalEnv from controller import Supervisor @@ -99,3 +99,24 @@ def apply_action(self, action): :param action: list, containing action data """ raise NotImplementedError + +class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor): + """ + The RobotGoalSupervisor class is just like RobotSupervisor, but it + uses compute_reward from gym.GoalEnv. + """ + def __init__(self, timestep=None): + super(RobotGoalSupervisor, self).__init__() + + if timestep is None: + self.timestep = int(self.getBasicTimeStep()) + else: + self.timestep = timestep + + def step(self, action): + """ + The basic step method is use-case specific and needs to be implemented + by the user and please use compute_reward inherited from gym.GoalEnv() + instead of get_reward(). + """ + raise NotImplementedError \ No newline at end of file diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py index 78a58f0..3fb082f 100644 --- a/deepbots/supervisor/controllers/supervisor_env.py +++ b/deepbots/supervisor/controllers/supervisor_env.py @@ -2,27 +2,8 @@ from controller import Supervisor -class SupervisorEnv(Supervisor, gym.Env): - """ - This class is the highest class in deepbots class hierarchy, inheriting - both the Webots Supervisor controller and the basic gym.Env. - - Refer to gym.Env documentation on how to implement a custom gym.Env - for additional functionality. - - This class contains abstract methods that guide the development process - for users that want to implement a simple environment. - - This class is not intended for user usage, but to provide a common - interface for all provided supervisor classes and make them - compatible with reinforcement learning agents that work with - the gym interface. Moreover, a problem-agnostic reset method is - provided. Please use any of the children supervisor classes to be - inherited by your own class, such as the RobotSupervisor class. - Nevertheless, advanced users can inherit this class to create - their own supervisor classes if they wish. - """ - +class SupervisorBasicEnv: + def step(self, action): """ On each timestep, the agent chooses an action for the previous @@ -43,27 +24,6 @@ def step(self, action): """ raise NotImplementedError - def reset(self): - """ - Used to reset the world to an initial state. - - Default, problem-agnostic, implementation of reset method, - using Webots-provided methods. - - *Note that this works properly only with Webots versions >R2020b - and must be overridden with a custom reset method when using - earlier versions. It is backwards compatible due to the fact - that the new reset method gets overridden by whatever the user - has previously implemented, so an old supervisor can be migrated - easily to use this class. - - :return: default observation provided by get_default_observation() - """ - self.simulationReset() - self.simulationResetPhysics() - super(Supervisor, self).step(int(self.getBasicTimeStep())) - return self.get_default_observation() - def get_default_observation(self): """ This method should be implemented to return a default/starting @@ -115,3 +75,79 @@ def get_info(self): information on each step, e.g. for debugging purposes. """ raise NotImplementedError + +class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv): + """ + This class is the highest class except SupervisorBasicEnv in deepbots + class hierarchy, inheriting the Webots Supervisor controller, the basic + gym.Env, and the basic RL functions. + + Refer to gym.Env documentation on how to implement a custom gym.Env + for additional functionality. + + This class contains abstract methods that guide the development process + for users that want to implement a simple environment. + + This class is not intended for user usage, but to provide a common + interface for all provided supervisor classes and make them + compatible with reinforcement learning agents that work with + the gym interface. Moreover, a problem-agnostic reset method is + provided. Please use any of the children supervisor classes to be + inherited by your own class, such as the RobotSupervisor class. + Nevertheless, advanced users can inherit this class to create + their own supervisor classes if they wish. + """ + + def reset(self): + """ + Used to reset the world to an initial state. + + Default, problem-agnostic, implementation of reset method, + using Webots-provided methods. + + *Note that this works properly only with Webots versions >R2020b + and must be overridden with a custom reset method when using + earlier versions. It is backwards compatible due to the fact + that the new reset method gets overridden by whatever the user + has previously implemented, so an old supervisor can be migrated + easily to use this class. + + :return: default observation provided by get_default_observation() + """ + self.simulationReset() + self.simulationResetPhysics() + super(Supervisor, self).step(int(self.getBasicTimeStep())) + return self.get_default_observation() + + + +class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv): + """ + This class is just like SupervisorEnv, but it imposes gym.GoalEnv. + + Refer to gym.GoalEnv documentation on how to implement a custom + gym.GoalEnv for additional functionality. + """ + + def reset(self): + """ + Used to reset the world to an initial state and enforce that each + SupervisorGoalEnv uses a Goal-compatible observation space. + + Default, problem-agnostic, implementation of reset method, + using Webots-provided methods. + + *Note that this works properly only with Webots versions >R2020b + and must be overridden with a custom reset method when using + earlier versions. It is backwards compatible due to the fact + that the new reset method gets overridden by whatever the user + has previously implemented, so an old supervisor can be migrated + easily to use this class. + + :return: default observation provided by get_default_observation() + """ + super().reset() + self.simulationReset() + self.simulationResetPhysics() + super(Supervisor, self).step(int(self.getBasicTimeStep())) + return self.get_default_observation() \ No newline at end of file From 2d4400d7fb32de4e3a82644ac34453448bdf8adf Mon Sep 17 00:00:00 2001 From: KelvinYang0320 Date: Sun, 12 Dec 2021 12:20:18 +0800 Subject: [PATCH 2/3] PEP8 --- deepbots/supervisor/controllers/robot_supervisor.py | 5 ++++- deepbots/supervisor/controllers/supervisor_env.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py index 7565d18..7646c93 100644 --- a/deepbots/supervisor/controllers/robot_supervisor.py +++ b/deepbots/supervisor/controllers/robot_supervisor.py @@ -27,6 +27,7 @@ class RobotSupervisor(SupervisorEnv): action, e.g. motor speeds. Note that apply_action() is called during step(). """ + def __init__(self, timestep=None): super(RobotSupervisor, self).__init__() @@ -100,11 +101,13 @@ def apply_action(self, action): """ raise NotImplementedError + class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor): """ The RobotGoalSupervisor class is just like RobotSupervisor, but it uses compute_reward from gym.GoalEnv. """ + def __init__(self, timestep=None): super(RobotGoalSupervisor, self).__init__() @@ -119,4 +122,4 @@ def step(self, action): by the user and please use compute_reward inherited from gym.GoalEnv() instead of get_reward(). """ - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py index 3fb082f..f7b23c5 100644 --- a/deepbots/supervisor/controllers/supervisor_env.py +++ b/deepbots/supervisor/controllers/supervisor_env.py @@ -3,7 +3,7 @@ class SupervisorBasicEnv: - + def step(self, action): """ On each timestep, the agent chooses an action for the previous @@ -76,6 +76,7 @@ def get_info(self): """ raise NotImplementedError + class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv): """ This class is the highest class except SupervisorBasicEnv in deepbots @@ -119,7 +120,6 @@ def reset(self): super(Supervisor, self).step(int(self.getBasicTimeStep())) return self.get_default_observation() - class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv): """ @@ -146,8 +146,8 @@ def reset(self): :return: default observation provided by get_default_observation() """ - super().reset() + super().reset() self.simulationReset() self.simulationResetPhysics() super(Supervisor, self).step(int(self.getBasicTimeStep())) - return self.get_default_observation() \ No newline at end of file + return self.get_default_observation() From 4fbfd891f083853f19b617a6d777cefae868b60d Mon Sep 17 00:00:00 2001 From: KelvinYang0320 Date: Sun, 12 Dec 2021 20:55:35 +0800 Subject: [PATCH 3/3] better class inheritance --- .../controllers/robot_supervisor.py | 45 +++++++++- .../supervisor/controllers/supervisor_env.py | 88 +++++++++---------- 2 files changed, 83 insertions(+), 50 deletions(-) diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py index 7646c93..4c97b8e 100644 --- a/deepbots/supervisor/controllers/robot_supervisor.py +++ b/deepbots/supervisor/controllers/robot_supervisor.py @@ -106,6 +106,11 @@ class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor): """ The RobotGoalSupervisor class is just like RobotSupervisor, but it uses compute_reward from gym.GoalEnv. + + step(): + (similar to use_step() of RobotSupervisor) + This method steps the controller. + Note that the gym-inherited compute_reward method is used here. """ def __init__(self, timestep=None): @@ -118,8 +123,40 @@ def __init__(self, timestep=None): def step(self, action): """ - The basic step method is use-case specific and needs to be implemented - by the user and please use compute_reward inherited from gym.GoalEnv() - instead of get_reward(). + The basic step method that steps the controller, + calls the method that applies the action on the robot + and returns the (observations, reward, done, info) object. + + For RobotGoalSupervisor, the gym-inherited compute_reward + method is used. This method must be implemented by the + user, according to gym.GoalEnv, using achieved_goal and + desired_goal. + + :param action: Whatever the use-case uses as an action, e.g. + an integer representing discrete actions + :type action: Defined by the implementation + :param achieved_goal: the goal that was achieved during execution + :type achieved_goal: object + :param desired_goal: the desired goal that we asked the agent to + attempt to achieve + :type desired_goal: object + :param info: an info dictionary with additional information + :type info: object + :return: tuple, (observations, reward, done, info) as provided by the + corresponding methods as implemented for the use-case """ - raise NotImplementedError + if super(Supervisor, self).step(self.timestep) == -1: + exit() + + self.apply_action(action) + obs = self.get_observations() + info = self.get_info() + + return ( + obs, + self.compute_reward(obs["achieved_goal"], + obs["desired_goal"], + info), + self.is_done(), + info, + ) diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py index f7b23c5..4ed1f02 100644 --- a/deepbots/supervisor/controllers/supervisor_env.py +++ b/deepbots/supervisor/controllers/supervisor_env.py @@ -2,7 +2,26 @@ from controller import Supervisor -class SupervisorBasicEnv: +class SupervisorEnv(Supervisor, gym.Env): + """ + This class is the highest class in deepbots class hierarchy, inheriting + both the Webots Supervisor controller and the basic gym.Env. + + Refer to gym.Env documentation on how to implement a custom gym.Env + for additional functionality. + + This class contains abstract methods that guide the development process + for users that want to implement a simple environment. + + This class is not intended for user usage, but to provide a common + interface for all provided supervisor classes and make them + compatible with reinforcement learning agents that work with + the gym interface. Moreover, a problem-agnostic reset method is + provided. Please use any of the children supervisor classes to be + inherited by your own class, such as the RobotSupervisor class. + Nevertheless, advanced users can inherit this class to create + their own supervisor classes if they wish. + """ def step(self, action): """ @@ -24,6 +43,27 @@ def step(self, action): """ raise NotImplementedError + def reset(self): + """ + Used to reset the world to an initial state. + + Default, problem-agnostic, implementation of reset method, + using Webots-provided methods. + + *Note that this works properly only with Webots versions >R2020b + and must be overridden with a custom reset method when using + earlier versions. It is backwards compatible due to the fact + that the new reset method gets overridden by whatever the user + has previously implemented, so an old supervisor can be migrated + easily to use this class. + + :return: default observation provided by get_default_observation() + """ + self.simulationReset() + self.simulationResetPhysics() + super(Supervisor, self).step(int(self.getBasicTimeStep())) + return self.get_default_observation() + def get_default_observation(self): """ This method should be implemented to return a default/starting @@ -77,51 +117,7 @@ def get_info(self): raise NotImplementedError -class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv): - """ - This class is the highest class except SupervisorBasicEnv in deepbots - class hierarchy, inheriting the Webots Supervisor controller, the basic - gym.Env, and the basic RL functions. - - Refer to gym.Env documentation on how to implement a custom gym.Env - for additional functionality. - - This class contains abstract methods that guide the development process - for users that want to implement a simple environment. - - This class is not intended for user usage, but to provide a common - interface for all provided supervisor classes and make them - compatible with reinforcement learning agents that work with - the gym interface. Moreover, a problem-agnostic reset method is - provided. Please use any of the children supervisor classes to be - inherited by your own class, such as the RobotSupervisor class. - Nevertheless, advanced users can inherit this class to create - their own supervisor classes if they wish. - """ - - def reset(self): - """ - Used to reset the world to an initial state. - - Default, problem-agnostic, implementation of reset method, - using Webots-provided methods. - - *Note that this works properly only with Webots versions >R2020b - and must be overridden with a custom reset method when using - earlier versions. It is backwards compatible due to the fact - that the new reset method gets overridden by whatever the user - has previously implemented, so an old supervisor can be migrated - easily to use this class. - - :return: default observation provided by get_default_observation() - """ - self.simulationReset() - self.simulationResetPhysics() - super(Supervisor, self).step(int(self.getBasicTimeStep())) - return self.get_default_observation() - - -class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv): +class SupervisorGoalEnv(gym.GoalEnv, SupervisorEnv): """ This class is just like SupervisorEnv, but it imposes gym.GoalEnv.