From 6d5ae91846b102325bdcb188194648cd738bf924 Mon Sep 17 00:00:00 2001
From: KelvinYang0320 <kelvin777320@gmai.com>
Date: Wed, 8 Dec 2021 21:37:57 +0800
Subject: [PATCH 1/3] GoalEnv for Robot-Supervisor scheme

---
 .../controllers/robot_supervisor.py           |  23 +++-
 .../supervisor/controllers/supervisor_env.py  | 120 ++++++++++++------
 2 files changed, 100 insertions(+), 43 deletions(-)

diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py
index 50675ca..7565d18 100644
--- a/deepbots/supervisor/controllers/robot_supervisor.py
+++ b/deepbots/supervisor/controllers/robot_supervisor.py
@@ -1,5 +1,5 @@
 from warnings import warn, simplefilter
-from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv
+from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv, SupervisorGoalEnv
 from controller import Supervisor
 
 
@@ -99,3 +99,24 @@ def apply_action(self, action):
         :param action: list, containing action data
         """
         raise NotImplementedError
+
+class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor):
+    """
+    The RobotGoalSupervisor class is just like RobotSupervisor, but it 
+    uses compute_reward from gym.GoalEnv.
+    """
+    def __init__(self, timestep=None):
+        super(RobotGoalSupervisor, self).__init__()
+
+        if timestep is None:
+            self.timestep = int(self.getBasicTimeStep())
+        else:
+            self.timestep = timestep
+
+    def step(self, action):
+        """
+        The basic step method is use-case specific and needs to be implemented
+        by the user and please use compute_reward inherited from gym.GoalEnv() 
+        instead of get_reward().
+        """
+        raise NotImplementedError
\ No newline at end of file
diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py
index 78a58f0..3fb082f 100644
--- a/deepbots/supervisor/controllers/supervisor_env.py
+++ b/deepbots/supervisor/controllers/supervisor_env.py
@@ -2,27 +2,8 @@
 from controller import Supervisor
 
 
-class SupervisorEnv(Supervisor, gym.Env):
-    """
-    This class is the highest class in deepbots class hierarchy, inheriting
-    both the Webots Supervisor controller and the basic gym.Env.
-
-    Refer to gym.Env documentation on how to implement a custom gym.Env
-    for additional functionality.
-
-    This class contains abstract methods that guide the development process
-    for users that want to implement a simple environment.
-
-    This class is not intended for user usage, but to provide a common
-    interface for all provided supervisor classes and make them
-    compatible with reinforcement learning agents that work with
-    the gym interface. Moreover, a problem-agnostic reset method is
-    provided. Please use any of the children supervisor classes to be
-    inherited by your own class, such as the RobotSupervisor class.
-    Nevertheless, advanced users can inherit this class to create
-    their own supervisor classes if they wish.
-    """
-
+class SupervisorBasicEnv:
+    
     def step(self, action):
         """
         On each timestep, the agent chooses an action for the previous
@@ -43,27 +24,6 @@ def step(self, action):
         """
         raise NotImplementedError
 
-    def reset(self):
-        """
-        Used to reset the world to an initial state.
-
-        Default, problem-agnostic, implementation of reset method,
-        using Webots-provided methods.
-
-        *Note that this works properly only with Webots versions >R2020b
-        and must be overridden with a custom reset method when using
-        earlier versions. It is backwards compatible due to the fact
-        that the new reset method gets overridden by whatever the user
-        has previously implemented, so an old supervisor can be migrated
-        easily to use this class.
-
-        :return: default observation provided by get_default_observation()
-        """
-        self.simulationReset()
-        self.simulationResetPhysics()
-        super(Supervisor, self).step(int(self.getBasicTimeStep()))
-        return self.get_default_observation()
-
     def get_default_observation(self):
         """
         This method should be implemented to return a default/starting
@@ -115,3 +75,79 @@ def get_info(self):
         information on each step, e.g. for debugging purposes.
         """
         raise NotImplementedError
+
+class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv):
+    """
+    This class is the highest class except SupervisorBasicEnv in deepbots 
+    class hierarchy, inheriting the Webots Supervisor controller, the basic 
+    gym.Env, and the basic RL functions.
+
+    Refer to gym.Env documentation on how to implement a custom gym.Env
+    for additional functionality.
+
+    This class contains abstract methods that guide the development process
+    for users that want to implement a simple environment.
+
+    This class is not intended for user usage, but to provide a common
+    interface for all provided supervisor classes and make them
+    compatible with reinforcement learning agents that work with
+    the gym interface. Moreover, a problem-agnostic reset method is
+    provided. Please use any of the children supervisor classes to be
+    inherited by your own class, such as the RobotSupervisor class.
+    Nevertheless, advanced users can inherit this class to create
+    their own supervisor classes if they wish.
+    """
+
+    def reset(self):
+        """
+        Used to reset the world to an initial state.
+
+        Default, problem-agnostic, implementation of reset method,
+        using Webots-provided methods.
+
+        *Note that this works properly only with Webots versions >R2020b
+        and must be overridden with a custom reset method when using
+        earlier versions. It is backwards compatible due to the fact
+        that the new reset method gets overridden by whatever the user
+        has previously implemented, so an old supervisor can be migrated
+        easily to use this class.
+
+        :return: default observation provided by get_default_observation()
+        """
+        self.simulationReset()
+        self.simulationResetPhysics()
+        super(Supervisor, self).step(int(self.getBasicTimeStep()))
+        return self.get_default_observation()
+
+    
+
+class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv):
+    """
+    This class is just like SupervisorEnv, but it imposes gym.GoalEnv. 
+
+    Refer to gym.GoalEnv documentation on how to implement a custom 
+    gym.GoalEnv for additional functionality.
+    """
+
+    def reset(self):
+        """
+        Used to reset the world to an initial state and enforce that each 
+        SupervisorGoalEnv uses a Goal-compatible observation space.
+
+        Default, problem-agnostic, implementation of reset method,
+        using Webots-provided methods.
+
+        *Note that this works properly only with Webots versions >R2020b
+        and must be overridden with a custom reset method when using
+        earlier versions. It is backwards compatible due to the fact
+        that the new reset method gets overridden by whatever the user
+        has previously implemented, so an old supervisor can be migrated
+        easily to use this class.
+
+        :return: default observation provided by get_default_observation()
+        """
+        super().reset() 
+        self.simulationReset()
+        self.simulationResetPhysics()
+        super(Supervisor, self).step(int(self.getBasicTimeStep()))
+        return self.get_default_observation()
\ No newline at end of file

From 2d4400d7fb32de4e3a82644ac34453448bdf8adf Mon Sep 17 00:00:00 2001
From: KelvinYang0320 <kelvin777320@gmai.com>
Date: Sun, 12 Dec 2021 12:20:18 +0800
Subject: [PATCH 2/3] PEP8

---
 deepbots/supervisor/controllers/robot_supervisor.py | 5 ++++-
 deepbots/supervisor/controllers/supervisor_env.py   | 8 ++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py
index 7565d18..7646c93 100644
--- a/deepbots/supervisor/controllers/robot_supervisor.py
+++ b/deepbots/supervisor/controllers/robot_supervisor.py
@@ -27,6 +27,7 @@ class RobotSupervisor(SupervisorEnv):
     action, e.g. motor speeds.
     Note that apply_action() is called during step().
     """
+
     def __init__(self, timestep=None):
         super(RobotSupervisor, self).__init__()
 
@@ -100,11 +101,13 @@ def apply_action(self, action):
         """
         raise NotImplementedError
 
+
 class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor):
     """
     The RobotGoalSupervisor class is just like RobotSupervisor, but it 
     uses compute_reward from gym.GoalEnv.
     """
+
     def __init__(self, timestep=None):
         super(RobotGoalSupervisor, self).__init__()
 
@@ -119,4 +122,4 @@ def step(self, action):
         by the user and please use compute_reward inherited from gym.GoalEnv() 
         instead of get_reward().
         """
-        raise NotImplementedError
\ No newline at end of file
+        raise NotImplementedError
diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py
index 3fb082f..f7b23c5 100644
--- a/deepbots/supervisor/controllers/supervisor_env.py
+++ b/deepbots/supervisor/controllers/supervisor_env.py
@@ -3,7 +3,7 @@
 
 
 class SupervisorBasicEnv:
-    
+
     def step(self, action):
         """
         On each timestep, the agent chooses an action for the previous
@@ -76,6 +76,7 @@ def get_info(self):
         """
         raise NotImplementedError
 
+
 class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv):
     """
     This class is the highest class except SupervisorBasicEnv in deepbots 
@@ -119,7 +120,6 @@ def reset(self):
         super(Supervisor, self).step(int(self.getBasicTimeStep()))
         return self.get_default_observation()
 
-    
 
 class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv):
     """
@@ -146,8 +146,8 @@ def reset(self):
 
         :return: default observation provided by get_default_observation()
         """
-        super().reset() 
+        super().reset()
         self.simulationReset()
         self.simulationResetPhysics()
         super(Supervisor, self).step(int(self.getBasicTimeStep()))
-        return self.get_default_observation()
\ No newline at end of file
+        return self.get_default_observation()

From 4fbfd891f083853f19b617a6d777cefae868b60d Mon Sep 17 00:00:00 2001
From: KelvinYang0320 <kelvin777320@gmai.com>
Date: Sun, 12 Dec 2021 20:55:35 +0800
Subject: [PATCH 3/3] better class inheritance

---
 .../controllers/robot_supervisor.py           | 45 +++++++++-
 .../supervisor/controllers/supervisor_env.py  | 88 +++++++++----------
 2 files changed, 83 insertions(+), 50 deletions(-)

diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py
index 7646c93..4c97b8e 100644
--- a/deepbots/supervisor/controllers/robot_supervisor.py
+++ b/deepbots/supervisor/controllers/robot_supervisor.py
@@ -106,6 +106,11 @@ class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor):
     """
     The RobotGoalSupervisor class is just like RobotSupervisor, but it 
     uses compute_reward from gym.GoalEnv.
+
+    step():
+    (similar to use_step() of RobotSupervisor)
+    This method steps the controller.
+    Note that the gym-inherited compute_reward method is used here.
     """
 
     def __init__(self, timestep=None):
@@ -118,8 +123,40 @@ def __init__(self, timestep=None):
 
     def step(self, action):
         """
-        The basic step method is use-case specific and needs to be implemented
-        by the user and please use compute_reward inherited from gym.GoalEnv() 
-        instead of get_reward().
+        The basic step method that steps the controller,
+        calls the method that applies the action on the robot
+        and returns the (observations, reward, done, info) object.
+
+        For RobotGoalSupervisor, the gym-inherited compute_reward
+        method is used. This method must be implemented by the
+        user, according to gym.GoalEnv, using achieved_goal and
+        desired_goal.
+
+        :param action: Whatever the use-case uses as an action, e.g.
+            an integer representing discrete actions
+        :type action: Defined by the implementation
+
+        Note that compute_reward's arguments are not arguments of step():
+        achieved_goal (the goal that was achieved during execution) and
+        desired_goal (the goal that we asked the agent to attempt to
+        achieve) are read from the observation under the keys of the
+        same name, and info is the output of get_info().
+
+        :return: tuple, (observations, reward, done, info) as provided by the
+            corresponding methods as implemented for the use-case
         """
-        raise NotImplementedError
+        if super(Supervisor, self).step(self.timestep) == -1:
+            exit()
+
+        self.apply_action(action)
+        obs = self.get_observations()
+        info = self.get_info()
+
+        return (
+            obs,
+            self.compute_reward(obs["achieved_goal"],
+                                obs["desired_goal"],
+                                info),
+            self.is_done(),
+            info,
+        )
diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/supervisor_env.py
index f7b23c5..4ed1f02 100644
--- a/deepbots/supervisor/controllers/supervisor_env.py
+++ b/deepbots/supervisor/controllers/supervisor_env.py
@@ -2,7 +2,26 @@
 from controller import Supervisor
 
 
-class SupervisorBasicEnv:
+class SupervisorEnv(Supervisor, gym.Env):
+    """
+    This class is the highest class in deepbots class hierarchy, inheriting
+    both the Webots Supervisor controller and the basic gym.Env.
+
+    Refer to gym.Env documentation on how to implement a custom gym.Env
+    for additional functionality.
+
+    This class contains abstract methods that guide the development process
+    for users that want to implement a simple environment.
+
+    This class is not intended for user usage, but to provide a common
+    interface for all provided supervisor classes and make them
+    compatible with reinforcement learning agents that work with
+    the gym interface. Moreover, a problem-agnostic reset method is
+    provided. Please use any of the children supervisor classes to be
+    inherited by your own class, such as the RobotSupervisor class.
+    Nevertheless, advanced users can inherit this class to create
+    their own supervisor classes if they wish.
+    """
 
     def step(self, action):
         """
@@ -24,6 +43,27 @@ def step(self, action):
         """
         raise NotImplementedError
 
+    def reset(self):
+        """
+        Used to reset the world to an initial state.
+
+        Default, problem-agnostic, implementation of reset method,
+        using Webots-provided methods.
+
+        *Note that this works properly only with Webots versions >R2020b
+        and must be overridden with a custom reset method when using
+        earlier versions. It is backwards compatible due to the fact
+        that the new reset method gets overridden by whatever the user
+        has previously implemented, so an old supervisor can be migrated
+        easily to use this class.
+
+        :return: default observation provided by get_default_observation()
+        """
+        self.simulationReset()
+        self.simulationResetPhysics()
+        super(Supervisor, self).step(int(self.getBasicTimeStep()))
+        return self.get_default_observation()
+
     def get_default_observation(self):
         """
         This method should be implemented to return a default/starting
@@ -77,51 +117,7 @@ def get_info(self):
         raise NotImplementedError
 
 
-class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv):
-    """
-    This class is the highest class except SupervisorBasicEnv in deepbots 
-    class hierarchy, inheriting the Webots Supervisor controller, the basic 
-    gym.Env, and the basic RL functions.
-
-    Refer to gym.Env documentation on how to implement a custom gym.Env
-    for additional functionality.
-
-    This class contains abstract methods that guide the development process
-    for users that want to implement a simple environment.
-
-    This class is not intended for user usage, but to provide a common
-    interface for all provided supervisor classes and make them
-    compatible with reinforcement learning agents that work with
-    the gym interface. Moreover, a problem-agnostic reset method is
-    provided. Please use any of the children supervisor classes to be
-    inherited by your own class, such as the RobotSupervisor class.
-    Nevertheless, advanced users can inherit this class to create
-    their own supervisor classes if they wish.
-    """
-
-    def reset(self):
-        """
-        Used to reset the world to an initial state.
-
-        Default, problem-agnostic, implementation of reset method,
-        using Webots-provided methods.
-
-        *Note that this works properly only with Webots versions >R2020b
-        and must be overridden with a custom reset method when using
-        earlier versions. It is backwards compatible due to the fact
-        that the new reset method gets overridden by whatever the user
-        has previously implemented, so an old supervisor can be migrated
-        easily to use this class.
-
-        :return: default observation provided by get_default_observation()
-        """
-        self.simulationReset()
-        self.simulationResetPhysics()
-        super(Supervisor, self).step(int(self.getBasicTimeStep()))
-        return self.get_default_observation()
-
-
-class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv):
+class SupervisorGoalEnv(gym.GoalEnv, SupervisorEnv):
     """
     This class is just like SupervisorEnv, but it imposes gym.GoalEnv.