GoalEnv for Robot-Supervisor scheme #103

Closed
wants to merge 5 commits into from
Changes from 1 commit
23 changes: 22 additions & 1 deletion deepbots/supervisor/controllers/robot_supervisor.py
@@ -1,5 +1,5 @@
from warnings import warn, simplefilter
from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv
from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv, SupervisorGoalEnv
from controller import Supervisor


@@ -99,3 +99,24 @@ def apply_action(self, action):
:param action: list, containing action data
"""
raise NotImplementedError

class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor):
"""
The RobotGoalSupervisor class is just like RobotSupervisor, but it
uses compute_reward from gym.GoalEnv.
"""
def __init__(self, timestep=None):
super(RobotGoalSupervisor, self).__init__()

if timestep is None:
self.timestep = int(self.getBasicTimeStep())
else:
self.timestep = timestep

def step(self, action):
"""
The basic step method is use-case specific and needs to be implemented
by the user. Use compute_reward, inherited from gym.GoalEnv,
instead of get_reward().
"""
raise NotImplementedError
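
To make the intended usage concrete, below is a minimal sketch of a user-defined controller built on RobotGoalSupervisor, where step computes the reward through compute_reward instead of get_reward. The class name FindTargetRobot, the observation keys, the helper methods it calls, and the sparse-reward threshold are illustrative assumptions, not part of this PR.

```python
import numpy as np
from controller import Supervisor
from deepbots.supervisor.controllers.robot_supervisor import RobotGoalSupervisor


class FindTargetRobot(RobotGoalSupervisor):
    # Hypothetical user controller; get_observations, apply_action, is_done
    # and get_info are assumed to be implemented elsewhere in the subclass.
    def step(self, action):
        # Apply the agent's action and advance the Webots simulation one
        # timestep, mirroring the stepping pattern used by reset in this PR.
        self.apply_action(action)
        super(Supervisor, self).step(self.timestep)

        # Goal-style observation: dict with observation/achieved_goal/desired_goal.
        obs = self.get_observations()
        info = self.get_info()

        # Reward comes from gym.GoalEnv's compute_reward, not get_reward().
        reward = self.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)
        return obs, reward, self.is_done(), info

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Sparse reward: 0 when the achieved goal is within 5 cm of the target.
        distance = np.linalg.norm(np.array(achieved_goal) - np.array(desired_goal))
        return 0.0 if distance < 0.05 else -1.0
```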
120 changes: 78 additions & 42 deletions deepbots/supervisor/controllers/supervisor_env.py
@@ -2,27 +2,8 @@
from controller import Supervisor


class SupervisorEnv(Supervisor, gym.Env):
"""
This class is the highest class in deepbots class hierarchy, inheriting
both the Webots Supervisor controller and the basic gym.Env.

Refer to gym.Env documentation on how to implement a custom gym.Env
for additional functionality.

This class contains abstract methods that guide the development process
for users that want to implement a simple environment.

This class is not intended for user usage, but to provide a common
interface for all provided supervisor classes and make them
compatible with reinforcement learning agents that work with
the gym interface. Moreover, a problem-agnostic reset method is
provided. Please use any of the children supervisor classes to be
inherited by your own class, such as the RobotSupervisor class.
Nevertheless, advanced users can inherit this class to create
their own supervisor classes if they wish.
"""

class SupervisorBasicEnv:

def step(self, action):
"""
On each timestep, the agent chooses an action for the previous
@@ -43,27 +24,6 @@ def step(self, action):
"""
raise NotImplementedError

def reset(self):
"""
Used to reset the world to an initial state.

Default, problem-agnostic, implementation of reset method,
using Webots-provided methods.

*Note that this works properly only with Webots versions >R2020b
and must be overridden with a custom reset method when using
earlier versions. It is backwards compatible due to the fact
that the new reset method gets overridden by whatever the user
has previously implemented, so an old supervisor can be migrated
easily to use this class.

:return: default observation provided by get_default_observation()
"""
self.simulationReset()
self.simulationResetPhysics()
super(Supervisor, self).step(int(self.getBasicTimeStep()))
return self.get_default_observation()

def get_default_observation(self):
"""
This method should be implemented to return a default/starting
@@ -115,3 +75,79 @@ def get_info(self):
information on each step, e.g. for debugging purposes.
"""
raise NotImplementedError

class SupervisorEnv(Supervisor, gym.Env, SupervisorBasicEnv):
"""
This class is the highest class in the deepbots class hierarchy apart
from SupervisorBasicEnv, inheriting the Webots Supervisor controller,
the basic gym.Env, and the basic RL methods of SupervisorBasicEnv.

Refer to gym.Env documentation on how to implement a custom gym.Env
for additional functionality.

This class contains abstract methods that guide the development process
for users that want to implement a simple environment.

This class is not intended for user usage, but to provide a common
interface for all provided supervisor classes and make them
compatible with reinforcement learning agents that work with
the gym interface. Moreover, a problem-agnostic reset method is
provided. Please use any of the children supervisor classes to be
inherited by your own class, such as the RobotSupervisor class.
Nevertheless, advanced users can inherit this class to create
their own supervisor classes if they wish.
"""

def reset(self):
"""
Used to reset the world to an initial state.

Default, problem-agnostic, implementation of reset method,
using Webots-provided methods.

*Note that this works properly only with Webots versions >R2020b
and must be overridden with a custom reset method when using
earlier versions. It is backwards compatible due to the fact
that the new reset method gets overridden by whatever the user
has previously implemented, so an old supervisor can be migrated
easily to use this class.

:return: default observation provided by get_default_observation()
"""
self.simulationReset()
self.simulationResetPhysics()
super(Supervisor, self).step(int(self.getBasicTimeStep()))
return self.get_default_observation()



class SupervisorGoalEnv(Supervisor, gym.GoalEnv, SupervisorBasicEnv):
"""
This class is just like SupervisorEnv, but it imposes the gym.GoalEnv interface.

Refer to gym.GoalEnv documentation on how to implement a custom
gym.GoalEnv for additional functionality.
"""

def reset(self):
"""
Used to reset the world to an initial state and enforce that each
SupervisorGoalEnv uses a Goal-compatible observation space.

Default, problem-agnostic, implementation of reset method,
using Webots-provided methods.

*Note that this works properly only with Webots versions >R2020b
and must be overridden with a custom reset method when using
earlier versions. It is backwards compatible due to the fact
that the new reset method gets overridden by whatever the user
has previously implemented, so an old supervisor can be migrated
easily to use this class.

:return: default observation provided by get_default_observation()
"""
super().reset()
self.simulationReset()
self.simulationResetPhysics()
super(Supervisor, self).step(int(self.getBasicTimeStep()))
return self.get_default_observation()
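
For reference, the "Goal-compatible observation space" that gym.GoalEnv's reset check enforces is a gym.spaces.Dict containing observation, achieved_goal and desired_goal entries. A minimal sketch of such a space is shown below; the shapes and bounds are placeholder assumptions, not taken from this PR.

```python
import numpy as np
from gym import spaces

# Illustrative only: an observation space that satisfies the gym.GoalEnv
# check triggered by super().reset() above. Shapes/bounds are placeholders.
observation_space = spaces.Dict({
    "observation": spaces.Box(-np.inf, np.inf, shape=(10,), dtype=np.float64),
    "achieved_goal": spaces.Box(-np.inf, np.inf, shape=(3,), dtype=np.float64),
    "desired_goal": spaces.Box(-np.inf, np.inf, shape=(3,), dtype=np.float64),
})
```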