Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GoalEnv for Robot-Supervisor scheme #103

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion deepbots/supervisor/controllers/robot_supervisor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from warnings import warn, simplefilter
from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv
from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv, SupervisorGoalEnv
from controller import Supervisor


Expand Down Expand Up @@ -27,6 +27,7 @@ class RobotSupervisor(SupervisorEnv):
action, e.g. motor speeds.
Note that apply_action() is called during step().
"""

def __init__(self, timestep=None):
super(RobotSupervisor, self).__init__()

Expand Down Expand Up @@ -99,3 +100,63 @@ def apply_action(self, action):
:param action: list, containing action data
"""
raise NotImplementedError

KelvinYang0320 marked this conversation as resolved.
Show resolved Hide resolved

class RobotGoalSupervisor(SupervisorGoalEnv, RobotSupervisor):
    """
    Goal-based counterpart of RobotSupervisor.

    It behaves like RobotSupervisor, except that the reward returned by
    step() is produced by compute_reward(), the method inherited from
    gym.GoalEnv, which receives the achieved and desired goals found in
    the observation.

    step():
        Steps the controller, applies the action and returns the
        (observations, reward, done, info) tuple, with the reward coming
        from compute_reward().
    """

    def __init__(self, timestep=None):
        """
        Initialize the parent classes and store the controller timestep.

        :param timestep: the timestep passed to the controller on every
            step() call; when None, the value returned by
            getBasicTimeStep() (converted to int) is used instead
        :type timestep: int or None
        """
        super(RobotGoalSupervisor, self).__init__()

        if timestep is None:
            self.timestep = int(self.getBasicTimeStep())
        else:
            self.timestep = timestep

    def step(self, action):
        """
        Step the controller, apply the action on the robot and return
        the (observations, reward, done, info) tuple.

        The reward is obtained from compute_reward(), which must be
        implemented by the user according to gym.GoalEnv. It is called
        with the "achieved_goal" and "desired_goal" entries of the
        observation returned by get_observations() and with the info
        object returned by get_info(). The observation must therefore
        support lookup by those two keys.

        :param action: Whatever the use-case uses as an action, e.g.
            an integer representing discrete actions
        :type action: Defined by the implementation
        :return: tuple, (observations, reward, done, info) as provided by
            the corresponding methods as implemented for the use-case
        :raises SystemExit: when the underlying controller step
            returns -1
        """
        # The controller step is resolved past Supervisor in the MRO,
        # exactly as the original call does.
        if super(Supervisor, self).step(self.timestep) == -1:
            # Raise SystemExit directly rather than calling exit(): that
            # helper is injected by the `site` module for interactive
            # use and is not guaranteed to exist in every interpreter.
            raise SystemExit

        self.apply_action(action)
        obs = self.get_observations()
        info = self.get_info()

        # The reward is computed before is_done() is queried, keeping
        # the original evaluation order of the returned tuple.
        reward = self.compute_reward(
            obs["achieved_goal"],
            obs["desired_goal"],
            info,
        )
        return (
            obs,
            reward,
            self.is_done(),
            info,
        )
32 changes: 32 additions & 0 deletions deepbots/supervisor/controllers/supervisor_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,35 @@ def get_info(self):
information on each step, e.g. for debugging purposes.
"""
raise NotImplementedError


class SupervisorGoalEnv(gym.GoalEnv, SupervisorEnv):
    """
    A SupervisorEnv that additionally imposes the gym.GoalEnv interface.

    See the gym.GoalEnv documentation for what a custom goal-based
    environment has to implement on top of this class.
    """

    def reset(self):
        """
        Reset the world to an initial state and return the default
        observation.

        The parent reset is invoked first, so that each SupervisorGoalEnv
        is required to use a Goal-compatible observation space. The
        simulation and its physics are then reset through the
        Webots-provided methods, and the controller is stepped once with
        the basic timestep.

        *Note that this default, problem-agnostic implementation works
        properly only with Webots versions >R2020b and must be overridden
        with a custom reset method when using earlier versions. A reset
        method that the user has already implemented overrides this one,
        so an old supervisor can be migrated easily to use this class.

        :return: default observation provided by get_default_observation()
        """
        # Parent reset comes first, before the simulation is touched.
        super(SupervisorGoalEnv, self).reset()

        self.simulationReset()
        self.simulationResetPhysics()

        # Step the controller once, resolving step() past Supervisor in
        # the MRO.
        basic_timestep = int(self.getBasicTimeStep())
        super(Supervisor, self).step(basic_timestep)

        return self.get_default_observation()