Gymnasium Deepworlds #101

Draft: wants to merge 2 commits into base: dev
Changed file (path not shown)
@@ -20,7 +20,7 @@ def run():

# Run outer loop until the episodes limit is reached or the task is solved
while not solved and episode_count < episode_limit:
state = env.reset() # Reset robot and get starting observation
state, _ = env.reset() # Reset robot and get starting observation
env.episode_score = 0
action_probs = [] # This list holds the probability of each chosen action

@@ -33,14 +33,14 @@ def run():

# Step the supervisor to get the current selected_action reward, the new state and whether we reached the
# done condition
new_state, reward, done, info = env.step([selected_action])
new_state, reward, terminated, truncated, info = env.step([selected_action])

# Save the current state transition in agent's memory
trans = Transition(state, selected_action, action_prob, reward, new_state)
agent.store_transition(trans)

env.episode_score += reward # Accumulate episode reward
if done:
if terminated or truncated:
# Save the episode's score
env.episode_score_list.append(env.episode_score)
agent.train_step(batch_size=step + 1)
@@ -74,10 +74,10 @@ def run():
env.episode_score = 0
while True:
selected_action, action_prob = agent.work(state, type_="selectActionMax")
state, reward, done, _ = env.step([selected_action])
state, reward, terminated, truncated, _ = env.step([selected_action])
env.episode_score += reward # Accumulate episode reward

if done:
if terminated or truncated:
print("Reward accumulated =", env.episode_score)
env.episode_score = 0
state = env.reset()
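The changes in this controller track the Gymnasium API: `env.reset()` now returns an `(observation, info)` pair, and `env.step()` returns five values, with the old `done` flag split into `terminated` and `truncated`. A minimal sketch of that call pattern against a standalone Gymnasium environment (the environment id and the random policy are illustrative, not part of this PR):

```python
import gymnasium as gym

env = gym.make("CartPole-v1")
state, info = env.reset(seed=0)      # reset() returns (observation, info)

episode_score = 0.0
while True:
    action = env.action_space.sample()   # placeholder policy
    # step() returns five values: `terminated` marks a natural end of the
    # episode, `truncated` an external cut-off such as a time limit.
    state, reward, terminated, truncated, info = env.step(action)
    episode_score += reward
    if terminated or truncated:
        break

print("Reward accumulated =", episode_score)
env.close()
```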
Changed file (path not shown)
@@ -1,7 +1,7 @@
from deepbots.supervisor import RobotSupervisorEnv
from utilities import normalize_to_range

from gym.spaces import Box, Discrete
from gymnasium.spaces import Box, Discrete
import numpy as np


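Only the import source changes here; `Box` and `Discrete` keep the same constructors under `gymnasium.spaces`. A hedged sketch of how a CartPole-style supervisor typically declares its spaces with them (the bounds and sizes are illustrative, not copied from this file):

```python
import numpy as np
from gymnasium.spaces import Box, Discrete

# Four continuous observations (cart position/velocity, pole angle/velocity)
# and two discrete actions (push left, push right); bounds are illustrative.
observation_space = Box(
    low=np.array([-0.4, -np.inf, -1.3, -np.inf], dtype=np.float32),
    high=np.array([0.4, np.inf, 1.3, np.inf], dtype=np.float32),
    dtype=np.float32,
)
action_space = Discrete(2)
```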
@@ -109,7 +109,7 @@ def get_reward(self, action):
"""
return 1

def is_done(self):
def is_terminated(self):
"""
An episode is done if the score is over 195.0, or if the pole is off balance, or the cart position is on the
arena's edges.
@@ -130,6 +130,9 @@ def is_done(self):

return False

def is_truncated(self):
return False

def solved(self):
"""
This method checks whether the CartPole task is solved, so training terminates.
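The rename from `is_done` to `is_terminated`, together with the `is_truncated` stub above, mirrors Gymnasium's split between natural episode endings and external cut-offs. The PR's `is_truncated` always returns False; a hedged sketch of how a step-budget truncation could look if one were wanted later (the class and attribute names are assumed, not taken from this file):

```python
class TruncationExample:
    """Illustrative only; not the deepworlds supervisor class."""

    def __init__(self, max_episode_steps=200):
        self.episode_steps = 0
        self.max_episode_steps = max_episode_steps

    def is_terminated(self):
        # Natural end: the task was solved or failed (pole fell, cart left the track).
        return False  # placeholder for the task-specific checks

    def is_truncated(self):
        # Artificial end: the episode exhausted an external step budget.
        return self.episode_steps >= self.max_episode_steps
```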
Changed file (path not shown)
@@ -1,5 +1,5 @@
from numpy import convolve, ones, mean
import gym
import gymnasium as gym

from robot_supervisor import CartPoleRobotSupervisor
from utilities import plot_data
@@ -13,7 +13,7 @@ def run():
env = CartPoleRobotSupervisor()

# Verify that the environment is working as a gym-style env
#check_env(env)
check_env(env)

# Use the PPO algorithm from the stable baselines having MLP, verbose=1 output the training information
model = PPO("MlpPolicy", env, verbose=1)
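This runner re-enables `check_env`, which validates that the environment exposes the observation/action spaces and reset/step signatures Stable-Baselines3 expects, before handing it to PPO. A minimal sketch of the same pattern with a plain Gymnasium environment (the environment id and timestep budget are illustrative):

```python
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

env = gym.make("CartPole-v1")

# Warns or raises if the env deviates from the Gymnasium interface SB3 expects.
check_env(env)

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)   # illustrative training budget
```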
Changed file (path not shown)
@@ -1,7 +1,7 @@
from deepbots.supervisor import RobotSupervisorEnv
from utilities import normalize_to_range

from gym.spaces import Box, Discrete
from gymnasium.spaces import Box, Discrete
import numpy as np


@@ -109,9 +109,9 @@ def get_reward(self, action):
"""
return 1

def is_done(self):
def is_terminated(self):
"""
An episode is done if the score is over 195.0, or if the pole is off balance, or the cart position is on the
An episode is terminated if the score is over 195.0, or if the pole is off balance, or the cart position is on the
arena's edges.

:return: True if termination conditions are met, False otherwise
@@ -130,6 +130,9 @@ def is_done(self):

return False

def is_truncated(self):
return False

def solved(self):
"""
This method checks whether the CartPole task is solved, so training terminates.
Changed file (path not shown)
@@ -24,7 +24,7 @@ def run():

# Run outer loop until the episodes limit is reached or the task is solved
while not solved and episode_count < episod_limit:
state = supervisor_env.reset() # Reset robot and get starting observation
state, _ = supervisor_env.reset() # Reset robot and get starting observation
supervisor_pre.episodeScore = 0
action_probs = [] # This list holds the probability of each chosen action

@@ -37,14 +37,14 @@ def run():

# Step the supervisor to get the current selected_action reward, the new state and whether we reached the
# done condition
new_state, reward, done, info = supervisor_env.step([selected_action])
new_state, reward, terminated, truncated, info = supervisor_env.step([selected_action])

# Save the current state transition in agent's memory
trans = Transition(state, selected_action, action_prob, reward, new_state)
agent.store_transition(trans)

supervisor_pre.episodeScore += reward # Accumulate episode reward
if done:
if terminated or truncated:
# Save the episode's score
supervisor_pre.episode_score_list.append(supervisor_pre.episodeScore)
agent.train_step(batch_size=step + 1)
@@ -85,10 +85,10 @@ def run():
supervisor_pre.episodeScore = 0
while True:
selected_action, action_prob = agent.work(state, type_="selectActionMax")
state, reward, done, _ = supervisor_env.step([selected_action])
state, reward, terminated, truncated, _ = supervisor_env.step([selected_action])
supervisor_pre.episodeScore += reward # Accumulate episode reward

if done:
if terminated or truncated:
print("Reward accumulated =", supervisor_pre.episodeScore)
supervisor_pre.episodeScore = 0
state = supervisor_env.reset()
Changed file (path not shown)
@@ -21,7 +21,7 @@ def step(self, action):

"R" invokes the environment's reset method resetting the simulation to its initial state.
"""
observation, reward, is_done, info = self.controller.step(action)
observation, reward, terminated, truncated, info = self.controller.step(action)
key = self.keyboard.getKey()

if key == ord("T") and not self.controller.test:
@@ -31,4 +31,4 @@ def step(self, action):
print("User invoked reset method.")
self.controller.reset()

return observation, reward, is_done, info
return observation, reward, terminated, truncated, info
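Since the wrapped controller now returns the five-element Gymnasium tuple, this keyboard wrapper simply unpacks and forwards all five values. A hedged sketch of the same passthrough idea using `gymnasium.Wrapper` (this class is illustrative; the controller above does not subclass it):

```python
import gymnasium as gym

class KeyboardPassthrough(gym.Wrapper):
    """Illustrative wrapper: add side effects while forwarding the 5-tuple unchanged."""

    def step(self, action):
        observation, reward, terminated, truncated, info = self.env.step(action)
        # ...poll the keyboard and trigger test/reset behaviour here...
        return observation, reward, terminated, truncated, info
```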
Changed file (path not shown)
@@ -109,7 +109,7 @@ def get_reward(self, action=None):
"""
return 1

def is_done(self):
def is_terminated(self):
"""
An episode is done if the score is over 195.0, or if the pole is off balance, or the cart position is on the
arena's edges.
@@ -134,6 +134,9 @@ def is_done(self):

return False

def is_truncated(self):
return False

def get_info(self):
"""
Dummy implementation of get_info.
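This last supervisor keeps a dummy `get_info`; under the Gymnasium API, whatever it returns becomes the info dictionary surfaced by `reset()` and `step()`. A hedged sketch of a slightly more informative version (the key and attribute are illustrative, not part of this PR):

```python
def get_info(self):
    # Illustrative only: surface a diagnostic value instead of returning nothing.
    return {"episode_score": self.episode_score}
```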