diff --git a/.all-contributorsrc b/.all-contributorsrc
new file mode 100644
index 0000000..b9cf524
--- /dev/null
+++ b/.all-contributorsrc
@@ -0,0 +1,71 @@
+{
+  "files": [
+    "README.md"
+  ],
+  "imageSize": 100,
+  "commit": false,
+  "contributors": [
+    {
+      "login": "ManosMagnus",
+      "name": "Manos Kirtas",
+      "avatar_url": "https://avatars.githubusercontent.com/u/10010230?v=4",
+      "profile": "http://eakirtas.webpages.auth.gr/",
+      "contributions": [
+        "code"
+      ]
+    },
+    {
+      "login": "tsampazk",
+      "name": "Kostas Tsampazis",
+      "avatar_url": "https://avatars.githubusercontent.com/u/27914645?v=4",
+      "profile": "https://github.com/tsampazk",
+      "contributions": [
+        "code"
+      ]
+    },
+    {
+      "login": "KelvinYang0320",
+      "name": "Jiun Kai Yang",
+      "avatar_url": "https://avatars.githubusercontent.com/u/49781698?v=4",
+      "profile": "https://www.linkedin.com/in/kelvin-yang-b7b508198/",
+      "contributions": [
+        "code"
+      ]
+    },
+    {
+      "login": "MentalGear",
+      "name": "MentalGear",
+      "avatar_url": "https://avatars.githubusercontent.com/u/2837147?v=4",
+      "profile": "https://github.com/MentalGear",
+      "contributions": [
+        "ideas"
+      ]
+    },
+    {
+      "login": "DreamtaleCore",
+      "name": "Dreamtale",
+      "avatar_url": "https://avatars.githubusercontent.com/u/12713528?v=4",
+      "profile": "https://github.com/DreamtaleCore",
+      "contributions": [
+        "bug"
+      ]
+    },
+    {
+      "login": "NickKok",
+      "name": "Nikolaos Kokkinis-Ntrenis",
+      "avatar_url": "https://avatars.githubusercontent.com/u/8222731?v=4",
+      "profile": "https://nickkok.github.io/my-website/",
+      "contributions": [
+        "code",
+        "doc",
+        "ideas"
+      ]
+    }
+  ],
+  "contributorsPerLine": 7,
+  "projectName": "deepbots",
+  "projectOwner": "aidudezzz",
+  "repoType": "github",
+  "repoHost": "https://github.com",
+  "skipCi": true
+}
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 0b8b7f9..5978864 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.3-dev1
+current_version = 0.1.3-dev4
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
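The `parse` pattern above is how bump2version decomposes a version such as `0.1.3-dev4` into its parts. A quick illustrative check in Python (not part of the patch; the pattern and version string are taken from this diff):

```python
import re

# The parse pattern from .bumpversion.cfg above.
PARSE = r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?"

match = re.fullmatch(PARSE, "0.1.3-dev4")
assert match is not None
# bump2version reads the version parts from the named groups:
print(match.groupdict())
# -> {'major': '0', 'minor': '1', 'patch': '3', 'release': 'dev', 'build': '4'}
```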
@@ -20,3 +20,7 @@ values = [bumpversion:file:VERSION] [bumpversion:file:setup.py] + +[bumpversion:file:deepbots/__init__.py] +search = "{current_version}" +replace = "{new_version}" diff --git a/.github/workflows/changelog_update.yaml b/.github/workflows/changelog_update.yaml new file mode 100644 index 0000000..c8bc355 --- /dev/null +++ b/.github/workflows/changelog_update.yaml @@ -0,0 +1,30 @@ +name: Generate Changelog +on: + push: + branches: + - main + +jobs: + generate_changelog: + name: Update Changelog + runs-on: ubuntu-latest + steps: + - name: Checkout dev + uses: actions/checkout@v2 + + - name: Generate changelog + uses: charmixer/auto-changelog-action@v1.1 + with: + token: ${{ secrets.AIDUDEZZ_BOT_TOKEN }} + + - name: Commit files + run: | + git config --local user.email ${{ secrets.AIDUDEZZZ_BOT_EMAIL }} + git config --local user.name ${{ secrets.AIDUDEZZZ_BOT_USER }} + git add CHANGELOG.md && git commit -m 'Updated CHANGELOG.md' && echo "push=true" >> $GITHUB_ENV || echo "No changes to CHANGELOG.md" + - name: Push changes + if: env.push == 'true' + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.AIDUDEZZ_BOT_TOKEN }} + branch: main diff --git a/.github/workflows/python_build.yaml b/.github/workflows/python_build.yaml index 5946e5f..9618aca 100644 --- a/.github/workflows/python_build.yaml +++ b/.github/workflows/python_build.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v1 diff --git a/.isort.cfg b/.isort.cfg index f3b067f..e9b9c5f 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,2 +1,2 @@ [settings] -known_third_party = controller,numpy,setuptools,tensorboardX +known_third_party = controller,cv2,gym,numpy,ray,setuptools,tensorboardX diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bae66a..e59b980 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,7 @@ repos: hooks: - id: prettier args: [--prose-wrap=always, --print-width=79] + exclude: README.md - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 diff --git a/README.md b/README.md index c2ed0b4..0cf16e9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,12 @@ -# deepbots +

+<!-- deepbots logo image -->
+
+ +[![Version](https://img.shields.io/pypi/v/deepbots?color=green)](https://pypi.org/project/deepbots/) +[![Dev Version](https://img.shields.io/github/v/tag/aidudezzz/deepbots?include_prereleases&label=test-pypi&color=green)](https://test.pypi.org/project/deepbots/) +[![Downloads](https://static.pepy.tech/personalized-badge/deepbots?period=total&units=international_system&left_color=grey&right_color=green&left_text=Downloads)](https://pepy.tech/project/deepbots) +[![License](https://img.shields.io/github/license/aidudezzz/deepbots?color=green)](https://github.com/aidudezzz/deepbots/blob/dev/LICENSE) +[![All Contributors](https://img.shields.io/badge/all_contributors-6-orange.svg?style=flat-square)](#contributors-) Deepbots is a simple framework which is used as "middleware" between the free and open-source [Cyberbotics' Webots](https://cyberbotics.com/) robot simulator @@ -60,16 +68,12 @@ https://link.springer.com/chapter/10.1007/978-3-030-49186-4_6 and Tsampazis, K. and Passalis, N. and Tefas, A.", - editor="Maglogiannis, Ilias - and Iliadis, Lazaros - and Pimenidis, Elias", title="Deepbots: A Webots-Based Deep Reinforcement Learning Framework for Robotics", booktitle="Artificial Intelligence Applications and Innovations", year="2020", publisher="Springer International Publishing", address="Cham", pages="64--75", - abstract="Deep Reinforcement Learning (DRL) is increasingly used to train robots to perform complex and delicate tasks, while the development of realistic simulators contributes to the acceleration of research on DRL for robotics. However, it is still not straightforward to employ such simulators in the typical DRL pipeline, since their steep learning curve and the enormous amount of development required to interface with DRL methods significantly restrict their use by researchers. To overcome these limitations, in this work we present an open-source framework that combines an established interface used by DRL researchers, the OpenAI Gym interface, with the state-of-the-art Webots robot simulator in order to provide a standardized way to employ DRL in various robotics scenarios. Deepbots aims to enable researchers to easily develop DRL methods in Webots by handling all the low-level details and reducing the required development effort. The effectiveness of the proposed framework is demonstrated through code examples, as well as using three use cases of varying difficulty.", isbn="978-3-030-49186-4" } @@ -142,26 +146,27 @@ those who are familiar with the OpenAI gym environment. More specifically, algorithms and follows the OpenAI Gym environment logic. The Deepbots framework provides different levels of abstraction according to the user's needs. Moreover, a goal of the framework is to provide different wrappers for a wide -range of robots. +range of robots. Deepbots also provides a default implementation of the `reset()` method, -leveraging Webots' built-in simulation reset functions, removing the need -for the user to implement reset procedures for simpler use-cases. It is -always possible to override this method and implement any custom reset -procedure, as needed. +leveraging Webots' built-in simulation reset functions, removing the need for +the user to implement reset procedures for simpler use-cases. It is always +possible to override this method and implement any custom reset procedure, as +needed. #### Emitter - receiver scheme -Currently, the communication between the `Supervisor` and the -`Robot` is achieved via an `emitter` and a `receiver`. 
Separating the `Supervisor`
-from the `Robot`, deepbots can fit a variety of use-cases, e.g. multiple
-`Robots` collecting experience and a `Supervisor` controlling them with a single
-agent. The way Webots implements `emitter`/`receiver` communication requires messages
-to be packed and unpacked, which introduces an overhead that becomes prohibiting in
-use-cases where the observations are high-dimensional or long, such as camera images.
-Deepbots provides another partially abstract class that combines the `Supervisor`
-and the `Robot` into one controller and circumvents that issue, while being less
-flexible, which is discussed [later](#combined-robot-supervisor-scheme).
+Currently, the communication between the `Supervisor` and the `Robot` is
+achieved via an `emitter` and a `receiver`. Separating the `Supervisor` from
+the `Robot`, deepbots can fit a variety of use-cases, e.g. multiple `Robots`
+collecting experience and a `Supervisor` controlling them with a single agent.
+The way Webots implements `emitter`/`receiver` communication requires messages
+to be packed and unpacked, which introduces an overhead that becomes
+prohibitive in use-cases where the observations are high-dimensional or long,
+such as camera images. Deepbots provides another partially abstract class that
+combines the `Supervisor` and the `Robot` into one controller and circumvents
+that issue, while being less flexible, which is discussed
+[later](#combined-robot-supervisor-scheme).
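To make the scheme concrete, a robot-side controller built on the `CSVRobot` class introduced later in this diff might look like the following sketch. The sensor and motor device names are hypothetical and depend on the robot model used; an `emitter` and a `receiver` device are assumed to exist on the robot node:

```python
from deepbots.robots import CSVRobot


class MyRobot(CSVRobot):
    """Hypothetical robot controller for the emitter-receiver scheme."""

    def __init__(self):
        super().__init__()
        # Hypothetical device names; they depend on the robot model used.
        self.sensor = self.getDevice("distance_sensor")
        self.sensor.enable(self.timestep)
        self.left_motor = self.getDevice("left_motor")
        self.right_motor = self.getDevice("right_motor")
        for motor in (self.left_motor, self.right_motor):
            motor.setPosition(float("inf"))  # velocity-control mode
            motor.setVelocity(0.0)

    def create_message(self):
        # Packed into a CSV string and emitted to the supervisor on each step.
        return [str(self.sensor.getValue())]

    def use_message_data(self, message):
        # The supervisor sends the agent's action as a list of strings,
        # here interpreted as a single motor speed.
        speed = float(message[0])
        self.left_motor.setVelocity(speed)
        self.right_motor.setVelocity(speed)


# Run the step-receive-emit loop provided by EmitterReceiverRobot.run().
MyRobot().run()
```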

@@ -170,16 +175,20 @@ flexible, which is discussed [later](#combined-robot-supervisor-scheme).
 On one hand, the `emitter` is an entity which is provided by Webots, that
 broadcasts messages to the world. On the other hand, the `receiver` is an
 entity that is used to receive messages from the `World`. Consequently, the
-agent-environment loop is transformed accordingly. Firstly, the `Robot` uses its
-sensors to retrieve the observation from the `World` and in turn uses the `emitter`
-component to broadcast this observation. Secondly, the `Supervisor` receives the
-observation via the `receiver` component and in turn, the agent uses it to choose
-an action. It should be noted that the observation the agent uses might be
-extended from the `Supervisor`. For example, a model might use LiDAR sensors
-installed on the `Robot`, but also the Euclidean distance between the `Robot` and
-an object. As it is expected, the `Robot` does not know the Euclidean distance,
-only the `Supervisor` can calculate it, because it has access to all entities in
-the `World`.
+agent-environment loop is transformed accordingly. Firstly, the `Robot` uses
+its sensors to retrieve the observation from the `World` and in turn uses the
+`emitter` component to broadcast this observation. Secondly, the `Supervisor`
+receives the observation via the `receiver` component and in turn, the agent
+uses it to choose an action. It should be noted that the observation the agent
+uses might be extended by the `Supervisor`. For example, a model might use
+LiDAR sensors installed on the `Robot`, but also the Euclidean distance between
+the `Robot` and an object. As expected, the `Robot` does not know the
+Euclidean distance, only the `Supervisor` can calculate it, because it has
+access to all entities in the `World`.
+
+You can follow the
+[emitter-receiver scheme tutorial](https://github.com/aidudezzz/deepbots-tutorials/blob/master/emitterReceiverSchemeTutorial/README.md)
+to get started and work your way up from there.
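The supervisor side of this loop can be sketched as below, using the `CSVSupervisorEnv` class added in this diff. The `DEF` names, reward, and episode logic are placeholders, not part of the library:

```python
import numpy as np
from deepbots.supervisor import CSVSupervisorEnv


class FindTargetSupervisor(CSVSupervisorEnv):
    """Hypothetical supervisor-side environment for the scheme above."""

    def __init__(self):
        super().__init__()
        # Hypothetical DEF names of nodes placed in the Webots world.
        self.robot_node = self.getFromDef("ROBOT")
        self.target_node = self.getFromDef("TARGET")

    def get_observations(self):
        # handle_receiver() returns the robot's CSV message as a list of
        # strings, or None when no packet has arrived yet.
        message = self.handle_receiver()
        if message is None:
            return self.get_default_observation()
        sensor_value = float(message[0])
        # The supervisor can extend the observation, e.g. with the Euclidean
        # distance between the robot and an object, which only it can compute.
        distance = np.linalg.norm(
            np.array(self.robot_node.getPosition()) -
            np.array(self.target_node.getPosition()))
        return [sensor_value, distance]

    def get_default_observation(self):
        return [0.0, 0.0]

    def get_reward(self, action):
        return -1.0  # placeholder: e.g. penalize every step until done

    def is_done(self):
        return False  # placeholder termination condition

    def get_info(self):
        return {}
```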

@@ -187,14 +196,19 @@ the `World`.
 
 #### Combined Robot-Supervisor scheme
 
-As mentioned earlier, in use-cases where the observation transmitted between
-the `Robot` and the `Supervisor` is high-dimensional or long, e.g. high resolution
-images taken from a camera, a significant overhead is introduced. This is circumvented
-by inheriting and implementing the partially abstract `RobotSupervisor` that combines
-the `Robot controller` and the `Supervisor Controller` into one, forgoing all
-`emitter`/`receiver` communication. This new controller runs on the `Robot`, but
-requires `Supervisor` privileges and is limited to one `Robot`, one `Supervisor`.
+As mentioned earlier, in use-cases where the observation transmitted between
+the `Robot` and the `Supervisor` is high-dimensional or long, e.g. high
+resolution images taken from a camera, a significant overhead is introduced.
+This is circumvented by inheriting and implementing the partially abstract
+`RobotSupervisor` that combines the `Robot controller` and the
+`Supervisor Controller` into one, forgoing all `emitter`/`receiver`
+communication. This new controller runs on the `Robot`, but requires
+`Supervisor` privileges and is limited to one `Robot`, one `Supervisor`.
+
+You can follow the
+[robot-supervisor scheme tutorial](https://github.com/aidudezzz/deepbots-tutorials/tree/master/robotSupervisorSchemeTutorial)
+to get started and work your way up from there. We recommend this
+tutorial as the best starting point for deepbots.
 
 ### Abstraction Levels
 
@@ -203,15 +217,49 @@ aim of the framework is to enable people to use Reinforcement Learning in
 Webots. More specifically, we can consider deepbots as a wrapper of Webots
 exposing an OpenAI gym style interface. For this reason there are multiple
 levels of abstraction. For example, a user can choose if they want to use CSV
-`emitter`/`receiver` or if they want to make an implementation from scratch.
-In the top level of the abstraction hierarchy is the `SupervisorEnv` which is the
+`emitter`/`receiver` or if they want to make an implementation from scratch. In
+the top level of the abstraction hierarchy is the `SupervisorEnv` which is the
 OpenAI gym interface. Below that level there are partially implemented classes
-with common functionality. These implementations aim to hide the communication
+with common functionality. These implementations aim to hide the communication
 between the `Supervisor` and the `Robot`, as described in the two different
-schemes ealier. Similarly, in the `emitter`/`receiver` scheme the `Robot`
-also has different abstraction levels. According to their needs, users can choose
-either to process the messages received from the `Supervisor` themselves or
-use the existing implementations.
-
-### Acknowledgments
-This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 871449 (OpenDR). This publication reflects the authors’ views only. The European Commission is not responsible for any use that may be made of the information it contains.
+schemes earlier. Similarly, in the `emitter`/`receiver` scheme the `Robot` also
+has different abstraction levels. According to their needs, users can choose
+either to process the messages received from the `Supervisor` themselves or use
+the existing implementations.
+
+### Acknowledgments
+
+This project has received funding from the European Union's Horizon 2020
+research and innovation programme under grant agreement No 871449 (OpenDR).
+This publication reflects the authors’ views only. The European Commission is
+not responsible for any use that may be made of the information it contains.
+
+## Contributors ✨
+
+Thanks goes to these wonderful people
+([emoji key](https://allcontributors.org/docs/en/emoji-key)):
+
+<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
+<!-- prettier-ignore-start -->
+<!-- markdownlint-disable -->
+<table>
+  <tr>
+    <td align="center"><a href="http://eakirtas.webpages.auth.gr/"><img src="https://avatars.githubusercontent.com/u/10010230?v=4" width="100px;" alt=""/><br /><sub><b>Manos Kirtas</b></sub></a><br /><a href="https://github.com/aidudezzz/deepbots/commits?author=ManosMagnus" title="Code">💻</a></td>
+    <td align="center"><a href="https://github.com/tsampazk"><img src="https://avatars.githubusercontent.com/u/27914645?v=4" width="100px;" alt=""/><br /><sub><b>Kostas Tsampazis</b></sub></a><br /><a href="https://github.com/aidudezzz/deepbots/commits?author=tsampazk" title="Code">💻</a></td>
+    <td align="center"><a href="https://www.linkedin.com/in/kelvin-yang-b7b508198/"><img src="https://avatars.githubusercontent.com/u/49781698?v=4" width="100px;" alt=""/><br /><sub><b>Jiun Kai Yang</b></sub></a><br /><a href="https://github.com/aidudezzz/deepbots/commits?author=KelvinYang0320" title="Code">💻</a></td>
+    <td align="center"><a href="https://github.com/MentalGear"><img src="https://avatars.githubusercontent.com/u/2837147?v=4" width="100px;" alt=""/><br /><sub><b>MentalGear</b></sub></a><br /><a href="#ideas-MentalGear" title="Ideas, Planning, & Feedback">🤔</a></td>
+    <td align="center"><a href="https://github.com/DreamtaleCore"><img src="https://avatars.githubusercontent.com/u/12713528?v=4" width="100px;" alt=""/><br /><sub><b>Dreamtale</b></sub></a><br /><a href="https://github.com/aidudezzz/deepbots/issues?q=author%3ADreamtaleCore" title="Bug reports">🐛</a></td>
+    <td align="center"><a href="https://nickkok.github.io/my-website/"><img src="https://avatars.githubusercontent.com/u/8222731?v=4" width="100px;" alt=""/><br /><sub><b>Nikolaos Kokkinis-Ntrenis</b></sub></a><br /><a href="https://github.com/aidudezzz/deepbots/commits?author=NickKok" title="Code">💻</a> <a href="https://github.com/aidudezzz/deepbots/commits?author=NickKok" title="Documentation">📖</a> <a href="#ideas-NickKok" title="Ideas, Planning, & Feedback">🤔</a></td>
+  </tr>
+</table>
+
+<!-- markdownlint-restore -->
+<!-- prettier-ignore-end -->
+<!-- ALL-CONTRIBUTORS-LIST:END -->
+
+This project follows the
+[all-contributors](https://github.com/all-contributors/all-contributors)
+specification. Contributions of any kind welcome!
+
+
 Special thanks to Papanikolaou Evangelia for designing project's logo!
diff --git a/VERSION b/VERSION
index a87819e..f19d64c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.3-dev1
+0.1.3-dev4
diff --git a/deepbots/__init__.py b/deepbots/__init__.py
index 1672089..4a3de58 100644
--- a/deepbots/__init__.py
+++ b/deepbots/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.0.11-pre"
+__version__ = "0.1.3-dev4"
diff --git a/deepbots/robots/__init__.py b/deepbots/robots/__init__.py
index e69de29..e8cafa6 100644
--- a/deepbots/robots/__init__.py
+++ b/deepbots/robots/__init__.py
@@ -0,0 +1,2 @@
+from deepbots.robots.controllers.csv_robot import CSVRobot
+from deepbots.robots.controllers.emitter_receiver_robot import EmitterReceiverRobot
diff --git a/deepbots/robots/controllers/robot_emitter_receiver_csv.py b/deepbots/robots/controllers/csv_robot.py
similarity index 65%
rename from deepbots/robots/controllers/robot_emitter_receiver_csv.py
rename to deepbots/robots/controllers/csv_robot.py
index cecfb62..b3344d3 100644
--- a/deepbots/robots/controllers/robot_emitter_receiver_csv.py
+++ b/deepbots/robots/controllers/csv_robot.py
@@ -1,30 +1,46 @@
-from abc import abstractmethod
 from collections.abc import Iterable
 
-from .robot_emitter_receiver import RobotEmitterReceiver
+from deepbots.robots.controllers.emitter_receiver_robot import \
+    EmitterReceiverRobot
 
 
-class RobotEmitterReceiverCSV(RobotEmitterReceiver):
+class CSVRobot(EmitterReceiverRobot):
     """
     Basic implementation of a robot that can emit and receive messages to/from
-    the supervisor in string utf-8 form that are comma separated, i.e. a list.
+    the supervisor in string utf-8 form that are Comma Separated Values,
+    i.e. a list.
     """
     def __init__(self,
                  emitter_name="emitter",
                  receiver_name="receiver",
                  timestep=None):
-        super().__init__(timestep=timestep)
+        """
+        The constructor just passes the arguments provided to the parent
+        class constructor.
+
+        :param emitter_name: The name of the emitter device on the
+            robot node, defaults to "emitter"
+        :param receiver_name: The name of the receiver device on the
+            robot node, defaults to "receiver"
+        :param timestep: The robot controller timestep, defaults to None
+        """
+        super().__init__(emitter_name, receiver_name,
+                         timestep)
 
     def initialize_comms(self, emitter_name, receiver_name):
         """
         This method implements the basic emitter/receiver initialization that
-        assumes that an emitter and a receiver components are present on the
+        assumes that an emitter and a receiver component are present on the
         Webots robot with appropriate DEFs ("emitter"/"receiver").
-        :return: emitter and receiver references
+        :param emitter_name: The name of the emitter device on the
+            robot node
+        :param receiver_name: The name of the receiver device on the
+            robot node
+        :return: The initialized emitter and receiver references
         """
-        emitter = self.robot.getDevice("emitter")
-        receiver = self.robot.getDevice("receiver")
+        emitter = self.getDevice(emitter_name)
+        receiver = self.getDevice(receiver_name)
         receiver.enable(self.timestep)
         return emitter, receiver
 
@@ -62,7 +78,10 @@ def handle_receiver(self):
         """
         if self.receiver.getQueueLength() > 0:
             # Receive and decode message from supervisor
-            message = self.receiver.getData().decode("utf-8")
+            try:
+                message = self.receiver.getString()
+            except AttributeError:
+                message = self.receiver.getData().decode("utf-8")
             # Convert string message into a list
             message = message.split(",")
 
@@ -70,7 +89,6 @@
         self.receiver.nextPacket()
 
-    @abstractmethod
     def create_message(self):
         """
         This method should be implemented to convert whatever data the robot
 
         :return: a list or a comma-separated string containing all data
         """
-        pass
+        raise NotImplementedError
 
-    @abstractmethod
     def use_message_data(self, message):
         """
         This method should be implemented to apply whatever actions the
         message (received from the supervisor) contains.
+
+        :param message: list containing data received from the supervisor
         """
-        pass
+        raise NotImplementedError
diff --git a/deepbots/robots/controllers/robot_emitter_receiver.py b/deepbots/robots/controllers/emitter_receiver_robot.py
similarity index 60%
rename from deepbots/robots/controllers/robot_emitter_receiver.py
rename to deepbots/robots/controllers/emitter_receiver_robot.py
index 3ab19a0..40003ae 100644
--- a/deepbots/robots/controllers/robot_emitter_receiver.py
+++ b/deepbots/robots/controllers/emitter_receiver_robot.py
@@ -1,19 +1,19 @@
-from abc import ABC, abstractmethod
-
+from warnings import warn, simplefilter
 from controller import Robot
 
 
-class RobotEmitterReceiver(ABC):
+class EmitterReceiverRobot(Robot):
     """
-    This RobotEmitterReceiver implements only the most basic run method, that
-    steps the robot and calls the handleEmitter, handleReceiver methods that
+    This EmitterReceiverRobot implements only the most basic run method, that
+    steps the robot and calls the handle_emitter, handle_receiver methods that
     are needed for communication with the supervisor.
 
     This class must be inherited by all robot controllers created by the user
-    and the handleEmitter, handleReceiver, initialize_comms methods are all
-    abstract and need to be implemented, according to their docstrings. For a
+    and the handle_emitter, handle_receiver, initialize_comms methods are all
+    abstract and need to be implemented according to their docstrings. For a
     simpler RobotController that implements the methods in a basic form
-    inherit the RobotEmitterReceiver class.
+    inherit the CSVRobot subclass or other emitter-receiver
+    subclasses.
     """
     def __init__(self,
                  emitter_name="emitter",
@@ -28,17 +28,17 @@ def __init__(self,
         Also initializes the emitter and the receiver used to communicate
         with the supervisor, using the initialize_comms() method which must
         be implemented by the user. The two methods handle_emitter() and
-        handle_receiver() must also be implemented by the user.
+        handle_receiver() are also implemented by the user.
 
         For the step argument see relevant Webots documentation:
         https://cyberbotics.com/doc/guide/controller-programming#the-step-and-wb_robot_step-functions
 
-        :param timestep: float, positive or None
+        :param timestep: int, positive or None
         """
-        self.robot = Robot()
+        super().__init__()
 
         if timestep is None:
-            self.timestep = int(self.robot.getBasicTimeStep())
+            self.timestep = int(self.getBasicTimeStep())
         else:
             self.timestep = timestep
 
@@ -46,35 +46,55 @@
                                              emitter_name, receiver_name)
 
     def get_timestep(self):
+        # The filter is required so as to not ignore the Deprecation warning
+        simplefilter("once")
+        warn("get_timestep is deprecated, use .timestep instead",
+             DeprecationWarning)
         return self.timestep
 
-    @abstractmethod
+    @property
+    def timestep(self):
+        """
+        Getter of _timestep field. Timestep is defined in milliseconds
+
+        :return: The timestep of the controller in milliseconds
+        """
+        return self._timestep
+
+    @timestep.setter
+    def timestep(self, value):
+        """
+        Setter of timestep field. Automatically converts to int as
+        required by Webots.
+
+        :param value: The new controller timestep in milliseconds
+        """
+        self._timestep = int(value)
+
     def initialize_comms(self, emitter_name, receiver_name):
         """
-        This method should initialize and the return emitter and receiver in a
+        This method should initialize and return the emitter and receiver in a
         tuple as expected by the constructor.
 
         A basic example implementation can be:
 
-        emitter = self.robot.getDevice("emitter")
-        receiver = self.robot.getDevice("receiver")
+        emitter = self.getDevice("emitter")
+        receiver = self.getDevice("receiver")
         receiver.enable(self.timestep)
         return emitter, receiver
 
         :return: (emitter, receiver) tuple, as initialized
         """
-        pass
+        raise NotImplementedError
 
-    @abstractmethod
     def handle_emitter(self):
        """
         This method should take data from the robot, eg. sensor data, parse
         it into a message and use the robot's emitter to send it to the
         supervisor. This message will be used as the observation of the
         robot.
         """
-        pass
+        raise NotImplementedError
 
-    @abstractmethod
     def handle_receiver(self):
         """
         This method should take data through the receiver in the form of a
@@ -83,7 +103,7 @@ def handle_receiver(self):
 
         For example the message might include a motor speed, which should be
         parsed and applied to the robot's motor.
         """
-        pass
+        raise NotImplementedError
 
     def run(self):
         """
@@ -93,6 +113,6 @@
 
         This method should be called by a robot manager to run the robot.
         """
-        while self.robot.step(self.timestep) != -1:
+        while self.step(self.timestep) != -1:
             self.handle_receiver()
             self.handle_emitter()
diff --git a/deepbots/setup/__init__.py b/deepbots/setup/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/deepbots/setup/deepworlds_setup.py b/deepbots/setup/deepworlds_setup.py
deleted file mode 100644
index e69de29..0000000
diff --git a/deepbots/supervisor/__init__.py b/deepbots/supervisor/__init__.py
index e69de29..aa8fafd 100644
--- a/deepbots/supervisor/__init__.py
+++ b/deepbots/supervisor/__init__.py
@@ -0,0 +1,4 @@
+from deepbots.supervisor.controllers.robot_supervisor_env import RobotSupervisorEnv
+from deepbots.supervisor.controllers.csv_supervisor_env import CSVSupervisorEnv
+from deepbots.supervisor.controllers.deepbots_supervisor_env import DeepbotsSupervisorEnv
+from deepbots.supervisor.controllers.emitter_receiver_supervisor_env import EmitterReceiverSupervisorEnv
diff --git a/deepbots/supervisor/controllers/csv_supervisor_env.py b/deepbots/supervisor/controllers/csv_supervisor_env.py
new file mode 100644
index 0000000..692bb7f
--- /dev/null
+++ b/deepbots/supervisor/controllers/csv_supervisor_env.py
@@ -0,0 +1,61 @@
+from collections.abc import Iterable
+
+from deepbots.supervisor.controllers.emitter_receiver_supervisor_env import EmitterReceiverSupervisorEnv
+
+
+class CSVSupervisorEnv(EmitterReceiverSupervisorEnv):
+    """
+    This class implements the emitter-receiver scheme using Comma Separated
+    Values.
+    """
+    def __init__(self,
+                 emitter_name="emitter",
+                 receiver_name="receiver",
+                 timestep=None):
+        """
+        The constructor just passes the arguments provided to the parent
+        class constructor.
+
+        :param emitter_name: The name of the emitter device on the
+            supervisor node
+        :param receiver_name: The name of the receiver device on the
+            supervisor node
+        :param timestep: The supervisor controller timestep
+        """
+        super(CSVSupervisorEnv, self).__init__(emitter_name, receiver_name,
+                                               timestep)
+
+    def handle_emitter(self, action):
+        """
+        Implementation of the handle_emitter method expecting an iterable
+        with Comma Separated Values (CSV).
+
+        :param action: Whatever the use-case uses as an action, e.g.
+            an integer representing discrete actions
+        :type action: Iterable, for multiple values the CSV format is
+            required, e.g. [0, 1] for two actions
+        """
+        assert isinstance(action, Iterable), \
+            "The action object should be Iterable"
+
+        message = (",".join(map(str, action))).encode("utf-8")
+        self.emitter.send(message)
+
+    def handle_receiver(self):
+        """
+        Implementation of the handle_receiver method expecting an iterable
+        with Comma Separated Values (CSV).
+ + :return: Returns the message received from the robot, returns None + if no message is received + :rtype: List of string values + """ + if self.receiver.getQueueLength() > 0: + try: + string_message = self.receiver.getString() + except AttributeError: + string_message = self.receiver.getData().decode("utf-8") + self.receiver.nextPacket() + return string_message.split(",") + else: + return None diff --git a/deepbots/supervisor/controllers/supervisor_env.py b/deepbots/supervisor/controllers/deepbots_supervisor_env.py similarity index 68% rename from deepbots/supervisor/controllers/supervisor_env.py rename to deepbots/supervisor/controllers/deepbots_supervisor_env.py index e02259f..d2da665 100644 --- a/deepbots/supervisor/controllers/supervisor_env.py +++ b/deepbots/supervisor/controllers/deepbots_supervisor_env.py @@ -1,46 +1,28 @@ -from abc import ABC, abstractmethod - +import gym from controller import Supervisor -class SupervisorEnv(ABC): +class DeepbotsSupervisorEnv(Supervisor, gym.Env): """ - This class represents the basic template which contains the necessary - methods to train a reinforcement learning algorithm. The interface class - follows the gym interface which is standardized in many reinforcement - learning algorithms. The OpenAI gym environment can be described by the - following figure: - - +----------+ (action) +---------------+ - | |-------------------------------->| | - | Agent | | SupervisorEnv | - | |<--------------------------------| | - +----------+ (observation, reward) +---------------+ + This class is the highest class in deepbots class hierarchy, inheriting + both the Webots Supervisor controller and the basic gym.Env. + + Refer to gym.Env documentation on how to implement a custom gym.Env + for additional functionality. + + This class contains abstract methods that guide the development process + for users that want to implement a simple environment. This class is not intended for user usage, but to provide a common interface for all provided supervisor classes and make them compatible with reinforcement learning agents that work with the gym interface. Moreover, a problem-agnostic reset method is - provided. Please use any of the children supervisor classes to be - inherited by your own classes, such as the RobotSupervisor class. + provided. Please use any of the children supervisor classes to be + inherited by your own class, such as the RobotSupervisorEnv class. Nevertheless, advanced users can inherit this class to create their own supervisor classes if they wish. """ - def __init__(self): - self.supervisor = Supervisor() - - @abstractmethod - def get_observations(self): - """ - Return the observations of the robot. For example, metrics from - sensors, a camera image, etc. - - :returns: An object of observations - """ - pass - - @abstractmethod def step(self, action): """ On each timestep, the agent chooses an action for the previous @@ -48,34 +30,18 @@ def step(self, action): observation, *state_t+1*, the reward and whether the episode is done or not. + Each of the values returned is produced by implementations of + other abstract methods defined below. + observation: The next observation from the environment reward: The amount of reward awarded on this step is_done: Whether the episode is done - info: Diagnostic information mostly useful for debugging. 
+ info: Diagnostic information mostly useful for debugging :param action: The agent's action :return: tuple, (observation, reward, is_done, info) """ - pass - - @abstractmethod - def get_reward(self, action): - """ - Calculates and returns the reward for this step. - - :param action: The agent's action - :return: The amount of reward awarded on this step - """ - pass - - @abstractmethod - def is_done(self): - """ - Used to inform the agent that the problem is solved. - - :return: bool, True if the episode is done - """ - pass + raise NotImplementedError def reset(self): """ @@ -93,8 +59,9 @@ def reset(self): :return: default observation provided by get_default_observation() """ - self.supervisor.simulationReset() - self.supervisor.simulationResetPhysics() + self.simulationReset() + self.simulationResetPhysics() + super(Supervisor, self).step(int(self.getBasicTimeStep())) return self.get_default_observation() def get_default_observation(self): @@ -105,12 +72,46 @@ def get_default_observation(self): :return: list-like, contains default agent observation """ - return NotImplementedError + raise NotImplementedError + + def get_observations(self): + """ + Return the observations of the robot. For example, metrics from + sensors, a camera image, etc. + + This method is use-case specific and needs to be implemented + by the user. + + :returns: An object of observations + """ + raise NotImplementedError + + def get_reward(self, action): + """ + Calculates and returns the reward for this step. + + This method is use-case specific and needs to be implemented + by the user. + + :param action: The agent's action + :return: The amount of reward awarded on this step + """ + raise NotImplementedError + + def is_done(self): + """ + Used to inform the agent that the problem is solved. + + This method is use-case specific and needs to be implemented + by the user. + + :return: bool, True if the episode is done + """ + raise NotImplementedError - @abstractmethod def get_info(self): """ This method can be implemented to return any diagnostic information on each step, e.g. for debugging purposes. """ - pass + raise NotImplementedError diff --git a/deepbots/supervisor/controllers/emitter_receiver_supervisor_env.py b/deepbots/supervisor/controllers/emitter_receiver_supervisor_env.py new file mode 100644 index 0000000..c5e8a50 --- /dev/null +++ b/deepbots/supervisor/controllers/emitter_receiver_supervisor_env.py @@ -0,0 +1,117 @@ +from warnings import warn, simplefilter + +from deepbots.supervisor.controllers.deepbots_supervisor_env import DeepbotsSupervisorEnv +from controller import Supervisor + + +class EmitterReceiverSupervisorEnv(DeepbotsSupervisorEnv): + """ + This is the base class for the emitter - receiver scheme. + + Subclasses implement a variety of communication formats such as CSV + messages. + """ + def __init__(self, + emitter_name="emitter", + receiver_name="receiver", + timestep=None): + """ + The constructor sets up the timestep and calls the method that + initializes the emitter and receiver devices with the names provided. 
+ + :param emitter_name: The name of the emitter device on the + supervisor node + :param receiver_name: The name of the receiver device on the + supervisor node + :param timestep: The supervisor controller timestep + """ + super(EmitterReceiverSupervisorEnv, self).__init__() + + if timestep is None: + self.timestep = int(self.getBasicTimeStep()) + else: + self.timestep = timestep + + self.emitter, self.receiver = self.initialize_comms( + emitter_name, receiver_name) + + def initialize_comms(self, emitter_name, receiver_name): + """ + Initializes the emitter and receiver devices with the names provided. + + :param emitter_name: The name of the emitter device on the + supervisor node + :param receiver_name: The name of the receiver device on the + supervisor node + :return: The initialized emitter and receiver references + """ + emitter = self.getDevice(emitter_name) + receiver = self.getDevice(receiver_name) + receiver.enable(self.timestep) + return emitter, receiver + + def step(self, action): + """ + The basic step method that steps the controller, + calls the method that sends the action through the emitter + and returns the (observations, reward, done, info) object. + + :param action: Whatever the use-case uses as an action, e.g. + an integer representing discrete actions + :type action: Defined by the implementation of handle_emitter + :return: (observations, reward, done, info) as provided by the + corresponding methods as implemented for the use-case + """ + self.handle_emitter(action) + if super(Supervisor, self).step(self.timestep) == -1: + exit() + + return ( + self.get_observations(), + self.get_reward(action), + self.is_done(), + self.get_info(), + ) + + def handle_emitter(self, action): + """ + This method is implemented by subclasses depending on the + communication format used. + + :param action: The action that is sent through the emitter device + to the robot, e.g. an integer representing discrete actions + """ + raise NotImplementedError + + def handle_receiver(self): + """ + This method is implemented by subclasses depending on the + communication format used. + """ + raise NotImplementedError + + def get_timestep(self): + # The filter is required so as to not ignore the Deprecation warning + simplefilter("once") + warn("get_timestep is deprecated, use .timestep instead", + DeprecationWarning) + return self.timestep + + @property + def timestep(self): + """ + Getter of _timestep field. Timestep is defined in milliseconds + + :return: The timestep of the controller in milliseconds + """ + return self._timestep + + @timestep.setter + def timestep(self, value): + """ + Setter of timestep field. Automatically converts to int as + required by Webots. + + :param value: The new controller timestep in milliseconds + """ + self._timestep = int(value) diff --git a/deepbots/supervisor/controllers/robot_supervisor.py b/deepbots/supervisor/controllers/robot_supervisor.py deleted file mode 100644 index 82bf104..0000000 --- a/deepbots/supervisor/controllers/robot_supervisor.py +++ /dev/null @@ -1,74 +0,0 @@ -from abc import abstractmethod - -from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv - - -class RobotSupervisor(SupervisorEnv): - """ - The RobotSupervisor class implements both a robot controller and a - supervisor RL environment. This class can be used when there is no - need to separate the Robot from the Supervisor, or the observations of - the robot are too big to be packaged in messages, e.g. 
high resolution
-    images from a camera, that introduce a bottleneck and reduce
-    performance significantly.
-
-    Controllers that inherit this method *must* run on Robot nodes
-    that have supervisor privileges.
-
-    The user needs to implement the regular methods for the environment,
-    reward(), get_observations(), get_default_observation, etc., from
-    SupervisorEnv according to their use-case in addition to the method
-    apply_action() introduced here.
-
-    apply_action():
-    (similar to use_message_data() of RobotEmitterReceiverCSV)
-    This method takes an action argument and translates it to a robot
-    action, e.g. motor speeds.
-    Note that apply_action() is called during step().
-    """
-    def __init__(self, time_step=None):
-        super(RobotSupervisor, self).__init__()
-
-        if time_step is None:
-            self.timestep = int(self.supervisor.getBasicTimeStep())
-        else:
-            self.timestep = time_step
-
-    def get_timestep(self):
-        return self.timestep
-
-    def step(self, action):
-        """
-        Default step implementation that contains a Webots step conditional
-        for terminating properly.
-
-        :param action: The agent's action
-        :return: tuple, (observation, reward, is_done, info)
-        """
-        if self.supervisor.step(self.timestep) == -1:
-            exit()
-
-        self.apply_action(action)
-        return (
-            self.get_observations(),
-            self.get_reward(action),
-            self.is_done(),
-            self.get_info(),
-        )
-
-    @abstractmethod
-    def apply_action(self, action):
-        """
-        This method should be implemented to apply whatever actions the
-        action argument contains on the robot, depending on the use-case.
-        This method is called by the step() method which provides the
-        action argument.
-
-        For example, if the action argument is in the form of an integer
-        value, *0* could mean the action *move forward*. In this case,
-        motor speeds should be set here accordingly so the robot moves
-        forward.
-
-        :param action: list, containing action data
-        """
-        pass
diff --git a/deepbots/supervisor/controllers/robot_supervisor_env.py b/deepbots/supervisor/controllers/robot_supervisor_env.py
new file mode 100644
index 0000000..6aa9007
--- /dev/null
+++ b/deepbots/supervisor/controllers/robot_supervisor_env.py
@@ -0,0 +1,101 @@
+from warnings import warn, simplefilter
+from deepbots.supervisor.controllers.deepbots_supervisor_env import DeepbotsSupervisorEnv
+from controller import Supervisor
+
+
+class RobotSupervisorEnv(DeepbotsSupervisorEnv):
+    """
+    The RobotSupervisorEnv class implements both a robot controller and a
+    supervisor RL environment, referred to as Robot-Supervisor scheme.
+
+    This class can be used when there is no need to separate the Robot
+    from the Supervisor, or the observations of the robot are too big
+    to be packaged in messages, e.g. high resolution images from a camera,
+    that introduce a bottleneck and reduce performance significantly.
+
+    Controllers that inherit this class *must* run on Robot nodes
+    that have supervisor privileges.
+
+    The user needs to implement the regular methods for the environment,
+    get_reward(), get_observations(), get_default_observation(), etc., from
+    DeepbotsSupervisorEnv according to their use-case in addition to the method
+    apply_action() introduced here.
+
+    apply_action():
+    (similar to use_message_data() of CSVRobot)
+    This method takes an action argument and translates it to a robot
+    action, e.g. motor speeds.
+    Note that apply_action() is called during step().
+    """
+    def __init__(self, timestep=None):
+        super(RobotSupervisorEnv, self).__init__()
+
+        if timestep is None:
+            self.timestep = int(self.getBasicTimeStep())
+        else:
+            self.timestep = timestep
+
+    def get_timestep(self):
+        # The filter is required so as to not ignore the Deprecation warning
+        simplefilter("once")
+        warn("get_timestep is deprecated, use .timestep instead",
+             DeprecationWarning)
+        return self.timestep
+
+    @property
+    def timestep(self):
+        """
+        Getter of _timestep field. Timestep is defined in milliseconds
+
+        :return: The timestep of the controller in milliseconds
+        """
+        return self._timestep
+
+    @timestep.setter
+    def timestep(self, value):
+        """
+        Setter of timestep field. Automatically converts to int as
+        required by Webots.
+
+        :param value: The new controller timestep in milliseconds
+        """
+        self._timestep = int(value)
+
+    def step(self, action):
+        """
+        The basic step method that steps the controller,
+        calls the method that applies the action on the robot
+        and returns the (observations, reward, done, info) object.
+
+        :param action: Whatever the use-case uses as an action, e.g.
+            an integer representing discrete actions
+        :type action: Defined by the implementation of apply_action
+        :return: tuple, (observations, reward, done, info) as provided by the
+            corresponding methods as implemented for the use-case
+        """
+        self.apply_action(action)
+        if super(Supervisor, self).step(self.timestep) == -1:
+            exit()
+
+        return (
+            self.get_observations(),
+            self.get_reward(action),
+            self.is_done(),
+            self.get_info(),
+        )
+
+    def apply_action(self, action):
+        """
+        This method should be implemented to apply whatever actions the
+        action argument contains on the robot, depending on the use-case.
+        This method is called by the step() method which provides the
+        action argument.
+
+        For example, if the action argument is in the form of an integer
+        value, *0* could mean the action *move forward*. In this case,
+        motor speeds should be set here accordingly so the robot moves
+        forward.
+ + :param action: list, containing action data + """ + raise NotImplementedError diff --git a/deepbots/supervisor/controllers/supervisor_emitter_receiver.py b/deepbots/supervisor/controllers/supervisor_emitter_receiver.py deleted file mode 100644 index 09dd60a..0000000 --- a/deepbots/supervisor/controllers/supervisor_emitter_receiver.py +++ /dev/null @@ -1,75 +0,0 @@ -from abc import abstractmethod -from collections.abc import Iterable - -from .supervisor_env import SupervisorEnv - - -class SupervisorEmitterReceiver(SupervisorEnv): - def __init__(self, - emitter_name="emitter", - receiver_name="receiver", - time_step=None): - - super(SupervisorEmitterReceiver, self).__init__() - - if time_step is None: - self.timestep = int(self.supervisor.getBasicTimeStep()) - else: - self.timestep = time_step - - self.emitter = None - self.receiver = None - self.initialize_comms(emitter_name, receiver_name) - - def initialize_comms(self, emitter_name, receiver_name): - self.emitter = self.supervisor.getDevice(emitter_name) - self.receiver = self.supervisor.getDevice(receiver_name) - self.receiver.enable(self.timestep) - return self.emitter, self.receiver - - def step(self, action): - if self.supervisor.step(self.timestep) == -1: - exit() - - self.handle_emitter(action) - return ( - self.get_observations(), - self.get_reward(action), - self.is_done(), - self.get_info(), - ) - - @abstractmethod - def handle_emitter(self, action): - pass - - @abstractmethod - def handle_receiver(self): - pass - - def get_timestep(self): - return self.timestep - - -class SupervisorCSV(SupervisorEmitterReceiver): - def __init__(self, - emitter_name="emitter", - receiver_name="receiver", - time_step=None): - super(SupervisorCSV, self).__init__(emitter_name, receiver_name, - time_step) - - def handle_emitter(self, action): - assert isinstance(action, Iterable), \ - "The action object should be Iterable" - - message = (",".join(map(str, action))).encode("utf-8") - self.emitter.send(message) - - def handle_receiver(self): - if self.receiver.getQueueLength() > 0: - string_message = self.receiver.getData().decode("utf-8") - self.receiver.nextPacket() - return string_message.split(",") - else: - return None diff --git a/deepbots/supervisor/wrappers/__init__.py b/deepbots/supervisor/wrappers/__init__.py index e69de29..ddc2aa9 100644 --- a/deepbots/supervisor/wrappers/__init__.py +++ b/deepbots/supervisor/wrappers/__init__.py @@ -0,0 +1,2 @@ +from deepbots.supervisor.wrappers.keyboard_printer import KeyboardPrinter +from deepbots.supervisor.wrappers.tensorboard_wrapper import TensorboardLogger diff --git a/deepbots/supervisor/wrappers/keyboard_printer.py b/deepbots/supervisor/wrappers/keyboard_printer.py index 01c1696..96e232d 100644 --- a/deepbots/supervisor/wrappers/keyboard_printer.py +++ b/deepbots/supervisor/wrappers/keyboard_printer.py @@ -1,16 +1,16 @@ from controller import Keyboard -from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv +from deepbots.supervisor.controllers.deepbots_supervisor_env import DeepbotsSupervisorEnv -class KeyboardPrinter(SupervisorEnv): +class KeyboardPrinter(DeepbotsSupervisorEnv): def __init__(self, controller): self.controller = controller self.keyboard = Keyboard() - self.keyboard.enable(self.controller.get_timestep()) + self.keyboard.enable(self.controller.timestep) def step(self, action): - observation, reward, isDone, info = self.controller.step(action) + observation, reward, is_done, info = self.controller.step(action) key = self.keyboard.getKey() # DEBUG CONTROLS if key 
== Keyboard.CONTROL + ord("A"):
@@ -23,13 +23,13 @@ def step(self, action):
             print()
             print("Observations: ", self.controller.observation)
 
-        return observation, reward, isDone, info
+        return observation, reward, is_done, info
 
     def is_done(self):
-        isDone = self.controller.is_done()
-        if isDone:
+        is_done = self.controller.is_done()
+        if is_done:
             print("Done")
-        return isDone
+        return is_done
 
     def get_observations(self):
         return self.controller.get_observations()
diff --git a/deepbots/supervisor/wrappers/tensorboard_wrapper.py b/deepbots/supervisor/wrappers/tensorboard_wrapper.py
index 8508914..e1348d1 100644
--- a/deepbots/supervisor/wrappers/tensorboard_wrapper.py
+++ b/deepbots/supervisor/wrappers/tensorboard_wrapper.py
@@ -1,10 +1,10 @@
 import numpy as np
 from tensorboardX import SummaryWriter
 
-from deepbots.supervisor.controllers.supervisor_env import SupervisorEnv
+from deepbots.supervisor.controllers.deepbots_supervisor_env import DeepbotsSupervisorEnv
 
 
-class TensorboardLogger(SupervisorEnv):
+class TensorboardLogger(DeepbotsSupervisorEnv):
     def __init__(self,
                  controller,
                  log_dir="logs/results",
@@ -29,7 +29,7 @@ def __init__(self,
         self.file_writer = SummaryWriter(log_dir, flush_secs=30)
 
     def step(self, action):
-        observation, reward, isDone, info = self.controller.step(action)
+        observation, reward, is_done, info = self.controller.step(action)
 
         if (self.v_action > 1):
             self.file_writer.add_histogram(
@@ -47,7 +47,7 @@
         self.file_writer.add_scalar("Rewards/Per Global Step", reward,
                                     self.step_global)
 
-        if (isDone):
+        if (is_done):
             self.file_writer.add_scalar(
                 "Is Done/Per Reset step",
                 self.step_cntr,
@@ -60,13 +60,13 @@
         self.step_cntr += 1
         self.step_global += 1
 
-        return observation, reward, isDone, info
+        return observation, reward, is_done, info
 
     def is_done(self):
-        isDone = self.controller.is_done()
+        is_done = self.controller.is_done()
 
         self.file_writer.flush()
-        return isDone
+        return is_done
 
     def get_observations(self):
         obs = self.controller.get_observations()
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..1584f1b
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,57 @@
+FROM nvidia/cudagl:11.0-devel-ubuntu20.04
+ARG DEBIAN_FRONTEND=noninteractive
+
+ARG PYTHON_VERSION=3.8
+
+ARG branch
+
+# Install Ubuntu libraries
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends build-essential cmake pkg-config \
+    libfreetype6-dev git nano wget curl vim ca-certificates unzip libjpeg-dev \
+    libpng-dev libosmesa6-dev software-properties-common xvfb gpg-agent
+
+# Install miniconda
+RUN curl -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    chmod +x ~/miniconda.sh && \
+    ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    /opt/conda/bin/conda update -n base -c defaults conda && \
+    /opt/conda/bin/conda install -y python=$PYTHON_VERSION setuptools patchelf && \
+    /opt/conda/bin/conda clean -ya
+ENV PATH /opt/conda/bin:$PATH
+
+
+# Env vars for the nvidia-container-runtime.
+ENV PATH /usr/local/cuda/bin/:$PATH
+ENV LD_LIBRARY_PATH /usr/local/cuda/lib:/usr/local/cuda/lib64
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+LABEL com.nvidia.volumes.needed="nvidia_driver"
+
+# Install Webots
+RUN wget -qO- https://cyberbotics.com/Cyberbotics.asc | apt-key add -
+RUN apt-add-repository 'deb https://cyberbotics.com/debian/ binary-amd64/' && \
+    apt-get update && apt-get install -y webots
+
+# Set Webots environment variables
+ENV WEBOTS_HOME /usr/local/webots
+ENV LD_LIBRARY_PATH $WEBOTS_HOME/lib/controller:$LD_LIBRARY_PATH
+
+
+ADD requirements.txt .
+# Install python dependencies
+RUN pip install -r requirements.txt
+
+RUN if [ $branch = "dev" ]; then pip install -i https://test.pypi.org/simple/ deepbots ; else pip install deepbots ; fi
+
+RUN pip install 'ray[tune]' 'ray[rllib]'
+
+# Fix the custom environment error in Ray
+ADD preprocessors.py .
+RUN cp -r preprocessors.py opt/conda/lib/python3.8/site-packages/ray/rllib/models/
+RUN rm preprocessors.py
+
+
+WORKDIR /workspace
+RUN chmod -R a+w /workspace
\ No newline at end of file
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..cea8742
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,69 @@
+# Use the deepbots Docker image
+
+## In case you would like to add other functionalities/libraries to the Docker image:
+
+* Edit the Dockerfile
+* Build the Docker image using the commands below
+* The build argument ```branch``` specifies either the ```dev``` or the ```master``` branch of deepbots.
+
+### Building and tagging a Docker image:
+```bash
+$ docker build -t yourusername/repository-name --build-arg branch=dev .
+```
+
+## Pull the existing image from DockerHub
+
+```bash
+$ docker pull nickok/deepbots-dev
+```
+
+## To use CUDA in your Docker container
+
+You should install the NVIDIA Container Toolkit on your ```host``` machine.
+
+1) Setup the stable repository and the GPG key:
+``` bash
+$ distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
+   && curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
+   && curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
+```
+2) Install the nvidia-docker2 package (and dependencies) after updating the package listing:
+
+``` bash
+$ sudo apt-get update
+```
+
+``` bash
+$ sudo apt-get install -y nvidia-docker2
+```
+
+Restart the Docker daemon to complete the installation after setting the default runtime:
+``` bash
+$ sudo systemctl restart docker
+```
+
+
+
+## Run docker
+
+### Use docker with ```cpu```
+Mount a Webots project and run it in an interactive Docker container:
+```bash
+$ docker run -it -v /absolute/path/to/webots/project:/workspace/name-of-project nickok/deepbots-dev
+```
+
+### Use docker with ```cuda``` (GPU)
+``` bash
+$ docker run --rm --gpus all -it -v /absolute/path/to/webots/project:/workspace/name-of-project nickok/deepbots
+```
+
+After starting the docker container you can start Webots headlessly using xvfb:
+```bash
+$ xvfb-run webots --stdout --stderr --batch --no-sandbox --mode=fast /path/to/your/world/file
+
+```
+
+Start Webots headlessly using xvfb and save the output to out.txt:
+```bash
+$ xvfb-run webots --stdout --stderr --batch --no-sandbox --mode=fast /path/to/your/world/file &> out.txt &
+```
\ No newline at end of file
diff --git a/docker/preprocessors.py b/docker/preprocessors.py
new file mode 100644
index 0000000..2a9d1c0
--- /dev/null
+++ b/docker/preprocessors.py
@@ -0,0 +1,361 @@
+# This file is modified to be used for custom environments.
+# Original file before modification:
+# https://github.com/ray-project/ray/blob/releases/1.2.0/rllib/models/preprocessors.py
+
+from collections import OrderedDict
+import cv2
+import logging
+import numpy as np
+import gym
+from typing import Any, List
+
+from ray.rllib.utils.annotations import override, PublicAPI
+from ray.rllib.utils.spaces.repeated import Repeated
+from ray.rllib.utils.typing import TensorType
+
+ATARI_OBS_SHAPE = (210, 160, 3)
+ATARI_RAM_OBS_SHAPE = (128, )
+
+# Only validate env observations vs the observation space every n times in a
+# Preprocessor.
+OBS_VALIDATION_INTERVAL = 100
+
+logger = logging.getLogger(__name__)
+
+
+@PublicAPI
+class Preprocessor:
+    """Defines an abstract observation preprocessor function.
+
+    Attributes:
+        shape (List[int]): Shape of the preprocessed output.
+ """ + + @PublicAPI + def __init__(self, obs_space: gym.Space, options: dict = None): + legacy_patch_shapes(obs_space) + self._obs_space = obs_space + if not options: + from ray.rllib.models.catalog import MODEL_DEFAULTS + self._options = MODEL_DEFAULTS.copy() + else: + self._options = options + self.shape = self._init_shape(obs_space, self._options) + self._size = int(np.product(self.shape)) + self._i = 0 + + @PublicAPI + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + """Returns the shape after preprocessing.""" + raise NotImplementedError + + @PublicAPI + def transform(self, observation: TensorType) -> np.ndarray: + """Returns the preprocessed observation.""" + raise NotImplementedError + + def write(self, observation: TensorType, array: np.ndarray, + offset: int) -> None: + """Alternative to transform for more efficient flattening.""" + array[offset:offset + self._size] = self.transform(observation) + + def check_shape(self, observation: Any) -> None: + """Checks the shape of the given observation.""" + if self._i % OBS_VALIDATION_INTERVAL == 0: + if type(observation) is list and isinstance( + self._obs_space, gym.spaces.Box): + observation = np.array(observation) + + # try: + # if not self._obs_space.contains(observation): + # raise ValueError( + # "Observation ({}) outside given space ({})!", + # observation, self._obs_space) + # except AttributeError: + # raise ValueError( + # "Observation for a Box/MultiBinary/MultiDiscrete space " + # "should be an np.array, not a Python list.", observation) + self._i += 1 + + @property + @PublicAPI + def size(self) -> int: + return self._size + + @property + @PublicAPI + def observation_space(self) -> gym.Space: + obs_space = gym.spaces.Box(-1., 1., self.shape, dtype=np.float32) + # Stash the unwrapped space so that we can unwrap dict and tuple spaces + # automatically in modelv2.py + classes = (DictFlatteningPreprocessor, OneHotPreprocessor, + RepeatedValuesPreprocessor, TupleFlatteningPreprocessor) + if isinstance(self, classes): + obs_space.original_space = self._obs_space + return obs_space + + +class GenericPixelPreprocessor(Preprocessor): + """Generic image preprocessor. + + Note: for Atari games, use config {"preprocessor_pref": "deepmind"} + instead for deepmind-style Atari preprocessing. + """ + + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + self._grayscale = options.get("grayscale") + self._zero_mean = options.get("zero_mean") + self._dim = options.get("dim") + if self._grayscale: + shape = (self._dim, self._dim, 1) + else: + shape = (self._dim, self._dim, 3) + + return shape + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + """Downsamples images from (210, 160, 3) by the configured factor.""" + self.check_shape(observation) + scaled = observation[25:-25, :, :] + if self._dim < 84: + scaled = cv2.resize(scaled, (84, 84)) + # OpenAI: Resize by half, then down to 42x42 (essentially mipmapping). + # If we resize directly we lose pixels that, when mapped to 42x42, + # aren't close enough to the pixel boundary. 
+ scaled = cv2.resize(scaled, (self._dim, self._dim)) + if self._grayscale: + scaled = scaled.mean(2) + scaled = scaled.astype(np.float32) + # Rescale needed for maintaining 1 channel + scaled = np.reshape(scaled, [self._dim, self._dim, 1]) + if self._zero_mean: + scaled = (scaled - 128) / 128 + else: + scaled *= 1.0 / 255.0 + return scaled + + +class AtariRamPreprocessor(Preprocessor): + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + return (128, ) + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + self.check_shape(observation) + return (observation - 128) / 128 + + +class OneHotPreprocessor(Preprocessor): + """One-hot preprocessor for Discrete and MultiDiscrete spaces. + + Examples: + >>> self.transform(Discrete(3).sample()) + ... np.array([0.0, 1.0, 0.0]) + >>> self.transform(MultiDiscrete([2, 3]).sample()) + ... np.array([0.0, 1.0, 0.0, 0.0, 1.0]) + """ + + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + if isinstance(obs_space, gym.spaces.Discrete): + return (self._obs_space.n, ) + else: + return (np.sum(self._obs_space.nvec), ) + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + self.check_shape(observation) + arr = np.zeros(self._init_shape(self._obs_space, {}), dtype=np.float32) + if isinstance(self._obs_space, gym.spaces.Discrete): + arr[observation] = 1 + else: + for i, o in enumerate(observation): + arr[np.sum(self._obs_space.nvec[:i]) + o] = 1 + return arr + + @override(Preprocessor) + def write(self, observation: TensorType, array: np.ndarray, + offset: int) -> None: + array[offset:offset + self.size] = self.transform(observation) + + +class NoPreprocessor(Preprocessor): + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + return self._obs_space.shape + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + self.check_shape(observation) + return observation + + @override(Preprocessor) + def write(self, observation: TensorType, array: np.ndarray, + offset: int) -> None: + array[offset:offset + self._size] = np.array( + observation, copy=False).ravel() + + @property + @override(Preprocessor) + def observation_space(self) -> gym.Space: + return self._obs_space + + +class TupleFlatteningPreprocessor(Preprocessor): + """Preprocesses each tuple element, then flattens it all into a vector. + + RLlib models will unpack the flattened output before _build_layers_v2(). 
+ """ + + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + assert isinstance(self._obs_space, gym.spaces.Tuple) + size = 0 + self.preprocessors = [] + for i in range(len(self._obs_space.spaces)): + space = self._obs_space.spaces[i] + logger.debug("Creating sub-preprocessor for {}".format(space)) + preprocessor = get_preprocessor(space)(space, self._options) + self.preprocessors.append(preprocessor) + size += preprocessor.size + return (size, ) + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + self.check_shape(observation) + array = np.zeros(self.shape, dtype=np.float32) + self.write(observation, array, 0) + return array + + @override(Preprocessor) + def write(self, observation: TensorType, array: np.ndarray, + offset: int) -> None: + assert len(observation) == len(self.preprocessors), observation + for o, p in zip(observation, self.preprocessors): + p.write(o, array, offset) + offset += p.size + + +class DictFlatteningPreprocessor(Preprocessor): + """Preprocesses each dict value, then flattens it all into a vector. + + RLlib models will unpack the flattened output before _build_layers_v2(). + """ + + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + assert isinstance(self._obs_space, gym.spaces.Dict) + size = 0 + self.preprocessors = [] + for space in self._obs_space.spaces.values(): + logger.debug("Creating sub-preprocessor for {}".format(space)) + preprocessor = get_preprocessor(space)(space, self._options) + self.preprocessors.append(preprocessor) + size += preprocessor.size + return (size, ) + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + self.check_shape(observation) + array = np.zeros(self.shape, dtype=np.float32) + self.write(observation, array, 0) + return array + + @override(Preprocessor) + def write(self, observation: TensorType, array: np.ndarray, + offset: int) -> None: + if not isinstance(observation, OrderedDict): + observation = OrderedDict(sorted(observation.items())) + assert len(observation) == len(self.preprocessors), \ + (len(observation), len(self.preprocessors)) + for o, p in zip(observation.values(), self.preprocessors): + p.write(o, array, offset) + offset += p.size + + +class RepeatedValuesPreprocessor(Preprocessor): + """Pads and batches the variable-length list value.""" + + @override(Preprocessor) + def _init_shape(self, obs_space: gym.Space, options: dict) -> List[int]: + assert isinstance(self._obs_space, Repeated) + child_space = obs_space.child_space + self.child_preprocessor = get_preprocessor(child_space)(child_space, + self._options) + # The first slot encodes the list length. + size = 1 + self.child_preprocessor.size * obs_space.max_len + return (size, ) + + @override(Preprocessor) + def transform(self, observation: TensorType) -> np.ndarray: + array = np.zeros(self.shape) + if isinstance(observation, list): + for elem in observation: + self.child_preprocessor.check_shape(elem) + else: + pass # ValueError will be raised in write() below. 
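+        # Delegation: write() raises ValueError for non-list or over-long
+        # inputs and fills `array` with the length-prefixed, padded encoding.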
+ self.write(observation, array, 0) + return array + + @override(Preprocessor) + def write(self, observation: TensorType, array: np.ndarray, + offset: int) -> None: + if not isinstance(observation, list): + raise ValueError("Input for {} must be list type, got {}".format( + self, observation)) + elif len(observation) > self._obs_space.max_len: + raise ValueError("Input {} exceeds max len of space {}".format( + observation, self._obs_space.max_len)) + # The first slot encodes the list length. + array[offset] = len(observation) + for i, elem in enumerate(observation): + offset_i = offset + 1 + i * self.child_preprocessor.size + self.child_preprocessor.write(elem, array, offset_i) + + +@PublicAPI +def get_preprocessor(space: gym.Space) -> type: + """Returns an appropriate preprocessor class for the given space.""" + + legacy_patch_shapes(space) + obs_shape = space.shape + + if isinstance(space, (gym.spaces.Discrete, gym.spaces.MultiDiscrete)): + preprocessor = OneHotPreprocessor + elif obs_shape == ATARI_OBS_SHAPE: + preprocessor = GenericPixelPreprocessor + elif obs_shape == ATARI_RAM_OBS_SHAPE: + preprocessor = AtariRamPreprocessor + elif isinstance(space, gym.spaces.Tuple): + preprocessor = TupleFlatteningPreprocessor + elif isinstance(space, gym.spaces.Dict): + preprocessor = DictFlatteningPreprocessor + elif isinstance(space, Repeated): + preprocessor = RepeatedValuesPreprocessor + else: + preprocessor = NoPreprocessor + + return preprocessor + + +def legacy_patch_shapes(space: gym.Space) -> List[int]: + """Assigns shapes to spaces that don't have shapes. + + This is only needed for older gym versions that don't set shapes properly + for Tuple and Discrete spaces. + """ + + if not hasattr(space, "shape"): + if isinstance(space, gym.spaces.Discrete): + space.shape = () + elif isinstance(space, gym.spaces.Tuple): + shapes = [] + for s in space.spaces: + shape = legacy_patch_shapes(s) + shapes.append(shape) + space.shape = tuple(shapes) + + return space.shape \ No newline at end of file diff --git a/docker/requirements.txt b/docker/requirements.txt new file mode 100644 index 0000000..a728d0a --- /dev/null +++ b/docker/requirements.txt @@ -0,0 +1,7 @@ +gym==0.17.2 +numpy==1.22.0 +tensorflow==2.9.3 +stable-baselines3 +ray==1.2.0 +torch==1.13.1 +wandb \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d297686..dc587eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -seed-isort-config -pre-commit +gym==0.21 +tensorboardX diff --git a/setup.py b/setup.py index 81a1ccd..8adcf46 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,10 @@ DESCRIPTION = "A wrapper framework for Reinforcement Learning in Webots \ simulator" + +with open("requirements.txt") as requirements_file: + requirements = requirements_file.read().splitlines() + with open("README.md", "r", encoding="utf-8") as readme_file: readme = readme_file.read() @@ -10,9 +14,10 @@ name="deepbots", author="aidudezzz", author_email="deepbots@protonmail.com", - version="0.1.3-dev1", + version="0.1.3-dev4", description=DESCRIPTION, long_description=readme, long_description_content_type="text/markdown", packages=find_packages(), + install_requires=requirements, )
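For orientation, the sketch below shows how the vendored preprocessor module can be exercised end to end. It is a minimal example under two assumptions: the module is importable as `preprocessors` (the actual import path depends on where the file lands in the tree), and a non-empty `options` dict is supplied so that `__init__` does not fall back to importing `MODEL_DEFAULTS` from `ray.rllib`:

```python
import gym
import numpy as np

# Assumed import path; adjust to wherever this module lives in the package.
from preprocessors import get_preprocessor

# A Tuple space flattens to one-hot(Discrete(2)) followed by the raw Box
# values, so the flat vector has size 2 + 3 = 5.
space = gym.spaces.Tuple((
    gym.spaces.Discrete(2),
    gym.spaces.Box(0.0, 1.0, (3,), dtype=np.float32),
))

# Any truthy dict works here; it only needs to bypass the MODEL_DEFAULTS
# fallback in Preprocessor.__init__.
prep = get_preprocessor(space)(space, options={"grayscale": False})

flat = prep.transform(space.sample())
assert flat.shape == (5,)
assert prep.observation_space.shape == (5,)  # Box(-1.0, 1.0, (5,))
```

The flattened space also carries the unwrapped `Tuple` on its `original_space` attribute, which is how downstream models recover the per-component structure.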