Does multiagent.py allow 3D flight? #172

Open
zhaohubo opened this issue Sep 22, 2023 · 3 comments
Labels
question Further information is requested

Comments

@zhaohubo

As mentioned in #126, does it support 3D flight?

@JacopoPan added the question (Further information is requested) label on Oct 9, 2023
@JacopoPan
Member

Hi @zhaohubo

Yes, if the ActionType is one of the following three:

```python
class ActionType(Enum):
    """Action type enumeration class."""
    RPM = "rpm"  # RPMS
    PID = "pid"  # PID control
    VEL = "vel"  # Velocity input (using PID control)
```

@zhaohubo
Author

Hi @JacopoPan
I have successfully conducted experiments with two drones. Now I want to expand my experiments to more drones, for example three in total. Here is the modified code I have implemented. When the ActionType is set to "one_d_rpm", the leader-follower functionality works successfully. However, when I try to implement 3D flight by changing the ActionType to "rpm", "pid", or "vel", the training results are not satisfactory. I'm wondering if there is an error in my model modifications or if the algorithm parameters need to be adjusted. I would greatly appreciate your help, as this is crucial for my project.
```python
import sys
import os
import time
import argparse
from datetime import datetime
import subprocess
import pdb
import math
import numpy as np
import pybullet as p
import pickle
import matplotlib.pyplot as plt
import gym
from gym import error, spaces, utils
from gym.utils import seeding
from gym.spaces import Box, Dict
import torch
import torch.nn as nn
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork
import ray
from ray import tune
from ray.tune.logger import DEFAULT_LOGGERS
from ray.tune import register_env
from ray.rllib.agents import ppo
from ray.rllib.agents.ppo import PPOTrainer, PPOTFPolicy
from ray.rllib.examples.policy.random_policy import RandomPolicy
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.env.multi_agent_env import ENV_STATE

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from gym_pybullet_drones.envs.BaseAviary import DroneModel, Physics
from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary
from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary
from gym_pybullet_drones.envs.single_agent_rl.BaseSingleAgentAviary import ActionType, ObservationType
from gym_pybullet_drones.utils.Logger import Logger

import shared_constants

OWN_OBS_VEC_SIZE = None  # Modified at runtime
ACTION_VEC_SIZE = None  # Modified at runtime


class CustomTorchCentralizedCriticModel(TorchModelV2, nn.Module):

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        self.action_model = FullyConnectedNetwork(
            Box(low=-1, high=1, shape=(OWN_OBS_VEC_SIZE,)),
            action_space,
            num_outputs,
            model_config,
            name + "_action"
        )
        self.value_model = FullyConnectedNetwork(
            obs_space,
            action_space,
            1,
            model_config,
            name + "_vf"
        )
        self._model_in = None

    def forward(self, input_dict, state, seq_lens):
        self._model_in = [input_dict["obs_flat"], state, seq_lens]
        return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens)

    def value_function(self):
        value_out, _ = self.value_model({"obs": self._model_in[0]}, self._model_in[1], self._model_in[2])
        return torch.reshape(value_out, [-1])


class FillInActions(DefaultCallbacks):
    def on_postprocess_trajectory(self, worker, episode, agent_id, policy_id, policies, postprocessed_batch,
                                  original_batches, **kwargs):
        to_update = postprocessed_batch[SampleBatch.CUR_OBS]

        action_encoder = ModelCatalog.get_preprocessor_for_space(
            # Box(-np.inf, np.inf, (ACTION_VEC_SIZE,), np.float32) # Unbounded
            Box(-1, 1, (ACTION_VEC_SIZE,), np.float32)  # Bounded
        )

        opponent_actionss = []
        num_agents = len(original_batches)
        for other_id in range(num_agents):
            if other_id != agent_id:
                _, opponent_batch = original_batches[other_id]
                # opponent_actions = np.array([action_encoder.transform(a) for a in opponent_batch[SampleBatch.ACTIONS]]) # Unbounded
                opponent_actions = np.array(
                    [action_encoder.transform(np.clip(a, -1, 1)) for a in
                     opponent_batch[SampleBatch.ACTIONS]])  # Bounded
                opponent_actionss.append(opponent_actions)

        opponent_actionss = np.array(opponent_actionss)
        num_agent_actions = (num_agents - 1) * ACTION_VEC_SIZE
        opponent_actionss = opponent_actionss.reshape(-1, num_agent_actions)

        to_update[:, -num_agent_actions:] = opponent_actionss


def central_critic_observer(agent_obs, **kw):
    new_obs = {
        0: {
            "own_obs": agent_obs[0],
            "opponent_obs": {
                "key1": agent_obs[1],
                "key2": agent_obs[2]
            },
            "opponent_action": np.zeros(ACTION_VEC_SIZE),  # Filled in by FillInActions
        },
        1: {
            "own_obs": agent_obs[1],
            "opponent_obs": {
                "key1": agent_obs[0],
                "key2": agent_obs[2]
            },
            "opponent_action": np.zeros(ACTION_VEC_SIZE),  # Filled in by FillInActions
        },
        2: {
            "own_obs": agent_obs[2],
            "opponent_obs": {
                "key1": agent_obs[0],
                "key2": agent_obs[1]
            },
            "opponent_action": np.zeros(ACTION_VEC_SIZE),  # Filled in by FillInActions
        },

    }
    return new_obs

if __name__ == "__main__":

#### Define and parse (optional) arguments for the script ##
parser = argparse.ArgumentParser(description='Multi-agent reinforcement learning experiments script')
parser.add_argument('--num_drones', default=3, type=int, help='Number of drones (default: 3)', metavar='')
parser.add_argument('--env', default='leaderfollower', type=str, choices=['leaderfollower', 'flock', 'meetup'],
                    help='Help (default: ..)', metavar='')
parser.add_argument('--obs', default='kin', type=ObservationType, help='Help (default: ..)', metavar='')
parser.add_argument('--act', default='pid', type=ActionType, help='Help (default: ..)', metavar='')
parser.add_argument('--algo', default='cc', type=str, choices=['cc'], help='Help (default: ..)', metavar='')
parser.add_argument('--workers', default=0, type=int, help='Help (default: ..)', metavar='')
ARGS = parser.parse_args()

#### Save directory ########################################
filename = os.path.dirname(os.path.abspath(__file__)) + '/results/save-' + ARGS.env + '-' + str(
    ARGS.num_drones) + '-' + ARGS.algo + '-' + ARGS.obs.value + '-' + ARGS.act.value + '-' + datetime.now().strftime(
    "%m.%d.%Y_%H.%M.%S")
if not os.path.exists(filename):
    os.makedirs(filename + '/')

#### Print out current git commit hash #####################
git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip()
with open(filename + '/git_commit.txt', 'w+') as f:
    f.write(str(git_commit))

#### Constants, and errors #################################
if ARGS.obs == ObservationType.KIN:
    OWN_OBS_VEC_SIZE = 12
elif ARGS.obs == ObservationType.RGB:
    print("[ERROR] ObservationType.RGB for multi-agent systems not yet implemented")
    exit()
else:
    print("[ERROR] unknown ObservationType")
    exit()
if ARGS.act in [ActionType.ONE_D_RPM, ActionType.ONE_D_DYN, ActionType.ONE_D_PID]:
    ACTION_VEC_SIZE = 1
elif ARGS.act in [ActionType.RPM, ActionType.DYN, ActionType.VEL]:
    ACTION_VEC_SIZE = 4
elif ARGS.act == ActionType.PID:
    ACTION_VEC_SIZE = 3
else:
    print("[ERROR] unknown ActionType")
    exit()

#### Uncomment to debug slurm scripts ######################
# exit()

#### Initialize Ray Tune ###################################
ray.shutdown()
ray.init(ignore_reinit_error=True)

#### Register the custom centralized critic model ##########
ModelCatalog.register_custom_model("cc_model", CustomTorchCentralizedCriticModel)

#### Register the environment ##############################
temp_env_name = "this-aviary-v0"
if ARGS.env == 'flock':
    register_env(temp_env_name, lambda _: FlockAviary(num_drones=ARGS.num_drones,
                                                      aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
                                                      obs=ARGS.obs,
                                                      act=ARGS.act
                                                      )
                 )
elif ARGS.env == 'leaderfollower':
    register_env(temp_env_name, lambda _: LeaderFollowerAviary(num_drones=ARGS.num_drones,
                                                               aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
                                                               obs=ARGS.obs,
                                                               act=ARGS.act
                                                               )
                 )
elif ARGS.env == 'meetup':
    register_env(temp_env_name, lambda _: MeetupAviary(num_drones=ARGS.num_drones,
                                                       aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
                                                       obs=ARGS.obs,
                                                       act=ARGS.act
                                                       )
                 )
else:
    print("[ERROR] environment not yet implemented")
    exit()

#### Unused env to extract the act and obs spaces ##########
if ARGS.env == 'flock':
    temp_env = FlockAviary(num_drones=ARGS.num_drones,
                           aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
                           obs=ARGS.obs,
                           act=ARGS.act
                           )
elif ARGS.env == 'leaderfollower':
    temp_env = LeaderFollowerAviary(num_drones=ARGS.num_drones,
                                    aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
                                    obs=ARGS.obs,
                                    act=ARGS.act
                                    )
elif ARGS.env == 'meetup':
    temp_env = MeetupAviary(num_drones=ARGS.num_drones,
                            aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
                            obs=ARGS.obs,
                            act=ARGS.act
                            )
else:
    print("[ERROR] environment not yet implemented")
    exit()

observer_space = Dict({
    "own_obs": temp_env.observation_space[0],
    "opponent_obs": Dict(
        {
            "key1": temp_env.observation_space[0],
            "key2": temp_env.observation_space[0]
            # other_id: temp_env.observation_space[0]
            # for other_id in range(num_agents)
            # if other_id != agent_id
        }
    ),
    "opponent_action": temp_env.action_space[0],
})
action_space = temp_env.action_space[0]

#### Note ##################################################
# RLlib will create ``num_workers + 1`` copies of the
# environment since one copy is needed for the driver process.
# To avoid paying the extra overhead of the driver copy,
# which is needed to access the env's action and observation spaces,
# you can defer environment initialization until ``reset()`` is called

#### Set up the trainer's config ###########################
config = ppo.DEFAULT_CONFIG.copy()  # For the default config, see github.com/ray-project/ray/blob/master/rllib/agents/trainer.py
config = {
    "env": temp_env_name,
    "num_workers": 0 + ARGS.workers,
    "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),  # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0
    "batch_mode": "complete_episodes",
    "callbacks": FillInActions,
    "framework": "torch",
}

#### Set up the model parameters of the trainer's config ###
config["model"] = {
    "custom_model": "cc_model",
}

#### Set up the multiagent params of the trainer's config ##
config["multiagent"] = {
    "policies": {
        "pol0": (None, observer_space, action_space, {"agent_id": 0, }),
        "pol1": (None, observer_space, action_space, {"agent_id": 1, }),
        "pol2": (None, observer_space, action_space, {"agent_id": 2, }),
    },
    "policy_mapping_fn": lambda x: "pol0" if x == 0 else ("pol1" if x == 1 else "pol2"),
    # "pol0" if x == 0 else "pol1",  # # Function mapping agent ids to policy ids
    "observation_fn": central_critic_observer,  # See rllib/evaluation/observation_function.py for more info
}

#### Ray Tune stopping conditions ##########################
stop = {
    "timesteps_total": 500000,  # 100000 ~= 10'
    # "episode_reward_mean": -50,
    # "training_iteration": 0,
}

#### Train #################################################
results = tune.run(
    "PPO",
    stop=stop,
    config=config,
    verbose=True,
    checkpoint_at_end=True,
    local_dir=filename,
)
# check_learning_achieved(results, 1.0)

#### Save agent ############################################
checkpoints = results.get_trial_checkpoints_paths(trial=results.get_best_trial('episode_reward_mean',
                                                                               mode='max'
                                                                               ),
                                                  metric='episode_reward_mean'
                                                  )
with open(filename + '/checkpoint.txt', 'w+') as f:
    f.write(checkpoints[0][0])

#### Shut down Ray #########################################
ray.shutdown()

```

@JacopoPan
Member

Hi @zhaohubo

I haven't tried a 3-drone example but, as you are doing, you should of course start by extending the centralized critic.
If it works in the 1D case, I'd think that part is correct. However, the 3D case with RPM inputs might be too difficult; what are the results with PID? Have you modified the reward function as well?
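
For illustration only, not the repo's actual implementation: a reward that makes the followers track the leader's full 3D position (rather than only its altitude) could look roughly like this, where `_getDroneStateVector` is the base aviary's state accessor and the target and weights are placeholders.

```python
# Illustrative sketch only (not the repo's implementation): a possible
# 3D-aware reward for a 3-drone LeaderFollowerAviary subclass.
import numpy as np

def _computeReward(self):
    states = np.array([self._getDroneStateVector(i) for i in range(self.NUM_DRONES)])
    rewards = {}
    # Leader (drone 0): reach a fixed 3D target (placeholder coordinates).
    target = np.array([0.0, 0.0, 0.5])
    rewards[0] = -np.linalg.norm(target - states[0, 0:3])**2
    # Followers: track the leader's full 3D position, with a placeholder weight.
    for i in range(1, self.NUM_DRONES):
        rewards[i] = -(1.0 / self.NUM_DRONES) * np.linalg.norm(states[0, 0:3] - states[i, 0:3])**2
    return rewards
```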
