
Simple Behavioral Cloning example #445

Open
meteoiker opened this issue May 8, 2020 · 0 comments

meteoiker commented May 8, 2020

I am trying to run a simple BC experiment using RL-Coach and a custom Gym environment. The idea is to predict the target variable from f_1, f_2 and f_3. The Gym environment I am using is the following (a quick manual check of it is shown right after the class):

import random
import numpy as np
import pandas as pd

import gym
from gym import spaces

from sklearn.preprocessing import MinMaxScaler


class TestEnvOne(gym.Env):

    def __init__(self, max_time):
        super(TestEnvOne, self).__init__()
        self.max_time = max_time
        f_1 = np.sin(np.arange(self.max_time))
        f_2 = np.cos(np.arange(self.max_time))
        f_3 = np.tan(np.arange(self.max_time))
        target = np.roll(f_1, 1) + np.roll(f_2, 2) + np.roll(f_3, 3)
        df = pd.DataFrame({'target': target, 'f_1': f_1, 'f_2': f_2, 'f_3': f_3})
        list_col = ['target', 'f_1', 'f_2', 'f_3']
        df[list_col] = MinMaxScaler().fit_transform(df[list_col])
        self.df = df
        self.start_step = 0
        self.current_step = 0

        # Actions
        self.action_space = spaces.Box(low=np.array([0]), high=np.array([1]), dtype=np.float32)

        self.observation_space = gym.spaces.dict.Dict(
            {'measurements': spaces.Box(low=0.0, high=1.1, shape=(3,), dtype=np.float32),
             'desired_goal': spaces.Box(low=np.array([0]), high=np.array([1]), dtype=np.float32)
             })
        self.reward_range = (-1, 1)

    def _next_observation(self):
        measurements = np.array([
            self.df.loc[self.current_step, 'f_1'],
            self.df.loc[self.current_step, 'f_2'],
            self.df.loc[self.current_step, 'f_3']
        ])

        frame = {'desired_goal': self.df.loc[self.current_step, 'target'].reshape(-1, 1),
                 'measurements': measurements
                 }
        return frame

    def step(self, action):
        self.current_step += 1
        if self.current_step >= len(self.df.loc[:, 'target'].values):
            self.current_step = 0

        obs = self._next_observation()
        reward = (obs['desired_goal'] - action)[0][0]
        done = (self.current_step == self.start_step)
        returning_value = {'measurements': obs['measurements'], 'desired_goal': obs['desired_goal']}
        return returning_value, reward, done, {}

    def reset(self):
        # Set the current step to a random point within the data frame
        self.start_step = random.randint(0, len(self.df.loc[:, 'target'].values) - 1)
        self.current_step = self.start_step
        observation = self._next_observation()
        return observation

    def render(self, mode='human', close=False):
        # Render the environment to the screen
        print(f'Step: {self.current_step}')
        print(f'Target: {self.df.loc[self.current_step, "target"]}')

    def seed(self, seed=None):
        self.seed_value = seed
        return [seed]
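
As a sanity check, I can drive the environment by hand outside of Coach. This is a minimal sketch, assuming the class above is saved as env.py; it just resets, takes one random step and prints the shapes I expect from the code above:

import numpy as np
from env import TestEnvOne

env = TestEnvOne(max_time=2000)
obs = env.reset()
# measurements should be a flat array with the three features
print('measurements shape:', np.asarray(obs['measurements']).shape)
# desired_goal comes back reshaped to (1, 1) by _next_observation
print('desired_goal shape:', np.asarray(obs['desired_goal']).shape)

action = env.action_space.sample()
obs, reward, done, info = env.step(action)
print('reward:', reward, 'done:', done)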

The preset I am using is based on the Doom Basic BC preset:

from rl_coach.agents.bc_agent import BCAgentParameters
from rl_coach.architectures.embedder_parameters import InputEmbedderParameters
from rl_coach.base_parameters import VisualizationParameters, PresetValidationParameters
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity


####################
# Graph Scheduling #
####################

schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(2000)
schedule_params.steps_between_evaluation_periods = TrainingSteps(20)
schedule_params.evaluation_steps = EnvironmentEpisodes(5)
schedule_params.heatup_steps = EnvironmentSteps(10)

#########
# Agent #
#########
agent_params = BCAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.memory.max_size = (MemoryGranularity.Transitions, 1000000)
agent_params.algorithm.discount = 0.99
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)
#agent_params.network_wrappers['main'].batch_size = 1
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'measurements': InputEmbedderParameters(),
    'desired_goal': InputEmbedderParameters()
}


###############
# Environment #
###############
#envPath = 'env.TestEnvZero:TestEnvZero'
envPath = 'env.TestEnvOne:TestEnvOne'
env_params = GymVectorEnvironment(level=envPath)
env_params.additional_simulator_parameters = {'max_time': 2000}

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test_using_a_trace_test = False

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
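
Before wiring it into Coach, I also checked that the level string resolves to the class and that the extra parameters reach the constructor. This is only my rough understanding of what GymVectorEnvironment does with level and additional_simulator_parameters, not Coach's actual code:

import importlib

module_name, class_name = 'env.TestEnvOne:TestEnvOne'.split(':')
env_class = getattr(importlib.import_module(module_name), class_name)
env = env_class(**{'max_time': 2000})  # same dict as additional_simulator_parameters
print(env.observation_space)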

I am running it from the command line with: coach -p presets/PruebaPresetBC.py

I am getting an exception in the improve phase:

Traceback (most recent call last):
  File "/home/meteo/coach_env/bin/coach", line 8, in <module>
    sys.exit(main())
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 777, in main
    launcher.launch()
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 226, in launch
    self.run_graph_manager(graph_manager, args)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 612, in run_graph_manager
    self.start_single_threaded(task_parameters, graph_manager, args)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 674, in start_single_threaded
    start_graph(graph_manager=graph_manager, task_parameters=task_parameters)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/coach.py", line 88, in start_graph
    graph_manager.improve()
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/graph_managers/graph_manager.py", line 547, in improve
    self.train_and_act(self.steps_between_evaluation_periods)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/graph_managers/graph_manager.py", line 481, in train_and_act
    self.act(EnvironmentSteps(1))
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/graph_managers/graph_manager.py", line 447, in act
    result = self.top_level_manager.step(None)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/level_manager.py", line 245, in step
    action_info = acting_agent.act()
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/agents/agent.py", line 851, in act
    action = self.choose_action(curr_state)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/agents/imitation_agent.py", line 43, in choose_action
    prediction = self.networks['main'].online_network.predict(self.prepare_batch_for_inference(curr_state, 'main'))
  File "/home/meteo/coach_env/lib/python3.6/site-packages/rl_coach/architectures/tensorflow_components/architecture.py", line 547, in predict
    output = self.sess.run(outputs, feed_dict)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/home/meteo/coach_env/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1149, in _run
    str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (1, 3) for Tensor 'main_level/agent/main/online/network_0/measurements/measurements:0', which has shape '(?, 0)'
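
To rule out a mismatch on the environment side, I compared the declared observation_space with what reset() actually returns (plain gym, no Coach involved; again assuming the class is saved as env.py):

import numpy as np
from env import TestEnvOne

env = TestEnvOne(max_time=2000)
obs = env.reset()
for key, space in env.observation_space.spaces.items():
    print(key, 'declared:', space.shape, 'returned:', np.asarray(obs[key]).shape)
print('contains(obs):', env.observation_space.contains(obs))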

I think it is trying to feed a measurement (which has three values) into a zero-sized input. I don't know where this zero shape comes from. Any ideas? Thanks
