Merge pull request #74 from CarperAI/2.0

2.0
NeuralMMO · May 18, 2023 · 416b7d8 · 416b7d8
2 parents 5aaf33b + 60fece1
commit 416b7d8
Show file tree

Hide file tree

Showing 38 changed files with 2,459 additions and 818 deletions.
diff --git a/nmmo/__init__.py b/nmmo/__init__.py
@@ -9,7 +9,6 @@
 from .core import config, agent
 from .core.agent import Agent
 from .core.env import Env
-from .systems.achievement import Task
 from .core.terrain import MapGenerator, Terrain
 
 MOTD = rf'''      ___           ___           ___           ___
@@ -26,7 +25,7 @@
 
 __all__ = ['Env', 'config', 'agent', 'Agent', 'MapGenerator', 'Terrain',
         'action', 'Action', 'material', 'spawn',
-        'Task', 'Overlay', 'OverlayRegistry']
+        'Overlay', 'OverlayRegistry']
 
 try:
   __all__.append('OpenSkillRating')

diff --git a/nmmo/core/config.py b/nmmo/core/config.py
@@ -154,9 +154,6 @@ def game_system_enabled(self, name) -> bool:
   PLAYERS                      = [Agent]
   '''Player classes from which to spawn'''
 
-  TASKS                        = []
-  '''Tasks for which to compute rewards'''
-
   ############################################################################
   ### Emulation Parameters
 

diff --git a/nmmo/core/env.py b/nmmo/core/env.py
@@ -1,6 +1,7 @@
 import functools
 import random
-from typing import Any, Dict, List
+import copy
+from typing import Any, Dict, List, Optional, Union, Tuple
 from ordered_set import OrderedSet
 
 import gym
@@ -14,13 +15,15 @@
 from nmmo.entity.entity import Entity
 from nmmo.systems.item import Item
 from nmmo.core import realm
-
+from nmmo.task.game_state import GameStateGenerator
+from nmmo.task.task_api import Task
+from nmmo.task.scenario import default_task
 from scripted.baselines import Scripted
 
-
 class Env(ParallelEnv):
   # Environment wrapper for Neural MMO using the Parallel PettingZoo API
 
+  #pylint: disable=no-value-for-parameter
   def __init__(self,
     config: Default = nmmo.config.Default(), seed=None):
     self._init_random(seed)
@@ -35,6 +38,18 @@ def __init__(self,
     self._dead_agents = OrderedSet()
     self.scripted_agents = OrderedSet()
 
+    self._gamestate_generator = GameStateGenerator(self.realm, self.config)
+    self.game_state = None
+    # Default task: rewards 1 each turn agent is alive
+    self.tasks: List[Tuple[Task,float]] = None
+    self._task_encoding = None
+    self._task_embedding_size = -1
+    t = default_task(self.possible_agents)
+    self.change_task(t,
+                     embedding_size=self._task_embedding_size,
+                     task_encoding=self._task_encoding,
+                     reset=False)
+
   # pylint: disable=method-cache-max-size-none
   @functools.lru_cache(maxsize=None)
   def observation_space(self, agent: int):
@@ -60,7 +75,7 @@ def box(rows, cols):
       "Tick": gym.spaces.Discrete(1),
       "AgentId": gym.spaces.Discrete(1),
       "Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes),
-      "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes)
+      "Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes),
     }
 
     if self.config.ITEM_SYSTEM_ENABLED:
@@ -72,6 +87,12 @@ def box(rows, cols):
     if self.config.PROVIDE_ACTION_TARGETS:
       obs_space['ActionTargets'] = self.action_space(None)
 
+    if self._task_encoding:
+      obs_space['Task'] = gym.spaces.Box(
+          low=-2**20, high=2**20,
+          shape=(self._task_embedding_size,),
+          dtype=np.float32)
+
     return gym.spaces.Dict(obs_space)
 
   def _init_random(self, seed):
@@ -109,6 +130,28 @@ def action_space(self, agent):
   ############################################################################
   # Core API
 
+  def change_task(self,
+                  new_tasks: List[Union[Tuple[Task, float], Task]],
+                  task_encoding: Optional[Dict[int, np.ndarray]] = None,
+                  embedding_size: int=16,
+                  reset: bool=True,
+                  map_id=None,
+                  seed=None,
+                  options=None):
+    """ Changes the tasks used to compute each agent's reward
+
+    Args:
+      new_tasks: Tasks, or (task, weight) tuples; a bare task gets weight 1
+      task_encoding: A mapping from eid to encoded task
+      embedding_size: The size of each embedding
+      reset: If true, calls reset() with the given map_id, seed and options
+    """
+    self._tasks = [t if isinstance(t, Tuple) else (t,1) for t in new_tasks]
+    self._task_encoding = task_encoding
+    self._task_embedding_size = embedding_size
+    if reset:
+      self.reset(map_id=map_id, seed=seed, options=options)
+
   # TODO: This doesn't conform to the PettingZoo API
   # pylint: disable=arguments-renamed
   def reset(self, map_id=None, seed=None, options=None):
@@ -142,9 +185,16 @@ def reset(self, map_id=None, seed=None, options=None):
       if isinstance(ent.agent, Scripted):
         self.scripted_agents.add(eid)
 
+    self.tasks = copy.deepcopy(self._tasks)
     self.obs = self._compute_observations()
+    self._gamestate_generator = GameStateGenerator(self.realm, self.config)
 
-    return {a: o.to_gym() for a,o in self.obs.items()}
+    gym_obs = {}
+    for a, o in self.obs.items():
+      gym_obs[a] = o.to_gym()
+      if self._task_encoding:
+        gym_obs[a]['Task'] = self._encode_goal().get(a,np.zeros(self._task_embedding_size))
+    return gym_obs
 
   def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
     '''Simulates one game tick or timestep
@@ -239,18 +289,15 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
           Provided for conformity with PettingZoo
     '''
     assert self.obs is not None, 'step() called before reset'
-
     # Add in scripted agents' actions, if any
     if self.scripted_agents:
       actions = self._compute_scripted_agent_actions(actions)
 
     # Drop invalid actions of BOTH neural and scripted agents
     #   we don't need _deserialize_scripted_actions() anymore
     actions = self._validate_actions(actions)
-
     # Execute actions
     self.realm.step(actions)
-
     dones = {}
     for eid in self.possible_agents:
       if eid not in self._dead_agents and (
@@ -262,7 +309,11 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
 
     # Store the observations, since actions reference them
     self.obs = self._compute_observations()
-    gym_obs = {a: o.to_gym() for a,o in self.obs.items()}
+    gym_obs = {}
+    for a, o in self.obs.items():
+      gym_obs[a] = o.to_gym()
+      if self._task_encoding:
+        gym_obs[a]['Task'] = self._encode_goal()[a]
 
     rewards, infos = self._compute_rewards(self.obs.keys(), dones)
 
@@ -332,6 +383,7 @@ def _compute_observations(self):
         obs: Dictionary of observations for each agent
         obs[agent_id] = {
           "Entity": [e1, e2, ...],
+          "Task": [encoded_task],
           "Tile": [t1, t2, ...],
           "Inventory": [i1, i2, ...],
           "Market": [m1, m2, ...],
@@ -364,12 +416,17 @@ def _compute_observations(self):
 
       inventory = Item.Query.owned_by(self.realm.datastore, agent_id)
 
-      obs[agent_id] = Observation(
-        self.config, self.realm.tick,
-        agent_id, visible_tiles, visible_entities, inventory, market)
-
+      obs[agent_id] = Observation(self.config,
+                                  self.realm.tick,
+                                  agent_id,
+                                  visible_tiles,
+                                  visible_entities,
+                                  inventory, market)
     return obs
 
+  def _encode_goal(self):
+    return self._task_encoding  # task_encoding stored by change_task; may be None
+
   def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]):
     '''Computes the reward for the specified agent
 
@@ -385,21 +442,31 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]):
           The reward for the actions on the previous timestep of the
           entity identified by ent_id.
     '''
+    # Initialization
+    self.game_state = self._gamestate_generator.generate(self.realm, self.obs)
     infos = {}
-    rewards = { eid: -1 for eid in dones }
-
-    for agent_id in agents:
-      infos[agent_id] = {}
-      agent = self.realm.players.get(agent_id)
-      assert agent is not None, f'Agent {agent_id} not found'
-
-      if agent.diary is not None:
-        rewards[agent_id] = sum(agent.diary.rewards.values())
-        infos[agent_id].update(agent.diary.rewards)
+    for eid in agents:
+      infos[eid] = {}
+      infos[eid]['task'] = {}
+    rewards = {eid: 0 for eid in agents}
+
+    # Compute Rewards and infos
+    for task, weight in self.tasks:
+      task_rewards, task_infos = task.compute_rewards(self.game_state)
+      for eid, reward in task_rewards.items():
+        # Rewards, weighted
+        rewards[eid] = rewards.get(eid,0) + reward * weight
+        # Infos. NOTE(review): nested in the reward loop and rebinds eid -- confirm intended indent
+        for eid, info in task_infos.items():
+          if eid in infos:
+            infos[eid]['task'] = {**infos[eid]['task'], **info}
+
+    # Remove rewards for dead agents (?)
+    for eid in dones:
+      rewards[eid] = 0
 
     return rewards, infos
 
-
   ############################################################################
   # PettingZoo API
   ############################################################################

diff --git a/nmmo/core/log_helper.py b/nmmo/core/log_helper.py
@@ -80,13 +80,6 @@ def _register_player_stat(self, name: str, func: callable):
 
   def _register_player_stats(self):
     self._register_player_stat('Basic/TimeAlive', lambda player: player.history.time_alive.val)
-
-    if self.config.TASKS:
-      self._register_player_stat('Task/Completed', lambda player: player.diary.completed)
-      self._register_player_stat('Task/Reward' , lambda player: player.diary.cumulative_reward)
-    else:
-      self._register_player_stat('Task/Completed', lambda player: player.history.time_alive.val)
-
     # Skills
     if self.config.PROGRESSION_SYSTEM_ENABLED:
       if self.config.COMBAT_SYSTEM_ENABLED:
@@ -137,13 +130,6 @@ def _player_stats(self, player: Agent) -> Dict[str, float]:
     for key, stat_func in self._player_stats_funcs.items():
       stats[f'{key}_{policy}'] = stat_func(player)
 
-    stats['Task_Reward'] = player.history.time_alive.val
-
-    # If diary is enabled, log task and achievement stats
-    if player.diary:
-      stats['Task_Reward'] = player.diary.cumulative_reward
-
-      for achievement in player.diary.achievements:
-        stats["Achievement_{achievement.name}"] = float(achievement.completed)
+    stats['Time_Alive'] = player.history.time_alive.val
 
     return stats
diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
@@ -123,7 +123,7 @@ def to_gym(self):
         self.entities.values, np.zeros((
           self.config.PLAYER_N_OBS - self.entities.values.shape[0],
           self.entities.values.shape[1]))
-      ]),
+      ])
     }
 
     if self.config.ITEM_SYSTEM_ENABLED:

diff --git a/nmmo/core/realm.py b/nmmo/core/realm.py
@@ -164,6 +164,7 @@ def step(self, actions):
     #  - 60: Move
     #  - 70: Sell - to guarantee the listed items are available to buy
     #  - 99: Comm
+
     for priority in sorted(merged):
       # TODO: we should be randomizing these, otherwise the lower ID agents
       # will always go first. --> ONLY SHUFFLE BUY
@@ -176,7 +177,6 @@ def step(self, actions):
         ent = self.entity(ent_id)
         if ent.alive:
           atn.call(self, ent, *args)
-
     dead = self.players.cull()
     self.npcs.cull()
 

diff --git a/nmmo/core/terrain.py b/nmmo/core/terrain.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import vec_noise
-from imageio import imread, imsave
+from imageio.v2 import imread, imsave
 from scipy import stats
 
 from nmmo import material
@@ -237,18 +237,13 @@ def generate_all_maps(self):
     path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS)
     os.makedirs(path_maps, exist_ok=True)
 
-    if not config.MAP_FORCE_GENERATION and os.listdir(path_maps):
-      # check if the folder has all the required maps
-      all_maps_exist = True
-      for idx in range(config.MAP_N, -1, -1):
-        map_file = path_maps + '/map' + str(idx+1) + '/map.npy'
-        if not os.path.exists(map_file):
-          # override MAP_FORCE_GENERATION = FALSE and generate maps
-          all_maps_exist = False
-          break
-
-      # do not generate maps if all maps exist
-      if all_maps_exist:
+    existing_maps = set(map_dir + '/map.npy' for map_dir in os.listdir(path_maps))
+    if not config.MAP_FORCE_GENERATION and existing_maps:
+      required_maps = {
+        f'map{idx}/map.npy' for idx in range(1, config.MAP_N+1)
+      }
+      missing = required_maps - existing_maps
+      if not missing:
         return
 
     if __debug__:

diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py
@@ -117,6 +117,9 @@ def cull(self):
     for entity in super().cull().values():
       self.spawn_dangers.append(entity.spawn_danger)
 
+    # refill npcs to target config.NPC_N, within config.NPC_SPAWN_ATTEMPTS
+    self.spawn()
+
   def actions(self, realm):
     actions = {}
     for idx, entity in self.entities.items():
@@ -143,7 +146,7 @@ def spawn_individual(self, r, c, idx):
 
   def spawn(self):
     idx = 0
-    for r, c in spawn.spawn_concurrent(self.config):
+    for r, c in spawn.spawn_concurrent(self.config, self.realm):
       idx += 1
 
       if idx in self.entities:

diff --git a/nmmo/entity/npc.py b/nmmo/entity/npc.py
@@ -81,15 +81,21 @@ def receive_damage(self, source, dmg):
     #   because source cannot take it if the inventory is full
     #   Also, destroy the remaining items if the source cannot take those
     for item in self.droptable.roll(self.realm, self.attack_level):
-      if source.inventory.space:
+      if source.is_player and source.inventory.space:
         source.inventory.receive(item)
+      else:
+        item.destroy()
 
     return False
 
   @staticmethod
   def spawn(realm, pos, iden):
     config = realm.config
 
+    # check the position
+    if realm.map.tiles[pos].impassible:
+      return None
+
     # Select AI Policy
     danger = combat.danger(config, pos)
     if danger >= config.NPC_SPAWN_AGGRESSIVE: