Merge pull request #78 from CarperAI/2.0

mask fixes
NeuralMMO · Sep 6, 2023 · 2cdf0f2 · 2cdf0f2
2 parents b14fafa + 3a1ebb0
commit 2cdf0f2
Show file tree

Hide file tree

Showing 14 changed files with 332 additions and 184 deletions.
diff --git a/nmmo/core/config.py b/nmmo/core/config.py
@@ -147,7 +147,7 @@ def game_system_enabled(self, name) -> bool:
   PROVIDE_ACTION_TARGETS       = True
   '''Provide action targets mask'''
 
-  PROVIDE_NOOP_ACTION_TARGET         = False
+  PROVIDE_NOOP_ACTION_TARGET   = True
   '''Provide a no-op option for each action'''
 
   PLAYERS                      = [Agent]
@@ -159,7 +159,7 @@ def game_system_enabled(self, name) -> bool:
   CURRICULUM_FILE_PATH = None
   '''Path to a curriculum task file containing a list of task specs for training'''
 
-  TASK_EMBED_DIM = 1024
+  TASK_EMBED_DIM = 4096
   '''Dimensionality of task embeddings'''
 
   ALLOW_MULTI_TASKS_PER_AGENT = False
@@ -188,7 +188,7 @@ def game_system_enabled(self, name) -> bool:
   PLAYER_N                     = None
   '''Maximum number of players spawnable in the environment'''
 
-  # TODO(kywch): CHECK if there could be 100+ entities within one's vision
+  # TODO: CHECK if there could be 100+ entities within one's vision
   PLAYER_N_OBS                 = 100
   '''Number of distinct agent observations'''
 
@@ -211,18 +211,6 @@ def PLAYER_VISION_DIAMETER(self):
   PLAYER_DEATH_FOG             = None
   '''How long before spawning death fog. None for no death fog'''
 
-
-  ############################################################################
-  ### Agent Parameters
-  IMMORTAL = False
-  '''Debug parameter: prevents agents from dying except by void'''
-
-  RESET_ON_DEATH = False
-  '''Whether to reset the environment whenever an agent dies'''
-
-  BASE_HEALTH                = 10
-  '''Initial Constitution level and agent health'''
-
   PLAYER_DEATH_FOG_SPEED       = 1
   '''Number of tiles per tick that the fog moves in'''
 
@@ -241,6 +229,14 @@ def PLAYER_TEAM_SIZE(self):
       assert not self.PLAYER_N % len(self.PLAYERS)
     return self.PLAYER_N // len(self.PLAYERS)
 
+  ############################################################################
+  ### Debug Parameters
+  IMMORTAL = False
+  '''Debug parameter: prevents agents from dying except by void'''
+
+  RESET_ON_DEATH = False
+  '''Debug parameter: whether to reset the environment whenever an agent dies'''
+
   ############################################################################
   ### Map Parameters
   MAP_N                        = 1
@@ -358,10 +354,18 @@ class Resource:
   RESOURCE_DEHYDRATION_RATE           = 10
   '''Damage per tick without water'''
 
-  RESOURCE_FOILAGE_CAPACITY            = 1
+  RESOURCE_RESILIENT_POPULATION       = 0
+  '''Training helper: proportion of population that is resilient to starvation and dehydration
+     (e.g. 0.1 means 10% of the population is resilient to starvation and dehydration)
+     This is to make some agents live longer during training to sample from "advanced" agents.'''
+
+  RESOURCE_DAMAGE_REDUCTION           = 0.5
+  '''Training helper: multiplier on starvation/dehydration damage for resilient agents (0 = off)'''
+
+  RESOURCE_FOILAGE_CAPACITY           = 1
   '''Maximum number of harvests before a foilage tile decays'''
 
-  RESOURCE_FOILAGE_RESPAWN             = 0.025
+  RESOURCE_FOILAGE_RESPAWN            = 0.025
   '''Probability that a harvested foilage tile will regenerate each tick'''
 
   RESOURCE_HARVEST_RESTORE_FRACTION   = 1.0
@@ -413,30 +417,36 @@ def COMBAT_DAMAGE_FORMULA(self, offense, defense, multiplier):
   '''Reach of attacks using the Mage skill'''
 
 
+def default_exp_threshold(max_level):
+  import math
+  additional_exp_per_level = [round(90*math.sqrt(lvl))
+                              for lvl in range(1, max_level+1)]
+  return [sum(additional_exp_per_level[:lvl]) for lvl in range(max_level)]
+
 class Progression:
   '''Progression Game System'''
 
   PROGRESSION_SYSTEM_ENABLED        = True
   '''Game system flag'''
 
-  PROGRESSION_BASE_XP_SCALE         = 1
-  '''Base XP awarded for each skill usage -- multiplied by skill level'''
-
-  PROGRESSION_COMBAT_XP_SCALE       = 1
-  '''Multiplier on top of XP_SCALE for Melee, Range, and Mage'''
-
-  PROGRESSION_AMMUNITION_XP_SCALE   = 1
-  '''Multiplier on top of XP_SCALE for Prospecting, Carving, and Alchemy'''
-
-  PROGRESSION_CONSUMABLE_XP_SCALE   = 5
-  '''Multiplier on top of XP_SCALE for Fishing and Herbalism'''
-
   PROGRESSION_BASE_LEVEL            = 1
   '''Initial skill level'''
 
   PROGRESSION_LEVEL_MAX             = 10
   '''Max skill level'''
 
+  PROGRESSION_EXP_THRESHOLD         = default_exp_threshold(PROGRESSION_LEVEL_MAX)
+  '''A list of cumulative experience thresholds, one per level (the first entry is 0)'''
+
+  PROGRESSION_COMBAT_XP_SCALE       = 3
+  '''Additional XP for each attack for skills Melee, Range, and Mage'''
+
+  PROGRESSION_AMMUNITION_XP_SCALE   = 15
+  '''Additional XP for each harvest for Prospecting, Carving, and Alchemy'''
+
+  PROGRESSION_CONSUMABLE_XP_SCALE   = 30
+  '''Additional XP for each harvest for Fishing and Herbalism'''
+
   PROGRESSION_MELEE_BASE_DAMAGE     = 20
   '''Base Melee attack damage'''
 
@@ -523,7 +533,6 @@ def INVENTORY_N_OBS(self):
     return self.ITEM_INVENTORY_CAPACITY
 
 
-
 class Equipment:
   '''Equipment Game System'''
 
@@ -585,13 +594,13 @@ class Profession:
   PROFESSION_HERB_CAPACITY            = 1
   '''Maximum number of harvests before an herb tile decays'''
 
-  PROFESSION_HERB_RESPAWN             = 0.01
+  PROFESSION_HERB_RESPAWN             = 0.02
   '''Probability that a harvested herb tile will regenerate each tick'''
 
   PROFESSION_FISH_CAPACITY            = 1
   '''Maximum number of harvests before a fish tile decays'''
 
-  PROFESSION_FISH_RESPAWN             = 0.01
+  PROFESSION_FISH_RESPAWN             = 0.02
   '''Probability that a harvested fish tile will regenerate each tick'''
 
   @staticmethod

diff --git a/nmmo/core/observation.py b/nmmo/core/observation.py
@@ -199,8 +199,8 @@ def _make_action_targets(self):
         "MarketItem": self._make_buy_mask()
       }
       masks["GiveGold"] = {
-        "Price": self._make_give_gold_mask(), # reusing Price
-        "Target": self._make_give_target_mask()
+        "Price": self._make_give_gold_mask(),  # reusing Price
+        "Target": self._make_give_gold_target_mask()
       }
 
     if self.config.COMMUNICATION_SYSTEM_ENABLED:
@@ -213,8 +213,9 @@ def _make_action_targets(self):
   def _make_move_mask(self):
     if self.dummy_obs:
       mask = np.zeros(len(action.Direction.edges), dtype=np.int8)
-      mask[-1] = 1  # make sure the noop action is available
+      mask[-1] = 1  # for no-op
       return mask
+
     # pylint: disable=not-an-iterable
     return np.array([self.tile(*d.delta).material_id in material.Habitable.indices
                      for d in action.Direction.edges], dtype=np.int8)
@@ -251,6 +252,11 @@ def _make_attack_mask(self):
     not_me = self.entities.ids != agent.id
 
     attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity
+    if sum(attack_mask[:self.entities.len]) > 0:
+      # Mask the no-op option, since there should be at least one allowed move
+      # NOTE: this will make agents always attack if there is a valid target
+      attack_mask[-1] = 0
+
     return attack_mask
 
   def _make_use_mask(self):
@@ -325,9 +331,28 @@ def _make_give_target_mask(self):
     give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8)
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       give_mask[-1] = 1
-    # empty inventory -- nothing to give
-    if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\
-        or self.dummy_obs or self.agent_in_combat:
+
+    if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
+       or self.inventory.len == 0:
+      return give_mask
+
+    agent = self.agent()
+    entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"],
+                                           EntityState.State.attr_name_to_col["col"]]]
+    same_tile = utils.linf(entities_pos, (agent.row, agent.col)) == 0
+    not_me = self.entities.ids != self.agent_id
+    player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)
+
+    give_mask[:self.entities.len] = same_tile & player & not_me
+    return give_mask
+
+  def _make_give_gold_target_mask(self):
+    give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8)
+    if self.config.PROVIDE_NOOP_ACTION_TARGET:
+      give_mask[-1] = 1
+
+    if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
+       or int(self.agent().gold) == 0:
       return give_mask
 
     agent = self.agent()
@@ -343,13 +368,11 @@ def _make_give_target_mask(self):
   def _make_give_gold_mask(self):
     mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8)
     mask[0] = 1  # To avoid all-0 masks. If the agent has no gold, this action will be ignored.
-    if self.dummy_obs:
+    if self.dummy_obs or self.agent_in_combat:
       return mask
 
     gold = int(self.agent().gold)
-    if gold and not self.agent_in_combat:
-      mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
-
+    mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
     return mask
 
   def _make_sell_mask(self):
@@ -373,7 +396,8 @@ def _make_buy_mask(self):
     if self.config.PROVIDE_NOOP_ACTION_TARGET:
       buy_mask[-1] = 1
 
-    if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
+    if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \
+       or self.market.len == 0:
       return buy_mask
 
     agent = self.agent()

diff --git a/nmmo/entity/entity.py b/nmmo/entity/entity.py
@@ -32,17 +32,25 @@
     "food",
     "water",
 
-    # Combat
+    # Combat Skills
     "melee_level",
+    "melee_exp",
     "range_level",
+    "range_exp",
     "mage_level",
+    "mage_exp",
 
-    # Skills
+    # Harvest Skills
     "fishing_level",
+    "fishing_exp",
     "herbalism_level",
+    "herbalism_exp",
     "prospecting_level",
+    "prospecting_exp",
     "carving_level",
+    "carving_exp",
     "alchemy_level",
+    "alchemy_exp",
   ])
 
 EntityState.Limits = lambda config: {
@@ -69,13 +77,21 @@
   } if config.RESOURCE_SYSTEM_ENABLED else {}),
   **({
     "melee_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "melee_exp": (0, math.inf),
     "range_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "range_exp": (0, math.inf),
     "mage_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "mage_exp": (0, math.inf),
     "fishing_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "fishing_exp": (0, math.inf),
     "herbalism_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "herbalism_exp": (0, math.inf),
     "prospecting_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "prospecting_exp": (0, math.inf),
     "carving_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "carving_exp": (0, math.inf),
     "alchemy_level": (0, config.PROGRESSION_LEVEL_MAX),
+    "alchemy_exp": (0, math.inf),
   } if config.PROGRESSION_SYSTEM_ENABLED else {}),
 }
 
@@ -106,6 +122,7 @@ def __init__(self, ent, config):
     self.water = ent.water
     self.food = ent.food
     self.health_restore = 0
+    self.resilient = False
 
     self.health.update(config.PLAYER_BASE_HEALTH)
     if config.RESOURCE_SYSTEM_ENABLED:
@@ -128,10 +145,16 @@ def update(self):
       self.health.increment(restore)
 
     if self.food.empty:
-      self.health.decrement(self.config.RESOURCE_STARVATION_RATE)
+      starvation_damage = self.config.RESOURCE_STARVATION_RATE
+      if self.resilient:
+        starvation_damage *= self.config.RESOURCE_DAMAGE_REDUCTION
+      self.health.decrement(int(starvation_damage))
 
     if self.water.empty:
-      self.health.decrement(self.config.RESOURCE_DEHYDRATION_RATE)
+      dehydration_damage = self.config.RESOURCE_DEHYDRATION_RATE
+      if self.resilient:
+        dehydration_damage *= self.config.RESOURCE_DAMAGE_REDUCTION
+      self.health.decrement(int(dehydration_damage))
 
     # records both increase and decrease in health due to food and water
     self.health_restore = self.health.val - org_health
@@ -257,7 +280,6 @@ def ent_id(self):
 
   def packet(self):
     data = {}
-
     data['status'] = self.status.packet()
     data['history'] = self.history.packet()
     data['inventory'] = self.inventory.packet()

diff --git a/nmmo/entity/entity_manager.py b/nmmo/entity/entity_manager.py
@@ -148,14 +148,23 @@ def reset(self, np_random):
     self._agent_loader = self.loader_class(self.config, self._np_random)
     self.spawned = set()
 
-  def spawn_individual(self, r, c, idx):
+  def spawn_individual(self, r, c, idx, resilient=False):
     agent = next(self._agent_loader)
-    agent      = agent(self.config, idx)
-    player     = Player(self.realm, (r, c), agent)
+    agent = agent(self.config, idx)
+    player = Player(self.realm, (r, c), agent, resilient)
     super().spawn(player)
     self.spawned.add(idx)
 
   def spawn(self):
+    # Randomly pick which players are resilient to starvation and dehydration
+    resilient_flag = [False] * self.config.PLAYER_N
+    if self.config.RESOURCE_SYSTEM_ENABLED:
+      num_resilient = round(self.config.RESOURCE_RESILIENT_POPULATION * self.config.PLAYER_N)
+      for idx in range(num_resilient):
+        resilient_flag[idx] = self.config.RESOURCE_DAMAGE_REDUCTION > 0
+      self._np_random.shuffle(resilient_flag)
+
+    # Spawn the players
     idx = 0
     while idx < self.config.PLAYER_N:
       idx += 1
@@ -167,4 +176,4 @@ def spawn(self):
       if idx in self.spawned:
         continue
 
-      self.spawn_individual(r, c, idx)
+      self.spawn_individual(r, c, idx, resilient_flag[idx-1])
diff --git a/nmmo/entity/player.py b/nmmo/entity/player.py
@@ -4,11 +4,12 @@
 
 # pylint: disable=no-member
 class Player(entity.Entity):
-  def __init__(self, realm, pos, agent):
+  def __init__(self, realm, pos, agent, resilient=False):
     super().__init__(realm, pos, agent.iden, agent.policy)
 
     self.agent    = agent
     self.immortal = realm.config.IMMORTAL
+    self.resources.resilient = resilient
 
     # Scripted hooks
     self.target = None
@@ -97,9 +98,7 @@ def equipment(self):
 
   def packet(self):
     data = super().packet()
-
     data['entID']     = self.ent_id
-
     data['resource']  = self.resources.packet()
     data['skills']    = self.skills.packet()
     data['inventory'] = self.inventory.packet()