Skip to content

Commit

Permalink
Merge pull request #78 from CarperAI/2.0
Browse files Browse the repository at this point in the history
mask fixes
  • Loading branch information
jsuarez5341 committed Sep 6, 2023
2 parents b14fafa + 3a1ebb0 commit 2cdf0f2
Show file tree
Hide file tree
Showing 14 changed files with 332 additions and 184 deletions.
73 changes: 41 additions & 32 deletions nmmo/core/config.py
Expand Up @@ -147,7 +147,7 @@ def game_system_enabled(self, name) -> bool:
PROVIDE_ACTION_TARGETS = True
'''Provide action targets mask'''

PROVIDE_NOOP_ACTION_TARGET = False
PROVIDE_NOOP_ACTION_TARGET = True
'''Provide a no-op option for each action'''

PLAYERS = [Agent]
Expand All @@ -159,7 +159,7 @@ def game_system_enabled(self, name) -> bool:
CURRICULUM_FILE_PATH = None
'''Path to a curriculum task file containing a list of task specs for training'''

TASK_EMBED_DIM = 1024
TASK_EMBED_DIM = 4096
'''Dimensionality of task embeddings'''

ALLOW_MULTI_TASKS_PER_AGENT = False
Expand Down Expand Up @@ -188,7 +188,7 @@ def game_system_enabled(self, name) -> bool:
PLAYER_N = None
'''Maximum number of players spawnable in the environment'''

# TODO(kywch): CHECK if there could be 100+ entities within one's vision
# TODO: CHECK if there could be 100+ entities within one's vision
PLAYER_N_OBS = 100
'''Number of distinct agent observations'''

Expand All @@ -211,18 +211,6 @@ def PLAYER_VISION_DIAMETER(self):
PLAYER_DEATH_FOG = None
'''How long before spawning death fog. None for no death fog'''


############################################################################
### Agent Parameters
IMMORTAL = False
'''Debug parameter: prevents agents from dying except by void'''

RESET_ON_DEATH = False
'''Whether to reset the environment whenever an agent dies'''

BASE_HEALTH = 10
'''Initial Constitution level and agent health'''

PLAYER_DEATH_FOG_SPEED = 1
'''Number of tiles per tick that the fog moves in'''

Expand All @@ -241,6 +229,14 @@ def PLAYER_TEAM_SIZE(self):
assert not self.PLAYER_N % len(self.PLAYERS)
return self.PLAYER_N // len(self.PLAYERS)

############################################################################
### Debug Parameters
IMMORTAL = False
'''Debug parameter: prevents agents from dying except by void'''

RESET_ON_DEATH = False
'''Debug parameter: whether to reset the environment whenever an agent dies'''

############################################################################
### Map Parameters
MAP_N = 1
Expand Down Expand Up @@ -358,10 +354,18 @@ class Resource:
RESOURCE_DEHYDRATION_RATE = 10
'''Damage per tick without water'''

RESOURCE_FOILAGE_CAPACITY = 1
RESOURCE_RESILIENT_POPULATION = 0
'''Training helper: proportion of population that is resilient to starvation and dehydration
(e.g. 0.1 means 10% of the population is resilient to starvation and dehydration)
This is to make some agents live longer during training to sample from "advanced" agents.'''

RESOURCE_DAMAGE_REDUCTION = 0.5
'''Training helper: damage reduction from starvation and dehydration for resilient agents'''

RESOURCE_FOILAGE_CAPACITY = 1
'''Maximum number of harvests before a foliage tile decays'''

RESOURCE_FOILAGE_RESPAWN = 0.025
RESOURCE_FOILAGE_RESPAWN = 0.025
'''Probability that a harvested foliage tile will regenerate each tick'''

RESOURCE_HARVEST_RESTORE_FRACTION = 1.0
Expand Down Expand Up @@ -413,30 +417,36 @@ def COMBAT_DAMAGE_FORMULA(self, offense, defense, multiplier):
'''Reach of attacks using the Mage skill'''


def default_exp_threshold(max_level):
    """Return the cumulative experience needed to reach each level.

    Advancing past level ``lvl`` costs ``round(90 * sqrt(lvl))`` additional
    experience.  Entry ``i`` of the result is the total of the first ``i``
    such costs, so the first entry is always 0 and the list has exactly
    ``max_level`` entries (empty when ``max_level`` is not positive).
    """
    import math

    thresholds = []
    running_total = 0
    for level in range(1, max_level + 1):
        # Record the total accumulated *before* paying this level's cost.
        thresholds.append(running_total)
        running_total += round(90 * math.sqrt(level))
    return thresholds

class Progression:
'''Progression Game System'''

PROGRESSION_SYSTEM_ENABLED = True
'''Game system flag'''

PROGRESSION_BASE_XP_SCALE = 1
'''Base XP awarded for each skill usage -- multiplied by skill level'''

PROGRESSION_COMBAT_XP_SCALE = 1
'''Multiplier on top of XP_SCALE for Melee, Range, and Mage'''

PROGRESSION_AMMUNITION_XP_SCALE = 1
'''Multiplier on top of XP_SCALE for Prospecting, Carving, and Alchemy'''

PROGRESSION_CONSUMABLE_XP_SCALE = 5
'''Multiplier on top of XP_SCALE for Fishing and Herbalism'''

PROGRESSION_BASE_LEVEL = 1
'''Initial skill level'''

PROGRESSION_LEVEL_MAX = 10
'''Max skill level'''

PROGRESSION_EXP_THRESHOLD = default_exp_threshold(PROGRESSION_LEVEL_MAX)
'''A list of experience thresholds for each level'''

PROGRESSION_COMBAT_XP_SCALE = 3
'''Additional XP for each attack for skills Melee, Range, and Mage'''

PROGRESSION_AMMUNITION_XP_SCALE = 15
'''Additional XP for each harvest for Prospecting, Carving, and Alchemy'''

PROGRESSION_CONSUMABLE_XP_SCALE = 30
'''Multiplier XP for each harvest for Fishing and Herbalism'''

PROGRESSION_MELEE_BASE_DAMAGE = 20
'''Base Melee attack damage'''

Expand Down Expand Up @@ -523,7 +533,6 @@ def INVENTORY_N_OBS(self):
return self.ITEM_INVENTORY_CAPACITY



class Equipment:
'''Equipment Game System'''

Expand Down Expand Up @@ -585,13 +594,13 @@ class Profession:
PROFESSION_HERB_CAPACITY = 1
'''Maximum number of harvests before an herb tile decays'''

PROFESSION_HERB_RESPAWN = 0.01
PROFESSION_HERB_RESPAWN = 0.02
'''Probability that a harvested herb tile will regenerate each tick'''

PROFESSION_FISH_CAPACITY = 1
'''Maximum number of harvests before a fish tile decays'''

PROFESSION_FISH_RESPAWN = 0.01
PROFESSION_FISH_RESPAWN = 0.02
'''Probability that a harvested fish tile will regenerate each tick'''

@staticmethod
Expand Down
46 changes: 35 additions & 11 deletions nmmo/core/observation.py
Expand Up @@ -199,8 +199,8 @@ def _make_action_targets(self):
"MarketItem": self._make_buy_mask()
}
masks["GiveGold"] = {
"Price": self._make_give_gold_mask(), # reusing Price
"Target": self._make_give_target_mask()
"Price": self._make_give_gold_mask(), # reusing Price
"Target": self._make_give_gold_target_mask()
}

if self.config.COMMUNICATION_SYSTEM_ENABLED:
Expand All @@ -213,8 +213,9 @@ def _make_action_targets(self):
def _make_move_mask(self):
    """Build the int8 mask over the entries of ``action.Direction.edges``.

    For a dummy observation only the last entry (the no-op slot) is enabled.
    Otherwise an entry is 1 when the tile at that direction's delta has a
    material id listed in ``material.Habitable.indices``.
    """
    directions = action.Direction.edges

    if self.dummy_obs:
        noop_only = np.zeros(len(directions), dtype=np.int8)
        noop_only[-1] = 1  # keep the no-op action available
        return noop_only

    # pylint: disable=not-an-iterable
    habitable_ids = material.Habitable.indices
    can_enter = [self.tile(*edge.delta).material_id in habitable_ids
                 for edge in directions]
    return np.array(can_enter, dtype=np.int8)
Expand Down Expand Up @@ -251,6 +252,11 @@ def _make_attack_mask(self):
not_me = self.entities.ids != agent.id

attack_mask[:self.entities.len] = within_range & not_me & no_spawn_immunity
if sum(attack_mask[:self.entities.len]) > 0:
# Mask the no-op option, since there should be at least one allowed move
# NOTE: this will make agents always attack if there is a valid target
attack_mask[-1] = 0

return attack_mask

def _make_use_mask(self):
Expand Down Expand Up @@ -325,9 +331,28 @@ def _make_give_target_mask(self):
give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8)
if self.config.PROVIDE_NOOP_ACTION_TARGET:
give_mask[-1] = 1
# empty inventory -- nothing to give
if not (self.config.ITEM_SYSTEM_ENABLED and self.inventory.len > 0)\
or self.dummy_obs or self.agent_in_combat:

if not self.config.ITEM_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
or self.inventory.len == 0:
return give_mask

agent = self.agent()
entities_pos = self.entities.values[:,[EntityState.State.attr_name_to_col["row"],
EntityState.State.attr_name_to_col["col"]]]
same_tile = utils.linf(entities_pos, (agent.row, agent.col)) == 0
not_me = self.entities.ids != self.agent_id
player = (self.entities.values[:,EntityState.State.attr_name_to_col["npc_type"]] == 0)

give_mask[:self.entities.len] = same_tile & player & not_me
return give_mask

def _make_give_gold_target_mask(self):
give_mask = np.zeros(self.config.PLAYER_N_OBS + self._noop_action, dtype=np.int8)
if self.config.PROVIDE_NOOP_ACTION_TARGET:
give_mask[-1] = 1

if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat\
or int(self.agent().gold) == 0:
return give_mask

agent = self.agent()
Expand All @@ -343,13 +368,11 @@ def _make_give_target_mask(self):
def _make_give_gold_mask(self):
mask = np.zeros(self.config.PRICE_N_OBS, dtype=np.int8)
mask[0] = 1 # To avoid all-0 masks. If the agent has no gold, this action will be ignored.
if self.dummy_obs:
if self.dummy_obs or self.agent_in_combat:
return mask

gold = int(self.agent().gold)
if gold and not self.agent_in_combat:
mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1

mask[:gold] = 1 # NOTE that action.Price starts from Discrete_1
return mask

def _make_sell_mask(self):
Expand All @@ -373,7 +396,8 @@ def _make_buy_mask(self):
if self.config.PROVIDE_NOOP_ACTION_TARGET:
buy_mask[-1] = 1

if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat:
if not self.config.EXCHANGE_SYSTEM_ENABLED or self.dummy_obs or self.agent_in_combat \
or self.market.len == 0:
return buy_mask

agent = self.agent()
Expand Down
32 changes: 27 additions & 5 deletions nmmo/entity/entity.py
Expand Up @@ -32,17 +32,25 @@
"food",
"water",

# Combat
# Combat Skills
"melee_level",
"melee_exp",
"range_level",
"range_exp",
"mage_level",
"mage_exp",

# Skills
# Harvest Skills
"fishing_level",
"fishing_exp",
"herbalism_level",
"herbalism_exp",
"prospecting_level",
"prospecting_exp",
"carving_level",
"carving_exp",
"alchemy_level",
"alchemy_exp",
])

EntityState.Limits = lambda config: {
Expand All @@ -69,13 +77,21 @@
} if config.RESOURCE_SYSTEM_ENABLED else {}),
**({
"melee_level": (0, config.PROGRESSION_LEVEL_MAX),
"melee_exp": (0, math.inf),
"range_level": (0, config.PROGRESSION_LEVEL_MAX),
"range_exp": (0, math.inf),
"mage_level": (0, config.PROGRESSION_LEVEL_MAX),
"mage_exp": (0, math.inf),
"fishing_level": (0, config.PROGRESSION_LEVEL_MAX),
"fishing_exp": (0, math.inf),
"herbalism_level": (0, config.PROGRESSION_LEVEL_MAX),
"herbalism_exp": (0, math.inf),
"prospecting_level": (0, config.PROGRESSION_LEVEL_MAX),
"prospecting_exp": (0, math.inf),
"carving_level": (0, config.PROGRESSION_LEVEL_MAX),
"carving_exp": (0, math.inf),
"alchemy_level": (0, config.PROGRESSION_LEVEL_MAX),
"alchemy_exp": (0, math.inf),
} if config.PROGRESSION_SYSTEM_ENABLED else {}),
}

Expand Down Expand Up @@ -106,6 +122,7 @@ def __init__(self, ent, config):
self.water = ent.water
self.food = ent.food
self.health_restore = 0
self.resilient = False

self.health.update(config.PLAYER_BASE_HEALTH)
if config.RESOURCE_SYSTEM_ENABLED:
Expand All @@ -128,10 +145,16 @@ def update(self):
self.health.increment(restore)

if self.food.empty:
self.health.decrement(self.config.RESOURCE_STARVATION_RATE)
starvation_damage = self.config.RESOURCE_STARVATION_RATE
if self.resilient:
starvation_damage *= self.config.RESOURCE_DAMAGE_REDUCTION
self.health.decrement(int(starvation_damage))

if self.water.empty:
self.health.decrement(self.config.RESOURCE_DEHYDRATION_RATE)
dehydration_damage = self.config.RESOURCE_DEHYDRATION_RATE
if self.resilient:
dehydration_damage *= self.config.RESOURCE_DAMAGE_REDUCTION
self.health.decrement(int(dehydration_damage))

# records both increase and decrease in health due to food and water
self.health_restore = self.health.val - org_health
Expand Down Expand Up @@ -257,7 +280,6 @@ def ent_id(self):

def packet(self):
data = {}

data['status'] = self.status.packet()
data['history'] = self.history.packet()
data['inventory'] = self.inventory.packet()
Expand Down
17 changes: 13 additions & 4 deletions nmmo/entity/entity_manager.py
Expand Up @@ -148,14 +148,23 @@ def reset(self, np_random):
self._agent_loader = self.loader_class(self.config, self._np_random)
self.spawned = set()

def spawn_individual(self, r, c, idx):
def spawn_individual(self, r, c, idx, resilient=False):
agent = next(self._agent_loader)
agent = agent(self.config, idx)
player = Player(self.realm, (r, c), agent)
agent = agent(self.config, idx)
player = Player(self.realm, (r, c), agent, resilient)
super().spawn(player)
self.spawned.add(idx)

def spawn(self):
# Randomly choose which players are spawned as resilient
resilient_flag = [False] * self.config.PLAYER_N
if self.config.RESOURCE_SYSTEM_ENABLED:
num_resilient = round(self.config.RESOURCE_RESILIENT_POPULATION * self.config.PLAYER_N)
for idx in range(num_resilient):
resilient_flag[idx] = self.config.RESOURCE_DAMAGE_REDUCTION > 0
self._np_random.shuffle(resilient_flag)

# Spawn the players
idx = 0
while idx < self.config.PLAYER_N:
idx += 1
Expand All @@ -167,4 +176,4 @@ def spawn(self):
if idx in self.spawned:
continue

self.spawn_individual(r, c, idx)
self.spawn_individual(r, c, idx, resilient_flag[idx-1])
5 changes: 2 additions & 3 deletions nmmo/entity/player.py
Expand Up @@ -4,11 +4,12 @@

# pylint: disable=no-member
class Player(entity.Entity):
def __init__(self, realm, pos, agent):
def __init__(self, realm, pos, agent, resilient=False):
super().__init__(realm, pos, agent.iden, agent.policy)

self.agent = agent
self.immortal = realm.config.IMMORTAL
self.resources.resilient = resilient

# Scripted hooks
self.target = None
Expand Down Expand Up @@ -97,9 +98,7 @@ def equipment(self):

def packet(self):
data = super().packet()

data['entID'] = self.ent_id

data['resource'] = self.resources.packet()
data['skills'] = self.skills.packet()
data['inventory'] = self.inventory.packet()
Expand Down

0 comments on commit 2cdf0f2

Please sign in to comment.