Skip to content

Commit

Permalink
Merge pull request #74 from CarperAI/2.0
Browse files Browse the repository at this point in the history
2.0
  • Loading branch information
jsuarez5341 committed May 18, 2023
2 parents 5aaf33b + 60fece1 commit 416b7d8
Show file tree
Hide file tree
Showing 38 changed files with 2,459 additions and 818 deletions.
3 changes: 1 addition & 2 deletions nmmo/__init__.py
Expand Up @@ -9,7 +9,6 @@
from .core import config, agent
from .core.agent import Agent
from .core.env import Env
from .systems.achievement import Task
from .core.terrain import MapGenerator, Terrain

MOTD = rf''' ___ ___ ___ ___
Expand All @@ -26,7 +25,7 @@

__all__ = ['Env', 'config', 'agent', 'Agent', 'MapGenerator', 'Terrain',
'action', 'Action', 'material', 'spawn',
'Task', 'Overlay', 'OverlayRegistry']
'Overlay', 'OverlayRegistry']

try:
__all__.append('OpenSkillRating')
Expand Down
3 changes: 0 additions & 3 deletions nmmo/core/config.py
Expand Up @@ -154,9 +154,6 @@ def game_system_enabled(self, name) -> bool:
PLAYERS = [Agent]
'''Player classes from which to spawn'''

TASKS = []
'''Tasks for which to compute rewards'''

############################################################################
### Emulation Parameters

Expand Down
115 changes: 91 additions & 24 deletions nmmo/core/env.py
@@ -1,6 +1,7 @@
import functools
import random
from typing import Any, Dict, List
import copy
from typing import Any, Dict, List, Optional, Union, Tuple
from ordered_set import OrderedSet

import gym
Expand All @@ -14,13 +15,15 @@
from nmmo.entity.entity import Entity
from nmmo.systems.item import Item
from nmmo.core import realm

from nmmo.task.game_state import GameStateGenerator
from nmmo.task.task_api import Task
from nmmo.task.scenario import default_task
from scripted.baselines import Scripted


class Env(ParallelEnv):
# Environment wrapper for Neural MMO using the Parallel PettingZoo API

#pylint: disable=no-value-for-parameter
def __init__(self,
config: Default = nmmo.config.Default(), seed=None):
self._init_random(seed)
Expand All @@ -35,6 +38,18 @@ def __init__(self,
self._dead_agents = OrderedSet()
self.scripted_agents = OrderedSet()

self._gamestate_generator = GameStateGenerator(self.realm, self.config)
self.game_state = None
# Default task: rewards 1 each turn agent is alive
self.tasks: List[Tuple[Task,float]] = None
self._task_encoding = None
self._task_embedding_size = -1
t = default_task(self.possible_agents)
self.change_task(t,
embedding_size=self._task_embedding_size,
task_encoding=self._task_encoding,
reset=False)

# pylint: disable=method-cache-max-size-none
@functools.lru_cache(maxsize=None)
def observation_space(self, agent: int):
Expand All @@ -60,7 +75,7 @@ def box(rows, cols):
"Tick": gym.spaces.Discrete(1),
"AgentId": gym.spaces.Discrete(1),
"Tile": box(self.config.MAP_N_OBS, Tile.State.num_attributes),
"Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes)
"Entity": box(self.config.PLAYER_N_OBS, Entity.State.num_attributes),
}

if self.config.ITEM_SYSTEM_ENABLED:
Expand All @@ -72,6 +87,12 @@ def box(rows, cols):
if self.config.PROVIDE_ACTION_TARGETS:
obs_space['ActionTargets'] = self.action_space(None)

if self._task_encoding:
obs_space['Task'] = gym.spaces.Box(
low=-2**20, high=2**20,
shape=(self._task_embedding_size,),
dtype=np.float32)

return gym.spaces.Dict(obs_space)

def _init_random(self, seed):
Expand Down Expand Up @@ -109,6 +130,28 @@ def action_space(self, agent):
############################################################################
# Core API

def change_task(self,
                new_tasks: List[Union[Tuple[Task, float], Task]],
                task_encoding: Optional[Dict[int, np.ndarray]] = None,
                embedding_size: int=16,
                reset: bool=True,
                map_id=None,
                seed=None,
                options=None):
  """ Changes the tasks given to the agents

  Args:
    new_tasks: The tasks to complete and calculate rewards from. Each entry
      is either a (task, weight) tuple or a bare task; a bare task is
      stored with a weight of 1.
    task_encoding: A mapping from eid to encoded task
    embedding_size: The size of each embedding
    reset: Resets the environment after the new tasks are stored
    map_id: Forwarded to reset(); ignored when reset is False
    seed: Forwarded to reset(); ignored when reset is False
    options: Forwarded to reset(); ignored when reset is False
  """
  # Normalize every entry to a (task, weight) pair. The runtime check uses the
  # builtin tuple; typing.Tuple is an annotation-only (deprecated) alias.
  self._tasks = [t if isinstance(t, tuple) else (t, 1) for t in new_tasks]
  self._task_encoding = task_encoding
  self._task_embedding_size = embedding_size

  # observation_space() is memoized with lru_cache, but its result depends on
  # the task encoding and embedding size set above. Drop the memoized spaces
  # so the next call reflects the new task settings. The getattr guard keeps
  # subclasses that override observation_space without caching working.
  clear_cached_spaces = getattr(self.observation_space, 'cache_clear', None)
  if clear_cached_spaces is not None:
    clear_cached_spaces()

  if reset:
    self.reset(map_id=map_id, seed=seed, options=options)

# TODO: This doesn't conform to the PettingZoo API
# pylint: disable=arguments-renamed
def reset(self, map_id=None, seed=None, options=None):
Expand Down Expand Up @@ -142,9 +185,16 @@ def reset(self, map_id=None, seed=None, options=None):
if isinstance(ent.agent, Scripted):
self.scripted_agents.add(eid)

self.tasks = copy.deepcopy(self._tasks)
self.obs = self._compute_observations()
self._gamestate_generator = GameStateGenerator(self.realm, self.config)

return {a: o.to_gym() for a,o in self.obs.items()}
gym_obs = {}
for a, o in self.obs.items():
gym_obs[a] = o.to_gym()
if self._task_encoding:
gym_obs[a]['Task'] = self._encode_goal().get(a,np.zeros(self._task_embedding_size))
return gym_obs

def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
'''Simulates one game tick or timestep
Expand Down Expand Up @@ -239,18 +289,15 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):
Provided for conformity with PettingZoo
'''
assert self.obs is not None, 'step() called before reset'

# Add in scripted agents' actions, if any
if self.scripted_agents:
actions = self._compute_scripted_agent_actions(actions)

# Drop invalid actions of BOTH neural and scripted agents
# we don't need _deserialize_scripted_actions() anymore
actions = self._validate_actions(actions)

# Execute actions
self.realm.step(actions)

dones = {}
for eid in self.possible_agents:
if eid not in self._dead_agents and (
Expand All @@ -262,7 +309,11 @@ def step(self, actions: Dict[int, Dict[str, Dict[str, Any]]]):

# Store the observations, since actions reference them
self.obs = self._compute_observations()
gym_obs = {a: o.to_gym() for a,o in self.obs.items()}
gym_obs = {}
for a, o in self.obs.items():
gym_obs[a] = o.to_gym()
if self._task_encoding:
gym_obs[a]['Task'] = self._encode_goal()[a]

rewards, infos = self._compute_rewards(self.obs.keys(), dones)

Expand Down Expand Up @@ -332,6 +383,7 @@ def _compute_observations(self):
obs: Dictionary of observations for each agent
obs[agent_id] = {
"Entity": [e1, e2, ...],
"Task": [encoded_task],
"Tile": [t1, t2, ...],
"Inventory": [i1, i2, ...],
"Market": [m1, m2, ...],
Expand Down Expand Up @@ -364,12 +416,17 @@ def _compute_observations(self):

inventory = Item.Query.owned_by(self.realm.datastore, agent_id)

obs[agent_id] = Observation(
self.config, self.realm.tick,
agent_id, visible_tiles, visible_entities, inventory, market)

obs[agent_id] = Observation(self.config,
self.realm.tick,
agent_id,
visible_tiles,
visible_entities,
inventory, market)
return obs

def _encode_goal(self):
  '''Returns the stored task encoding without modifying it

  Returns:
    self._task_encoding as-is: the value last passed to change_task() as
    task_encoding (annotated there as a mapping from eid to encoded task),
    or None when no encoding was supplied.
  '''
  return self._task_encoding

def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]):
'''Computes the reward for the specified agent
Expand All @@ -385,21 +442,31 @@ def _compute_rewards(self, agents: List[AgentID], dones: Dict[AgentID, bool]):
The reward for the actions on the previous timestep of the
entity identified by ent_id.
'''
# Initialization
self.game_state = self._gamestate_generator.generate(self.realm, self.obs)
infos = {}
rewards = { eid: -1 for eid in dones }

for agent_id in agents:
infos[agent_id] = {}
agent = self.realm.players.get(agent_id)
assert agent is not None, f'Agent {agent_id} not found'

if agent.diary is not None:
rewards[agent_id] = sum(agent.diary.rewards.values())
infos[agent_id].update(agent.diary.rewards)
for eid in agents:
infos[eid] = {}
infos[eid]['task'] = {}
rewards = {eid: 0 for eid in agents}

# Compute Rewards and infos
for task, weight in self.tasks:
task_rewards, task_infos = task.compute_rewards(self.game_state)
for eid, reward in task_rewards.items():
# Rewards, weighted
rewards[eid] = rewards.get(eid,0) + reward * weight
# Infos
for eid, info in task_infos.items():
if eid in infos:
infos[eid]['task'] = {**infos[eid]['task'], **info}

# Remove rewards for dead agents (?)
for eid in dones:
rewards[eid] = 0

return rewards, infos


############################################################################
# PettingZoo API
############################################################################
Expand Down
16 changes: 1 addition & 15 deletions nmmo/core/log_helper.py
Expand Up @@ -80,13 +80,6 @@ def _register_player_stat(self, name: str, func: callable):

def _register_player_stats(self):
self._register_player_stat('Basic/TimeAlive', lambda player: player.history.time_alive.val)

if self.config.TASKS:
self._register_player_stat('Task/Completed', lambda player: player.diary.completed)
self._register_player_stat('Task/Reward' , lambda player: player.diary.cumulative_reward)
else:
self._register_player_stat('Task/Completed', lambda player: player.history.time_alive.val)

# Skills
if self.config.PROGRESSION_SYSTEM_ENABLED:
if self.config.COMBAT_SYSTEM_ENABLED:
Expand Down Expand Up @@ -137,13 +130,6 @@ def _player_stats(self, player: Agent) -> Dict[str, float]:
for key, stat_func in self._player_stats_funcs.items():
stats[f'{key}_{policy}'] = stat_func(player)

stats['Task_Reward'] = player.history.time_alive.val

# If diary is enabled, log task and achievement stats
if player.diary:
stats['Task_Reward'] = player.diary.cumulative_reward

for achievement in player.diary.achievements:
stats["Achievement_{achievement.name}"] = float(achievement.completed)
stats['Time_Alive'] = player.history.time_alive.val

return stats
2 changes: 1 addition & 1 deletion nmmo/core/observation.py
Expand Up @@ -123,7 +123,7 @@ def to_gym(self):
self.entities.values, np.zeros((
self.config.PLAYER_N_OBS - self.entities.values.shape[0],
self.entities.values.shape[1]))
]),
])
}

if self.config.ITEM_SYSTEM_ENABLED:
Expand Down
2 changes: 1 addition & 1 deletion nmmo/core/realm.py
Expand Up @@ -164,6 +164,7 @@ def step(self, actions):
# - 60: Move
# - 70: Sell - to guarantee the listed items are available to buy
# - 99: Comm

for priority in sorted(merged):
# TODO: we should be randomizing these, otherwise the lower ID agents
# will always go first. --> ONLY SHUFFLE BUY
Expand All @@ -176,7 +177,6 @@ def step(self, actions):
ent = self.entity(ent_id)
if ent.alive:
atn.call(self, ent, *args)

dead = self.players.cull()
self.npcs.cull()

Expand Down
21 changes: 8 additions & 13 deletions nmmo/core/terrain.py
Expand Up @@ -5,7 +5,7 @@

import numpy as np
import vec_noise
from imageio import imread, imsave
from imageio.v2 import imread, imsave
from scipy import stats

from nmmo import material
Expand Down Expand Up @@ -237,18 +237,13 @@ def generate_all_maps(self):
path_maps = os.path.join(config.PATH_CWD, config.PATH_MAPS)
os.makedirs(path_maps, exist_ok=True)

if not config.MAP_FORCE_GENERATION and os.listdir(path_maps):
# check if the folder has all the required maps
all_maps_exist = True
for idx in range(config.MAP_N, -1, -1):
map_file = path_maps + '/map' + str(idx+1) + '/map.npy'
if not os.path.exists(map_file):
# override MAP_FORCE_GENERATION = FALSE and generate maps
all_maps_exist = False
break

# do not generate maps if all maps exist
if all_maps_exist:
existing_maps = set(map_dir + '/map.npy' for map_dir in os.listdir(path_maps))
if not config.MAP_FORCE_GENERATION and existing_maps:
required_maps = {
f'map{idx}/map.npy' for idx in range(1, config.MAP_N+1)
}
missing = required_maps - existing_maps
if not missing:
return

if __debug__:
Expand Down
5 changes: 4 additions & 1 deletion nmmo/entity/entity_manager.py
Expand Up @@ -117,6 +117,9 @@ def cull(self):
for entity in super().cull().values():
self.spawn_dangers.append(entity.spawn_danger)

# refill npcs to target config.NPC_N, within config.NPC_SPAWN_ATTEMPTS
self.spawn()

def actions(self, realm):
actions = {}
for idx, entity in self.entities.items():
Expand All @@ -143,7 +146,7 @@ def spawn_individual(self, r, c, idx):

def spawn(self):
idx = 0
for r, c in spawn.spawn_concurrent(self.config):
for r, c in spawn.spawn_concurrent(self.config, self.realm):
idx += 1

if idx in self.entities:
Expand Down
8 changes: 7 additions & 1 deletion nmmo/entity/npc.py
Expand Up @@ -81,15 +81,21 @@ def receive_damage(self, source, dmg):
# because source cannot take it if the inventory is full
# Also, destroy the remaining items if the source cannot take those
for item in self.droptable.roll(self.realm, self.attack_level):
if source.inventory.space:
if source.is_player and source.inventory.space:
source.inventory.receive(item)
else:
item.destroy()

return False

@staticmethod
def spawn(realm, pos, iden):
config = realm.config

# check the position
if realm.map.tiles[pos].impassible:
return None

# Select AI Policy
danger = combat.danger(config, pos)
if danger >= config.NPC_SPAWN_AGGRESSIVE:
Expand Down

0 comments on commit 416b7d8

Please sign in to comment.