Commit d566375 (0 parents)
Showing 9 changed files with 2,234 additions and 0 deletions.
@@ -0,0 +1,161 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.load(\"valid_100K.npz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(108262, 11, 11, 18)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "observations = data['observations']\n",
    "observations.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(108262,)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "actions = data['actions']\n",
    "actions.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(108262,)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rewards = data['rewards']\n",
    "rewards.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "discount = 0.99\n",
    "disc_rewards = []\n",
    "for r in reversed(rewards):\n",
    "    if r != 0:\n",
    "        rew = r\n",
    "    disc_rewards.insert(0, rew)\n",
    "    rew *= discount"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.savez_compressed(\"valid_100K_disc0.99.npz\", observations=observations, actions=actions, rewards=disc_rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "discount = 1\n",
    "disc_rewards = []\n",
    "for r in reversed(rewards):\n",
    "    if r != 0:\n",
    "        rew = r\n",
    "    disc_rewards.insert(0, rew)\n",
    "    rew *= discount"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.savez_compressed(\"valid_100K_disc1.npz\", observations=observations, actions=actions, rewards=disc_rewards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
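For reference, the loop in cells 6 and 8 propagates each terminal reward backwards through its episode: walking the flat reward array in reverse, every non-zero reward is taken as the episode outcome and decayed by the discount factor at each earlier timestep. A minimal standalone sketch of the same computation follows; the helper name and toy input are illustrative and not part of this commit, and it assumes (as in the notebook's data) that rewards are zero everywhere except at episode ends.

import numpy as np

def discounted_rewards(rewards, discount=0.99):
    # Walk backwards; restart at each terminal (non-zero) reward and
    # decay it by `discount` for every step further from the episode end.
    out = np.zeros(len(rewards))
    rew = 0.0
    for i in reversed(range(len(rewards))):
        if rewards[i] != 0:
            rew = rewards[i]
        out[i] = rew
        rew *= discount
    return out

# discounted_rewards([0, 0, 1, 0, -1], 0.99) -> [0.9801, 0.99, 1.0, -0.99, -1.0]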
@@ -0,0 +1,87 @@
import pommerman
from pommerman import agents
import numpy as np
import argparse
from copy import deepcopy


def featurize(obs):
    """Encode a Pommerman observation dict as a board.shape + (18,) stack of feature planes."""
    # TODO: history of n moves?
    board = obs['board']

    # convert board items into bitmaps
    maps = [board == i for i in range(10)]
    maps.append(obs['bomb_blast_strength'])
    maps.append(obs['bomb_life'])

    # duplicate ammo, blast_strength and can_kick over entire map
    maps.append(np.full(board.shape, obs['ammo']))
    maps.append(np.full(board.shape, obs['blast_strength']))
    maps.append(np.full(board.shape, obs['can_kick']))

    # add my position as bitmap
    position = np.zeros(board.shape)
    position[obs['position']] = 1
    maps.append(position)

    # add teammate
    if obs['teammate'] is not None:
        maps.append(board == obs['teammate'].value)
    else:
        maps.append(np.zeros(board.shape))

    # add enemies
    enemies = [board == e.value for e in obs['enemies']]
    maps.append(np.any(enemies, axis=0))

    return np.stack(maps, axis=2)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_episodes', type=int, default=1000)
    parser.add_argument('--render', action="store_true", default=False)
    parser.add_argument('out_file')
    args = parser.parse_args()

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    observations = [[], [], [], []]
    actions = [[], [], [], []]
    rewards = [[], [], [], []]

    # Run the episodes just like OpenAI Gym
    for i in range(args.num_episodes):
        obs = env.reset()
        done = False
        reward = [0, 0, 0, 0]
        t = 0
        while not done:
            if args.render:
                env.render()
            action = env.act(obs)
            new_obs, new_reward, done, info = env.step(action)
            # record transitions only for agents that were still alive this step
            for j in range(4):
                if reward[j] == 0:
                    observations[j].append(featurize(obs[j]))
                    actions[j].append(action[j])
                    rewards[j].append(new_reward[j])
            obs = deepcopy(new_obs)
            reward = deepcopy(new_reward)
            t += 1
        print("Episode:", i + 1, "Max length:", t, "Rewards:", reward)
    env.close()

    np.savez_compressed(args.out_file,
                        observations=sum(observations, []),
                        actions=sum(actions, []),
                        rewards=sum(rewards, []))
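The script writes one flat .npz archive with the three arrays concatenated across all four agents, which is exactly what the notebook above loads. A hypothetical usage sketch follows; the script filename is a placeholder (the commit's filenames are not shown here), while the archive name matches the one used in the notebook.

# Collect SimpleAgent self-play data, then inspect the archive:
#   python collect_data.py valid_100K.npz --num_episodes 1000
import numpy as np

data = np.load("valid_100K.npz")
print(data['observations'].shape)  # (N, 11, 11, 18): 11x11 board, 18 feature planes per cell
print(data['actions'].shape)       # (N,)
print(data['rewards'].shape)       # (N,)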
@@ -0,0 +1,122 @@
import pommerman
from pommerman import agents
import numpy as np
import time
from keras.models import load_model
import keras.backend as K
import tensorflow as tf
import argparse


def featurize(obs):
    """Encode a Pommerman observation dict as a board.shape + (18,) stack of feature planes."""
    # TODO: history of n moves?
    board = obs['board']

    # convert board items into bitmaps
    maps = [board == i for i in range(10)]
    maps.append(obs['bomb_blast_strength'])
    maps.append(obs['bomb_life'])

    # duplicate ammo, blast_strength and can_kick over entire map
    maps.append(np.full(board.shape, obs['ammo']))
    maps.append(np.full(board.shape, obs['blast_strength']))
    maps.append(np.full(board.shape, obs['can_kick']))

    # add my position as bitmap
    position = np.zeros(board.shape)
    position[obs['position']] = 1
    maps.append(position)

    # add teammate
    if obs['teammate'] is not None:
        maps.append(board == obs['teammate'].value)
    else:
        maps.append(np.zeros(board.shape))

    # add enemies
    enemies = [board == e.value for e in obs['enemies']]
    maps.append(np.any(enemies, axis=0))

    return np.stack(maps, axis=2)


class KerasAgent(agents.BaseAgent):
    """Agent that picks the argmax action of a trained Keras policy/value model."""

    def __init__(self, model_file):
        super().__init__()
        self.model = load_model(model_file)

    def act(self, obs, action_space):
        feat = featurize(obs)
        probs, values = self.model.predict(feat[np.newaxis])
        action = np.argmax(probs[0])
        #print("Action:", action)
        return action


def eval_model(agent_id, model_file, num_episodes):
    """Play num_episodes with the Keras model in slot agent_id against three SimpleAgents."""
    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    agent_list.insert(agent_id, KerasAgent(model_file))

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    rewards = []
    lengths = []
    start_time = time.time()
    # Run the episodes just like OpenAI Gym
    for i_episode in range(num_episodes):
        state = env.reset()
        done = False
        lens = [None] * 4
        t = 0
        while not done:
            if args.render:  # uses the global args parsed in __main__
                env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            for j in range(4):
                if lens[j] is None and reward[j] != 0:
                    lens[j] = t
            t += 1
        rewards.append(reward)
        lengths.append(lens)
        print('Episode ', i_episode, "reward:", reward[agent_id], "length:", lens[agent_id])
    elapsed = time.time() - start_time
    env.close()
    return rewards, lengths, elapsed


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('--num_episodes', type=int, default=400)
    parser.add_argument('--render', action='store_true', default=False)
    args = parser.parse_args()

    # make sure TF does not allocate all memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    # Evaluate the model in each of the four starting positions.
    rewards0, lengths0, elapsed0 = eval_model(0, args.model_file, args.num_episodes // 4)
    rewards1, lengths1, elapsed1 = eval_model(1, args.model_file, args.num_episodes // 4)
    rewards2, lengths2, elapsed2 = eval_model(2, args.model_file, args.num_episodes // 4)
    rewards3, lengths3, elapsed3 = eval_model(3, args.model_file, args.num_episodes // 4)

    # Keep only the model's own reward/length from each run (the entry at its agent_id).
    rewards = [(r0[0], r1[1], r2[2], r3[3]) for r0, r1, r2, r3 in zip(rewards0, rewards1, rewards2, rewards3)]
    lengths = [(l0[0], l1[1], l2[2], l3[3]) for l0, l1, l2, l3 in zip(lengths0, lengths1, lengths2, lengths3)]

    print("Average reward:", np.mean(rewards))
    print("Average length:", np.mean(lengths))

    print("Average rewards per position:", np.mean(rewards, axis=0))
    print("Average lengths per position:", np.mean(lengths, axis=0))

    elapsed = elapsed0 + elapsed1 + elapsed2 + elapsed3
    total_timesteps = np.sum(np.max(np.concatenate([lengths0, lengths1, lengths2, lengths3], axis=0), axis=1))
    print("Time per timestep:", elapsed / total_timesteps)