Initial commit.
tambetm committed Jun 20, 2018
0 parents commit d566375
Showing 9 changed files with 2,234 additions and 0 deletions.
365 changes: 365 additions & 0 deletions imitation/AlphaGoZero model.ipynb

Large diffs are not rendered by default.

469 changes: 469 additions & 0 deletions imitation/Clean.ipynb

Large diffs are not rendered by default.

294 changes: 294 additions & 0 deletions imitation/Conv model.ipynb

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions imitation/Discount.ipynb
@@ -0,0 +1,161 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data = np.load(\"valid_100K.npz\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(108262, 11, 11, 18)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"observations = data['observations']\n",
"observations.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(108262,)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"actions = data['actions']\n",
"actions.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(108262,)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rewards = data['rewards']\n",
"rewards.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"discount = 0.99\n",
"disc_rewards = []\n",
"for r in reversed(rewards):\n",
"    if r != 0:\n",
"        rew = r\n",
"    disc_rewards.insert(0, rew)\n",
"    rew *= discount"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"np.savez_compressed(\"valid_100K_disc0.99.npz\", observations=observations, actions=actions, rewards=disc_rewards)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"discount = 1\n",
"disc_rewards = []\n",
"for r in reversed(rewards):\n",
"    if r != 0:\n",
"        rew = r\n",
"    disc_rewards.insert(0, rew)\n",
"    rew *= discount"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"np.savez_compressed(\"valid_100K_disc1.npz\", observations=observations, actions=actions, rewards=disc_rewards)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
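
For reference, the discounting cells above walk the concatenated rewards backwards: whenever a nonzero (episode-final) reward is seen, the running return resets to it, and every earlier step of that episode receives the return multiplied once more by discount. A minimal standalone sketch of the same computation; the function name and the example rewards are illustrative, not part of the notebook:

import numpy as np

def discount_rewards(rewards, discount=0.99):
    # Backward pass over concatenated episodes. Rewards are assumed to be
    # nonzero only at episode-final steps, which is where the return resets.
    disc_rewards = []
    rew = 0.0
    for r in reversed(rewards):
        if r != 0:            # episode boundary: reset the running return
            rew = r
        disc_rewards.insert(0, rew)
        rew *= discount       # earlier steps get a discounted copy
    return np.array(disc_rewards)

# e.g. two concatenated episodes of lengths 3 and 2 with terminal rewards +1 and -1
print(discount_rewards([0, 0, 1, 0, -1], discount=0.99))
# -> [0.9801, 0.99, 1.0, -0.99, -1.0]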
240 changes: 240 additions & 0 deletions imitation/Fully connected model.ipynb

Large diffs are not rendered by default.

262 changes: 262 additions & 0 deletions imitation/Linear model.ipynb

Large diffs are not rendered by default.

87 changes: 87 additions & 0 deletions imitation/collect_simple.py
@@ -0,0 +1,87 @@
import pommerman
from pommerman import agents
import numpy as np
import argparse
from copy import deepcopy


def featurize(obs):
    # TODO: history of n moves?
    board = obs['board']

    # convert board items into bitmaps
    maps = [board == i for i in range(10)]
    maps.append(obs['bomb_blast_strength'])
    maps.append(obs['bomb_life'])

    # duplicate ammo, blast_strength and can_kick over entire map
    maps.append(np.full(board.shape, obs['ammo']))
    maps.append(np.full(board.shape, obs['blast_strength']))
    maps.append(np.full(board.shape, obs['can_kick']))

    # add my position as bitmap
    position = np.zeros(board.shape)
    position[obs['position']] = 1
    maps.append(position)

    # add teammate
    if obs['teammate'] is not None:
        maps.append(board == obs['teammate'].value)
    else:
        maps.append(np.zeros(board.shape))

    # add enemies
    enemies = [board == e.value for e in obs['enemies']]
    maps.append(np.any(enemies, axis=0))

    return np.stack(maps, axis=2)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_episodes', type=int, default=1000)
    parser.add_argument('--render', action="store_true", default=False)
    parser.add_argument('out_file')
    args = parser.parse_args()

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    observations = [[], [], [], []]
    actions = [[], [], [], []]
    rewards = [[], [], [], []]

    # Run the episodes just like OpenAI Gym
    for i in range(args.num_episodes):
        obs = env.reset()
        done = False
        reward = [0, 0, 0, 0]
        t = 0
        while not done:
            if args.render:
                env.render()
            action = env.act(obs)
            new_obs, new_reward, done, info = env.step(action)
            for j in range(4):
                if reward[j] == 0:
                    observations[j].append(featurize(obs[j]))
                    actions[j].append(action[j])
                    rewards[j].append(new_reward[j])
            obs = deepcopy(new_obs)
            reward = deepcopy(new_reward)
            t += 1
        print("Episode:", i + 1, "Max length:", t, "Rewards:", reward)
    env.close()

    np.savez_compressed(args.out_file,
                        observations=sum(observations, []),
                        actions=sum(actions, []),
                        rewards=sum(rewards, []))
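
featurize() stacks 18 feature planes per step (ten board-item bitmaps, bomb blast strength, bomb life, three scalars broadcast over the board, own position, teammate, and enemies), which is consistent with the (N, 11, 11, 18) observation arrays loaded in Discount.ipynb. A minimal sketch of inspecting a file written by this script; the file name below is illustrative, since the actual path is whatever out_file is passed on the command line:

import numpy as np

data = np.load("simple_1000ep.npz")    # illustrative name for the out_file argument

observations = data["observations"]    # (N, 11, 11, 18) feature planes from featurize()
actions = data["actions"]              # (N,) discrete actions chosen by SimpleAgent
rewards = data["rewards"]              # (N,) per-step rewards, nonzero only at episode ends

print(observations.shape, actions.shape, rewards.shape)
print("terminal steps:", np.count_nonzero(rewards))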
122 changes: 122 additions & 0 deletions imitation/eval_model.py
@@ -0,0 +1,122 @@
import pommerman
from pommerman import agents
import numpy as np
import time
from keras.models import load_model
import keras.backend as K
import tensorflow as tf
import argparse


def featurize(obs):
    # TODO: history of n moves?
    board = obs['board']

    # convert board items into bitmaps
    maps = [board == i for i in range(10)]
    maps.append(obs['bomb_blast_strength'])
    maps.append(obs['bomb_life'])

    # duplicate ammo, blast_strength and can_kick over entire map
    maps.append(np.full(board.shape, obs['ammo']))
    maps.append(np.full(board.shape, obs['blast_strength']))
    maps.append(np.full(board.shape, obs['can_kick']))

    # add my position as bitmap
    position = np.zeros(board.shape)
    position[obs['position']] = 1
    maps.append(position)

    # add teammate
    if obs['teammate'] is not None:
        maps.append(board == obs['teammate'].value)
    else:
        maps.append(np.zeros(board.shape))

    # add enemies
    enemies = [board == e.value for e in obs['enemies']]
    maps.append(np.any(enemies, axis=0))

    return np.stack(maps, axis=2)


class KerasAgent(agents.BaseAgent):
    def __init__(self, model_file):
        super().__init__()
        self.model = load_model(model_file)

    def act(self, obs, action_space):
        feat = featurize(obs)
        probs, values = self.model.predict(feat[np.newaxis])
        action = np.argmax(probs[0])
        #print("Action:", action)
        return action


def eval_model(agent_id, model_file, num_episodes):
    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    agent_list.insert(agent_id, KerasAgent(model_file))

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    rewards = []
    lengths = []
    start_time = time.time()
    # Run the episodes just like OpenAI Gym
    for i_episode in range(num_episodes):
        state = env.reset()
        done = False
        lens = [None] * 4
        t = 0
        while not done:
            if args.render:
                env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            for j in range(4):
                if lens[j] is None and reward[j] != 0:
                    lens[j] = t
            t += 1
        rewards.append(reward)
        lengths.append(lens)
        print('Episode ', i_episode, "reward:", reward[agent_id], "length:", lens[agent_id])
    elapsed = time.time() - start_time
    env.close()
    return rewards, lengths, elapsed


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('model_file')
    parser.add_argument('--num_episodes', type=int, default=400)
    parser.add_argument('--render', action='store_true', default=False)
    args = parser.parse_args()

    # make sure TF does not allocate all memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    rewards0, lengths0, elapsed0 = eval_model(0, args.model_file, args.num_episodes // 4)
    rewards1, lengths1, elapsed1 = eval_model(1, args.model_file, args.num_episodes // 4)
    rewards2, lengths2, elapsed2 = eval_model(2, args.model_file, args.num_episodes // 4)
    rewards3, lengths3, elapsed3 = eval_model(3, args.model_file, args.num_episodes // 4)

    rewards = [(r0[0], r1[1], r2[2], r3[3]) for r0, r1, r2, r3 in zip(rewards0, rewards1, rewards2, rewards3)]
    lengths = [(l0[0], l1[1], l2[2], l3[3]) for l0, l1, l2, l3 in zip(lengths0, lengths1, lengths2, lengths3)]

    print("Average reward:", np.mean(rewards))
    print("Average length:", np.mean(lengths))

    print("Average rewards per position:", np.mean(rewards, axis=0))
    print("Average lengths per position:", np.mean(lengths, axis=0))

    elapsed = elapsed0 + elapsed1 + elapsed2 + elapsed3
    total_timesteps = np.sum(np.max(np.concatenate([lengths0, lengths1, lengths2, lengths3], axis=0), axis=1))
    print("Time per timestep:", elapsed / total_timesteps)
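
The averages printed above are over raw episode rewards. If the FFA environment returns +1 for a win and -1 otherwise (an assumption not stated in this file), the mean reward converts directly to a win rate; a tiny illustrative helper:

def win_rate(mean_reward):
    # Assumes rewards of +1 for a win and -1 otherwise; then a mean reward of
    # m corresponds to a win rate of (m + 1) / 2.
    return (mean_reward + 1.0) / 2.0

print(win_rate(-0.5))  # mean reward of -0.5 -> 0.25, i.e. 25% of games won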
