forked from hardmaru/slimevolleygym
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_ppo_pixel.py
89 lines (70 loc) · 2.34 KB
/
eval_ppo_pixel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
# test ppo1-trained CNN agent on pixel version of the task
import warnings
# Silence the numpy-related FutureWarnings raised from tensorflow, and gym's UserWarnings.
warnings.filterwarnings("ignore", category=FutureWarning, module='tensorflow')
warnings.filterwarnings("ignore", category=UserWarning, module='gym')
import numpy as np
import argparse
import gym
import slimevolleygym
from slimevolleygym import FrameStack, render_atari
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.atari_wrappers import ClipRewardEnv, NoopResetEnv, MaxAndSkipEnv, WarpFrame
from stable_baselines import PPO1
from time import sleep
# Default seed; overwritten with the --seed value when run as a script.
SEED = 831

# Rendering switches.
RENDER_MODE = True   # call env.render() while an episode is played
RENDER_ATARI = True # Render the game using the actual downsampled 84x84x4 greyscale inputs

# Placeholders: `viewer` is created in the __main__ section when RENDER_ATARI
# is set; `cv2` and `rendering` are bound by the conditional imports below.
viewer = None
cv2 = None
rendering = None

# Only pull in the display dependencies when some form of rendering is on.
if RENDER_MODE or RENDER_ATARI:
    import cv2
    from gym.envs.classic_control import rendering
def make_env(seed):
    """Build the seeded, wrapped "SlimeVolleyNoFrameskip-v0" environment.

    The base environment is passed through the wrapper chain
    NoopResetEnv(noop_max=30) -> MaxAndSkipEnv(skip=4) -> WarpFrame ->
    FrameStack(4), in that order, and the outermost wrapper is seeded.

    Args:
        seed: value handed to ``env.seed`` on the fully wrapped environment.

    Returns:
        The wrapped environment (the FrameStack instance).
    """
    raw_env = gym.make("SlimeVolleyNoFrameskip-v0")

    # Each stage wraps the previous one; the order matters.
    noop_env = NoopResetEnv(raw_env, noop_max=30)
    skip_env = MaxAndSkipEnv(noop_env, skip=4)
    warped_env = WarpFrame(skip_env)
    stacked_env = FrameStack(warped_env, 4)

    stacked_env.seed(seed)
    return stacked_env
def rollout(env, model):
    """Play a single episode and return the sum of the rewards received.

    Actions come from ``model.predict(obs, deterministic=True)``. Rendering
    is driven by module-level flags: with RENDER_MODE the environment's own
    ``render()`` is called (once after reset and after every step); with
    RENDER_ATARI the image returned by ``render_atari(obs)`` is shown in the
    module-level ``viewer``. If either flag is set, each step is followed by
    a 0.08 second pause.

    Args:
        env: environment exposing ``reset()``, ``step(action)`` and ``render()``.
        model: policy object exposing ``predict(obs, deterministic=...)``.

    Returns:
        The accumulated reward for the episode.
    """
    observation = env.reset()

    if RENDER_MODE:
        env.render()

    total_reward = 0

    # The body always runs at least once; the loop ends on the step that
    # reports the episode as finished, after that step has been drawn.
    while True:
        action, _ = model.predict(observation, deterministic=True)
        observation, step_reward, finished, _ = env.step(action)
        total_reward += step_reward

        if RENDER_MODE:
            env.render()
        if RENDER_ATARI:
            viewer.imshow(render_atari(observation))
        if RENDER_MODE or RENDER_ATARI:
            sleep(0.08)

        if finished:
            break

    return total_reward
if __name__ == '__main__':
    # Script entry point: load a saved PPO1 model, play a number of evaluation
    # episodes on the pixel environment, print each episode's return, then
    # print the mean and standard deviation over all episodes.
    parser = argparse.ArgumentParser(description='Evaluate pre-trained PPO1 CNN agent.')
    parser.add_argument('--model-path', help='path to stable-baselines model.',
                        type=str, default="zoo/ppo_cnn/best_model.zip")
    parser.add_argument('--seed', help='random seed (integer)', type=int, default=721)
    parser.add_argument('--num-episodes', help='number of evaluation episodes (integer)',
                        type=int, default=1000)
    args = parser.parse_args()

    SEED = args.seed

    env = make_env(SEED)
    try:
        model = PPO1.load(args.model_path)

        if RENDER_ATARI:
            # Assigned at module scope on purpose: rollout() reads this global.
            viewer = rendering.SimpleImageViewer(maxwidth=2160)

        rewards = []
        for i in range(args.num_episodes):
            cumulative_reward = rollout(env, model)
            print(i, cumulative_reward)
            rewards.append(cumulative_reward)

        # Skip the summary when no episode was played (e.g. --num-episodes 0),
        # since the statistics of an empty list are undefined.
        if rewards:
            print("mean", np.mean(rewards))
            print("stdev", np.std(rewards))
    finally:
        # Release the environment and the image window even when evaluation
        # is interrupted (Ctrl-C) or a rollout raises.
        env.close()
        if viewer is not None:
            viewer.close()