/* playout.c (forked from pasky/pachi) */
#define DEBUG
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "board.h"
#include "debug.h"
#include "engine.h"
#include "move.h"
#include "ownermap.h"
#include "playout.h"
/* Whether to set the global debug level to the playout policy's debug
 * level for the duration of a playout, in case the two differ. This makes
 * sure that e.g. tactical reading produces the proper level of debug
 * prints during simulations.
 * But it is safe to enable this only in single-threaded instances! */
//#define DEBUGL_BY_PLAYOUT
#define PLDEBUGL(n) DEBUGL_(policy->debug_level, n)
/* Full permit logic: decide whether move @m may be played on board @b.
 * Pass and resign are never permitted here. Any other move has to be
 * accepted by board_permit() first and then, if the policy provides one,
 * by the policy's permit() hook. The hook receives @m by pointer together
 * with @alt, i.e. m->coord may get changed to an alternative move. */
static bool
playout_permit_move(struct playout_policy *p, struct board *b, struct move *m, bool alt)
{
	if (m->coord == pass || m->coord == resign)
		return false;

	if (!board_permit(b, m, NULL))
		return false;

	/* Without a policy hook the board-level check is the final word. */
	return !p->permit || p->permit(p, b, m, alt);
}
/* Run @coord for @color through the full permit logic with alternatives
 * enabled. Returns the coordinate left in the move when it is permitted
 * (the original one or an alternative), and pass when it is not. */
static coord_t
playout_check_move(struct playout_policy *p, struct board *b, coord_t coord, enum stone color)
{
	struct move candidate = { .coord = coord, .color = color };
	bool permitted = playout_permit_move(p, b, &candidate, true);

	return permitted ? candidate.coord : pass;
}
/* Is *this* move permitted? No alternative move is requested (alt is
 * false) and the possibly modified move is not handed back to the caller.
 * Meant to be called from within a policy's permit() hook to check some
 * other move, so this is never the main permit() call. */
bool
playout_permit(struct playout_policy *p, struct board *b, coord_t coord, enum stone color)
{
	return playout_permit_move(p, b,
				   &(struct move){ .coord = coord, .color = color },
				   false);
}
/* Permit callback in the (board, move, data) form: @data carries the
 * playout policy, and the move goes through the full permit logic with
 * alternatives enabled. */
static bool
permit_handler(struct board *b, struct move *m, void *data)
{
	return playout_permit_move((struct playout_policy *) data, b, m, true);
}
coord_t
play_random_move(struct playout_setup *setup,
struct board *b, enum stone color,
struct playout_policy *policy)
{
coord_t coord = pass;
if (setup->prepolicy_hook) {
coord = setup->prepolicy_hook(policy, setup, b, color);
// fprintf(stderr, "prehook: %s\n", coord2sstr(coord, b));
}
if (is_pass(coord)) {
coord = policy->choose(policy, setup, b, color);
coord = playout_check_move(policy, b, coord, color);
// fprintf(stderr, "policy: %s\n", coord2sstr(coord, b));
}
if (is_pass(coord) && setup->postpolicy_hook) {
coord = setup->postpolicy_hook(policy, setup, b, color);
// fprintf(stderr, "posthook: %s\n", coord2sstr(coord, b));
}
if (is_pass(coord)) {
play_random:
/* Defer to uniformly random move choice. */
/* This must never happen if the policy is tracking
* internal board state, obviously. */
assert(!policy->setboard || policy->setboard_randomok);
board_play_random(b, color, &coord, permit_handler, policy);
} else {
struct move m;
m.coord = coord; m.color = color;
if (board_play(b, &m) < 0) {
if (PLDEBUGL(4)) {
fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n",
coord_x(coord, b), coord_y(coord, b));
board_print(b, stderr);
}
goto play_random;
}
}
return coord;
}
int
play_random_game(struct playout_setup *setup,
struct board *b, enum stone starting_color,
struct playout_amafmap *amafmap,
struct board_ownermap *ownermap,
struct playout_policy *policy)
{
assert(setup && policy);
int gamelen = setup->gamelen - b->moves;
if (policy->setboard)
policy->setboard(policy, b);
#ifdef DEBUGL_BY_PLAYOUT
int debug_level_orig = debug_level;
debug_level = policy->debug_level;
#endif
enum stone color = starting_color;
int passes = is_pass(b->last_move.coord) && b->moves > 0;
while (gamelen-- && passes < 2) {
coord_t coord = play_random_move(setup, b, color, policy);
#if 0
/* For UCT, superko test here is downright harmful since
* in superko-likely situation we throw away literally
* 95% of our playouts; UCT will deal with this fine by
* itself. */
if (unlikely(b->superko_violation)) {
/* We ignore superko violations that are suicides. These
* are common only at the end of the game and are
* rather harmless. (They will not go through as a root
* move anyway.) */
if (group_at(b, coord)) {
if (DEBUGL(3)) {
fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
if (DEBUGL(4))
board_print(b, stderr);
}
return 0;
} else {
if (DEBUGL(6)) {
fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
board_print(b, stderr);
}
b->superko_violation = false;
}
}
#endif
if (PLDEBUGL(7)) {
fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b));
if (PLDEBUGL(8))
board_print(b, stderr);
}
if (unlikely(is_pass(coord))) {
passes++;
} else {
passes = 0;
}
if (amafmap) {
assert(amafmap->gamelen < MAX_GAMELEN);
amafmap->is_ko_capture[amafmap->gamelen] = board_playing_ko_threat(b);
amafmap->game[amafmap->gamelen++] = coord;
}
if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin)
break;
color = stone_other(color);
}
floating_t score = board_fast_score(b);
int result = (starting_color == S_WHITE ? score * 2 : - (score * 2));
if (DEBUGL(6)) {
fprintf(stderr, "Random playout result: %d (W %f)\n", result, score);
if (DEBUGL(7))
board_print(b, stderr);
}
if (ownermap)
board_ownermap_fill(ownermap, b);
#ifdef DEBUGL_BY_PLAYOUT
debug_level = debug_level_orig;
#endif
return result;
}
/* Tear down playout policy @p: give the policy's own done() hook (if it
 * has one) a chance to clean up, then release p->data and @p itself.
 * @p must not be used afterwards. */
void
playout_policy_done(struct playout_policy *p)
{
	if (p->done)
		p->done(p);

	/* free() is a no-op on NULL, so p->data needs no guard. */
	free(p->data);
	free(p);
}