diff --git a/usi/UctSearch.cpp b/usi/UctSearch.cpp index 98ff906a..875b47ec 100644 --- a/usi/UctSearch.cpp +++ b/usi/UctSearch.cpp @@ -1,1333 +1,1333 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Message.h" -#include "UctSearch.h" -#include "Utility.h" -#include "mate.h" -#include "nn.h" - -#if defined (_WIN32) -#define NOMINMAX -#include -#else -#include -#include -#include -#include -#endif - -#include "cppshogi.h" - -using namespace std; - -#define LOCK_NODE(var) mutex_nodes[(var)].lock() -#define UNLOCK_NODE(var) mutex_nodes[(var)].unlock() -#define LOCK_EXPAND mutex_expand.lock(); -#define UNLOCK_EXPAND mutex_expand.unlock(); - - -//////////////// -// 大域変数 // -//////////////// - -// 持ち時間 -double remaining_time[ColorNum]; -double inc_time[ColorNum]; -double po_per_sec = PLAYOUT_SPEED; - -// UCTハッシュ -UctHash* uct_hash; - -// UCTのノード -uct_node_t *uct_node; - -// プレイアウト情報 -static po_info_t po_info; - -// 試行時間を延長するかどうかのフラグ -static bool extend_time = false; -// 探索対象の局面 -const Position *pos_root; -// 現在のルートのインデックス -unsigned int current_root; - -mutex mutex_nodes[uct_hash_size]; -mutex mutex_expand; // ノード展開を排他処理するためのmutex - -// 探索の設定 -enum SEARCH_MODE mode = TIME_SETTING_WITH_BYOYOMI_MODE; -// 1手あたりの試行時間 -double const_thinking_time = CONST_TIME; -// 1手当たりのプレイアウト数 -int playout = CONST_PLAYOUT; -// デフォルトの持ち時間 -double default_remaining_time = ALL_THINKING_TIME; - -bool pondering_mode = false; - -bool pondering = false; - -bool pondering_stop = false; - -double time_limit; - -// -bool reuse_subtree = true; - -ray_clock::time_point begin_time; - -// 投了する勝率の閾値 -float RESIGN_THRESHOLD = 0.01f; - -// モデルのパス -string model_path[max_gpu]; - -// ランダム -uniform_int_distribution rnd(0, 999); - -// 末端ノードでの詰み探索の深さ(奇数であること) -const int MATE_SEARCH_DEPTH = 7; - -// 詰み探索で詰みの場合のvalue_winの定数 -const float VALUE_WIN = FLT_MAX; -const float VALUE_LOSE = 
-FLT_MAX; - -//template -double atomic_fetch_add(std::atomic *obj, float arg) { - float expected = obj->load(); - while (!atomic_compare_exchange_weak(obj, &expected, expected + arg)) - ; - return expected; -} - -/////////////////////// -// 古いデータの削除 // -/////////////////////// -void -UctHash::delete_hash_recursively(Position &pos, const unsigned int index) { - node_hash[index].flag = true; - used++; - - child_node_t *child_node = uct_node[index].child; - for (int i = 0; i < uct_node[index].child_num; i++) { - if (child_node[i].index != NOT_EXPANDED && node_hash[child_node[i].index].flag == false) { - StateInfo st; - pos.doMove(child_node[i].move, st); - delete_hash_recursively(pos, child_node[i].index); - pos.undoMove(child_node[i].move); - } - } -} - -void -UctHash::DeleteOldHash(const Position* pos) -{ - // 現在の局面をルートとする局面以外を削除する - unsigned int root = FindSameHashIndex(pos->getKey(), pos->turn(), pos->gamePly()); - - used = 0; - for (unsigned int i = 0; i < uct_hash_size; i++) { - node_hash[i].flag = false; - } - - if (root != uct_hash_size) { - // 盤面のコピー - Position pos_copy(*pos); - delete_hash_recursively(pos_copy, root); - } - - enough_size = true; -} - -//////////// -// 関数 // -//////////// - -// Virtual Lossを加算 -static void AddVirtualLoss(child_node_t *child, unsigned int current); - -// 次のプレイアウト回数の設定 -static void CalculatePlayoutPerSec(double finish_time); -static void CalculateNextPlayouts(const Position *pos); - -// ルートの展開 -static unsigned int ExpandRoot(const Position *pos); - -// 思考時間を延長する処理 -static bool ExtendTime(void); - -// 候補手の初期化 -static void InitializeCandidate(child_node_t *uct_child, Move move); - -// 探索打ち切りの確認 -static bool InterruptionCheck(void); - -// 結果の更新 -static void UpdateResult(child_node_t *child, float result, unsigned int current); - -class UCTSearcher; -class UCTSearcherGroup { -public: - UCTSearcherGroup() : current_policy_value_queue_index(0), current_policy_value_batch_index(0), threads(0), running_threads(0), handle_eval(nullptr), 
nn(nullptr), y1(nullptr), y2(nullptr) { - features1[0] = features1[1] = nullptr; - features2[0] = features2[1] = nullptr; - policy_value_hash_index[0] = policy_value_hash_index[1] = nullptr; - } - ~UCTSearcherGroup() { - for (size_t i = 0; i < 2; i++) { - checkCudaErrors(cudaFreeHost(features1[i])); - checkCudaErrors(cudaFreeHost(features2[i])); - } - checkCudaErrors(cudaFreeHost(y1)); - checkCudaErrors(cudaFreeHost(y2)); - delete nn; - } - - void Initialize(const int new_thread, const int gpu_id); - void ClearEvalQueue(); - void QueuingNode(const Position *pos, unsigned int index); - void EvalNode(); - void Run(); - void Join(); - - // 実行中の探索スレッド数 - atomic running_threads; -private: - // 使用するスレッド数 - int threads; - // GPUID - int gpu_id; - - // 2つのキューを交互に使用する - int policy_value_batch_maxsize; // スレッド数以上確保する - features1_t* features1[2]; - features2_t* features2[2]; - unsigned int* policy_value_hash_index[2]; - int current_policy_value_queue_index; - int current_policy_value_batch_index; - - // UCTSearcher - vector searchers; - thread* handle_eval; - - // neural network - NN* nn; - float* y1; - float* y2; -}; -UCTSearcherGroup* search_groups; - -class UCTSearcher { -public: - UCTSearcher(UCTSearcherGroup* grp, const int thread_id) : - grp(grp), - thread_id(thread_id), - mt(new std::mt19937_64(std::chrono::system_clock::now().time_since_epoch().count() + thread_id)) {} - UCTSearcher(UCTSearcher&& o) : - grp(grp), - thread_id(thread_id), - mt(move(o.mt)) {} - - // UCT探索 - void ParallelUctSearch(); - // UCT探索(1回の呼び出しにつき, 1回の探索) - float UctSearch(Position *pos, const unsigned int current, const int depth); - // ノードの展開 - unsigned int ExpandNode(Position *pos, const int depth); - // UCB値が最大の子ノードを返す - int SelectMaxUcbChild(const Position *pos, const unsigned int current, const int depth); - // スレッド開始 - void Run() { - grp->running_threads++; - handle = new thread([this]() { this->ParallelUctSearch(); }); - } - // スレッド終了待機 - void Join() { - handle->join(); - 
grp->running_threads--; - delete handle; - } - -private: - UCTSearcherGroup* grp; - // スレッド識別番号 - int thread_id; - // 乱数生成器 - unique_ptr mt; - // スレッドのハンドル - thread *handle; -}; - - -void UCTSearcherGroup::ClearEvalQueue() { - current_policy_value_queue_index = 0; - current_policy_value_batch_index = 0; -} - -///////////////////// -// 予測読みの設定 // -///////////////////// -void -SetPonderingMode(bool flag) -{ - pondering_mode = flag; -} - -//////////////////////// -// 探索モードの指定 // -//////////////////////// -void -SetMode(enum SEARCH_MODE new_mode) -{ - mode = new_mode; -} -SEARCH_MODE GetMode() -{ - return mode; -} - -/////////////////////////////////////// -// 1手あたりのプレイアウト数の指定 // -/////////////////////////////////////// -void -SetPlayout(int po) -{ - playout = po; -} - - -///////////////////////////////// -// 1手にかける試行時間の設定 // -///////////////////////////////// -void -SetConstTime(double time) -{ - const_thinking_time = time; -} - - -//////////////////////////////// -// 使用するスレッド数の指定 // -//////////////////////////////// -void SetThread(const int new_thread[max_gpu]) -{ - for (int i = 0; i < max_gpu; i++) { - if (new_thread[i] > 0) - search_groups[i].Initialize(new_thread[i], i); - } -} - -void GameOver() -{ -} - -// 投了の閾値設定(1000分率) -void SetResignThreshold(const int resign_threshold) -{ - RESIGN_THRESHOLD = (float)resign_threshold / 1000.0f; -} - -void -UCTSearcherGroup::Initialize(const int new_thread, const int gpu_id) -{ - this->gpu_id = gpu_id; - if (threads != new_thread) { - threads = new_thread; - - // キューを動的に確保する - policy_value_batch_maxsize = threads; - for (size_t i = 0; i < 2; i++) { - checkCudaErrors(cudaFreeHost(features1[i])); - checkCudaErrors(cudaFreeHost(features2[i])); - delete[] policy_value_hash_index[i]; - checkCudaErrors(cudaHostAlloc(&features1[i], sizeof(features1_t) * policy_value_batch_maxsize, cudaHostAllocPortable)); - checkCudaErrors(cudaHostAlloc(&features2[i], sizeof(features2_t) * policy_value_batch_maxsize, cudaHostAllocPortable)); - 
policy_value_hash_index[i] = new unsigned int[policy_value_batch_maxsize]; - } - - // UCTSearcher - searchers.clear(); - searchers.reserve(threads); - for (int i = 0; i < threads; i++) { - searchers.emplace_back(this, i); - } - - checkCudaErrors(cudaFreeHost(y1)); - checkCudaErrors(cudaFreeHost(y2)); - checkCudaErrors(cudaHostAlloc(&y1, MAX_MOVE_LABEL_NUM * (int)SquareNum * threads * sizeof(float), cudaHostAllocPortable)); - checkCudaErrors(cudaHostAlloc(&y2, threads * sizeof(float), cudaHostAllocPortable)); - } -} - -// スレッド開始 -void -UCTSearcherGroup::Run() -{ - if (threads > 0) { - // 探索用スレッド - for (int i = 0; i < threads; i++) { - searchers[i].Run(); - } - - // 評価用スレッド - handle_eval = new thread([this]() { this->EvalNode(); }); - } -} - -// スレッド終了待機 -void -UCTSearcherGroup::Join() -{ - if (threads > 0) { - // 探索用スレッド - for (int i = 0; i < threads; i++) { - searchers[i].Join(); - } - - // 評価用スレッド - handle_eval->join(); - delete handle_eval; - } -} - -////////////////////// -// 持ち時間の設定 // -////////////////////// -void -SetTime(double time) -{ - default_remaining_time = time; -} -void -SetRemainingTime(double time, Color c) -{ - remaining_time[c] = time; -} -void -SetIncTime(double time, Color c) -{ - inc_time[c] = time; -} - -////////////////////////// -// ノード再利用の設定 // -////////////////////////// -void -SetReuseSubtree(bool flag) -{ - reuse_subtree = flag; -} - -////////////////////////////////////// -// time_settingsコマンドによる設定 // -////////////////////////////////////// -void -SetTimeSettings(int main_time, int byoyomi, int stone) -{ - if (main_time == 0) { - const_thinking_time = (double)byoyomi * 0.85; - mode = CONST_TIME_MODE; - cerr << "Const Thinking Time Mode" << endl; - } - else { - if (byoyomi == 0) { - default_remaining_time = main_time; - mode = TIME_SETTING_MODE; - cerr << "Time Setting Mode" << endl; - } - else { - default_remaining_time = main_time; - const_thinking_time = ((double)byoyomi) / stone; - mode = TIME_SETTING_WITH_BYOYOMI_MODE; - cerr << 
"Time Setting Mode (byoyomi)" << endl; - } - } -} - -///////////////////////// -// UCT探索の初期設定 // -///////////////////////// -void -InitializeUctSearch() -{ - // UCTのノードのメモリを確保 - uct_hash = new UctHash(uct_hash_size); - uct_node = new uct_node_t[uct_hash_size]; - - if (uct_node == nullptr) { - cerr << "Cannot allocate memory !!" << endl; - cerr << "You must reduce tree size !!" << endl; - exit(1); - } - - search_groups = new UCTSearcherGroup[max_gpu]; -} - -// UCT探索の終了処理 -void TerminateUctSearch() -{ - delete[] search_groups; -} - -//////////////////////// -// 探索設定の初期化 // -//////////////////////// -void -InitializeSearchSetting(void) -{ - // 持ち時間の初期化 - for (int i = 0; i < ColorNum; i++) { - remaining_time[i] = default_remaining_time; - } - - // 制限時間を設定 - // プレイアウト回数の初期化 - if (mode == CONST_PLAYOUT_MODE) { - time_limit = 100000.0; - po_info.num = playout; - extend_time = false; - } - else if (mode == CONST_TIME_MODE) { - time_limit = const_thinking_time; - po_info.num = 100000000; - extend_time = false; - } - else if (mode == TIME_SETTING_MODE || - mode == TIME_SETTING_WITH_BYOYOMI_MODE) { - time_limit = remaining_time[0]; - po_info.num = (int)(PLAYOUT_SPEED * time_limit); - extend_time = true; - } - po_per_sec = PLAYOUT_SPEED; -} - - -//////////// -// 終了 // -//////////// -void -FinalizeUctSearch(void) -{ - -} - -void -StopPondering(void) -{ - pondering_stop = true; -} - -///////////////////////////////////// -// UCTアルゴリズムによる着手生成 // -///////////////////////////////////// -Move -UctSearchGenmove(Position *pos, Move &ponderMove, bool ponder) -{ - Move move; - double finish_time; - - // ルート局面をグローバル変数に保存 - pos_root = pos; - - pondering = ponder; - pondering_stop = false; - - // 探索情報をクリア - po_info.count = 0; - - if (reuse_subtree) { - uct_hash->DeleteOldHash(pos); - } - else { - uct_hash->ClearUctHash(); - } - - // キューをクリア - for (int i = 0; i < max_gpu; i++) - search_groups[i].ClearEvalQueue(); - - // 探索開始時刻の記録 - begin_time = ray_clock::now(); - - // UCTの初期化 - 
current_root = ExpandRoot(pos); - - // 詰みのチェック - if (uct_node[current_root].child_num == 0) { - return Move::moveNone(); - } - else if (uct_node[current_root].value_win == VALUE_WIN) { - // 詰み - return mateMoveInOddPlyReturnMove(*pos, MATE_SEARCH_DEPTH); - } - else if (uct_node[current_root].value_win == VALUE_LOSE) { - // 自玉の詰み - return Move::moveNone(); - } - - // 前回から持ち込んだ探索回数を記録 - int pre_simulated = uct_node[current_root].move_count; - - // 探索回数の閾値を設定 - CalculateNextPlayouts(pos); - po_info.halt = po_info.num; - - // 探索時間とプレイアウト回数の予定値を出力 - PrintPlayoutLimits(time_limit, po_info.halt); - - // 探索スレッド開始 - for (int i = 0; i < max_gpu; i++) - search_groups[i].Run(); - - // 探索スレッド終了待機 - for (int i = 0; i < max_gpu; i++) - search_groups[i].Join(); - - // 着手が21手以降で, - // 時間延長を行う設定になっていて, - // 探索時間延長をすべきときは - // 探索回数を1.5倍に増やす - if (pos->gamePly() > 20 && - extend_time && - time_limit > const_thinking_time * 1.5 && - ExtendTime()) { - if (debug_message) cout << "ExtendTime" << endl; - po_info.halt = (int)(1.5 * po_info.halt); - time_limit *= 1.5; - // 探索スレッド開始 - for (int i = 0; i < max_gpu; i++) - search_groups[i].Run(); - - // 探索スレッド終了待機 - for (int i = 0; i < max_gpu; i++) - search_groups[i].Join(); - } - - // 探索にかかった時間を求める - finish_time = GetSpendTime(begin_time); - - const child_node_t* uct_child = uct_node[current_root].child; - - int max_count = 0; - unsigned int select_index = 0; - - // 探索回数最大の手を見つける - for (int i = 0; i < uct_node[current_root].child_num; i++) { - if (uct_child[i].move_count > max_count) { - select_index = i; - max_count = uct_child[i].move_count; - } - if (debug_message) cout << i << ":" << uct_child[i].move.toUSI() << " move_count:" << uct_child[i].move_count << " win_rate:" << uct_child[i].win / (uct_child[i].move_count + 0.0001f) << endl; - } - - // 選択した着手の勝率の算出 - float best_wp = uct_child[select_index].win / uct_child[select_index].move_count; - - if (best_wp <= RESIGN_THRESHOLD) { - move = Move::moveNone(); - } - else { - move = 
uct_child[select_index].move; - - // 歩、角、飛が成らない場合、強制的に成る - if (!move.isDrop() && !move.isPromotion() && - (move.pieceTypeTo() == Pawn || move.pieceTypeTo() == Bishop || move.pieceTypeTo() == Rook)) { - // 合法手に成る手があるか - for (int i = 0; i < uct_node[current_root].child_num; i++) { - if (uct_child[i].move.isPromotion() && uct_child[i].move.fromAndTo() == move.fromAndTo()) { - // 強制的に成る - move = uct_child[i].move; - break; - } - } - } - - int cp; - if (best_wp == 1.0f) { - cp = 30000; - } - else { - cp = int(-logf(1.0f / best_wp - 1.0f) * 756.0864962951762f); - } - - // PV表示 - string pv = move.toUSI(); - { - unsigned int best_index = select_index; - const child_node_t *best_node = uct_child; - - while (best_node[best_index].index != NOT_EXPANDED) { - const int best_node_index = best_node[best_index].index; - - best_node = uct_node[best_node_index].child; - max_count = 0; - for (int i = 0; i < uct_node[best_node_index].child_num; i++) { - if (best_node[i].move_count > max_count) { - best_index = i; - max_count = best_node[i].move_count; - } - } - - if (max_count < 80) - break; - - pv += " " + best_node[best_index].move.toUSI(); - - // ponderの着手 - if (pondering_mode && ponderMove == Move::moveNone()) - ponderMove = best_node[best_index].move; - } - } - - if (!pondering) - cout << "info nps " << int((uct_node[current_root].move_count - pre_simulated) / finish_time) << " time " << int(finish_time * 1000) << " nodes " << uct_node[current_root].move_count << " hashfull " << uct_hash->GetUctHashUsageRate() << " score cp " << cp << " pv " << pv << endl; - - // 次の探索でのプレイアウト回数の算出 - CalculatePlayoutPerSec(finish_time); - - if (!pondering) - remaining_time[pos->turn()] -= finish_time; - } - - // 最善応手列を出力 - //PrintBestSequence(pos, uct_node, current_root); - // 探索の情報を出力(探索回数, 勝敗, 思考時間, 勝率, 探索速度) - if (debug_message) PrintPlayoutInformation(&uct_node[current_root], &po_info, finish_time, pre_simulated); - - return move; -} - - -///////////////////// -// 候補手の初期化 // 
-///////////////////// -static void -InitializeCandidate(child_node_t *uct_child, Move move) -{ - uct_child->move = move; - uct_child->move_count = 0; - uct_child->win = 0; - uct_child->index = NOT_EXPANDED; - uct_child->nnrate = 0; -} - - -///////////////////////// -// ルートノードの展開 // -///////////////////////// -static unsigned int -ExpandRoot(const Position *pos) -{ - unsigned int index = uct_hash->FindSameHashIndex(pos->getKey(), pos->turn(), pos->gamePly()); - child_node_t *uct_child; - int child_num = 0; - - // 既に展開されていた時は, 探索結果を再利用する - if (index != uct_hash_size) { - PrintReuseCount(uct_node[index].move_count); - - return index; - } - else { - // 空のインデックスを探す - index = uct_hash->SearchEmptyIndex(pos->getKey(), pos->turn(), pos->gamePly()); - - assert(index != uct_hash_size); - - // ルートノードの初期化 - uct_node[index].move_count = 0; - uct_node[index].win = 0; - uct_node[index].child_num = 0; - uct_node[index].evaled = 0; - uct_node[index].value_win = 0.0f; - - uct_child = uct_node[index].child; - - // 候補手の展開 - for (MoveList ml(*pos); !ml.end(); ++ml) { - InitializeCandidate(&uct_child[child_num], ml.move()); - child_num++; - } - - // 子ノード個数の設定 - uct_node[index].child_num = child_num; - - // ノードをキューに追加 - search_groups[0].QueuingNode(pos, index); - - } - - return index; -} - - - -/////////////////// -// ノードの展開 // -/////////////////// -unsigned int -UCTSearcher::ExpandNode(Position *pos, const int depth) -{ - unsigned int index = uct_hash->FindSameHashIndex(pos->getKey(), pos->turn(), pos->gamePly() + depth); - child_node_t *uct_child; - - // 合流先が検知できれば, それを返す - if (index != uct_hash_size) { - return index; - } - - // 空のインデックスを探す - index = uct_hash->SearchEmptyIndex(pos->getKey(), pos->turn(), pos->gamePly() + depth); - - assert(index != uct_hash_size); - - // 現在のノードの初期化 - uct_node[index].move_count = 0; - uct_node[index].win = 0; - uct_node[index].child_num = 0; - uct_node[index].evaled = 0; - uct_node[index].value_win = 0.0f; - uct_child = uct_node[index].child; - - // 
候補手の展開 - int child_num = 0; - for (MoveList ml(*pos); !ml.end(); ++ml) { - InitializeCandidate(&uct_child[child_num], ml.move()); - child_num++; - } - - // 子ノードの個数を設定 - uct_node[index].child_num = child_num; - - // ノードをキューに追加 - if (child_num > 0) { - grp->QueuingNode(pos, index); - } - else { - uct_node[index].value_win = 0.0f; - uct_node[index].evaled = 1; - } - - return index; -} - - -////////////////////////////////////// -// ノードをキューに追加 // -////////////////////////////////////// -void -UCTSearcherGroup::QueuingNode(const Position *pos, unsigned int index) -{ - //cout << "QueuingNode:" << index << ":" << current_policy_value_queue_index << ":" << current_policy_value_batch_index << endl; - //cout << pos->toSFEN() << endl; - - /* if (current_policy_value_batch_index >= policy_value_batch_maxsize) { - std::cout << "error" << std::endl; - }*/ - // set all zero - std::fill_n((float*)features1[current_policy_value_queue_index][current_policy_value_batch_index], sizeof(features1_t) / sizeof(float), 0.0f); - std::fill_n((float*)features2[current_policy_value_queue_index][current_policy_value_batch_index], sizeof(features2_t) / sizeof(float), 0.0f); - - make_input_features(*pos, &features1[current_policy_value_queue_index][current_policy_value_batch_index], &features2[current_policy_value_queue_index][current_policy_value_batch_index]); - policy_value_hash_index[current_policy_value_queue_index][current_policy_value_batch_index] = index; - current_policy_value_batch_index++; -} - - -////////////////////////// -// 探索打ち止めの確認 // -////////////////////////// -static bool -InterruptionCheck(void) -{ - if (pondering) - return pondering_stop; - - int max = 0, second = 0; - const int child_num = uct_node[current_root].child_num; - const int rest = po_info.halt - po_info.count; - const child_node_t *uct_child = uct_node[current_root].child; - - if (mode != CONST_PLAYOUT_MODE && - GetSpendTime(begin_time) * 10.0 < time_limit) { - return false; - } - - // 探索回数が最も多い手と次に多い手を求める - for 
(int i = 0; i < child_num; i++) { - if (uct_child[i].move_count > max) { - second = max; - max = uct_child[i].move_count; - } - else if (uct_child[i].move_count > second) { - second = uct_child[i].move_count; - } - } - - // 残りの探索を全て次善手に費やしても - // 最善手を超えられない場合は探索を打ち切る - if (max - second > rest) { - return true; - } - else { - return false; - } -} - - -/////////////////////////// -// 思考時間延長の確認 // -/////////////////////////// -static bool -ExtendTime(void) -{ - int max = 0, second = 0; - const int child_num = uct_node[current_root].child_num; - const child_node_t *uct_child = uct_node[current_root].child; - - // 探索回数が最も多い手と次に多い手を求める - for (int i = 0; i < child_num; i++) { - if (uct_child[i].move_count > max) { - second = max; - max = uct_child[i].move_count; - } - else if (uct_child[i].move_count > second) { - second = uct_child[i].move_count; - } - } - - // 最善手の探索回数がが次善手の探索回数の - // 1.2倍未満なら探索延長 - if (max < second * 1.2) { - return true; - } - else { - return false; - } -} - - - -///////////////////////////////// -// 並列処理で呼び出す関数 // -// UCTアルゴリズムを反復する // -///////////////////////////////// -void -UCTSearcher::ParallelUctSearch() -{ - bool interruption = false; - bool enough_size = true; - - // 探索回数が閾値を超える, または探索が打ち切られたらループを抜ける - do { - // 探索回数を1回増やす - atomic_fetch_add(&po_info.count, 1); - // 盤面のコピー - Position pos(*pos_root); - //cout << pos.toSFEN() << ":" << pos.getKey() << endl; - // 1回プレイアウトする - UctSearch(&pos, current_root, 0); - //cout << "root:" << current_root << " move_count:" << uct_node[current_root].move_count << endl; - // 探索を打ち切るか確認 - interruption = InterruptionCheck(); - // ハッシュに余裕があるか確認 - enough_size = uct_hash->CheckRemainingHashSize(); - if (!pondering && GetSpendTime(begin_time) > time_limit) break; - } while (po_info.count < po_info.halt && !interruption && enough_size); - - return; -} - - -////////////////////////////////////////////// -// UCT探索を行う関数 // -// 1回の呼び出しにつき, 1プレイアウトする // -////////////////////////////////////////////// -float 
-UCTSearcher::UctSearch(Position *pos, const unsigned int current, const int depth) -{ - // 詰みのチェック - if (uct_node[current].child_num == 0) { - return 1.0f; // 反転して値を返すため1を返す - } - else if (uct_node[current].value_win == VALUE_WIN) { - // 詰み - return 0.0f; // 反転して値を返すため0を返す - } - else if (uct_node[current].value_win == VALUE_LOSE) { - // 自玉の詰み - return 1.0f; // 反転して値を返すため1を返す - } - - // 千日手チェック - if (uct_node[current].evaled == 2) { - switch (pos->isDraw(16)) { - case NotRepetition: break; - case RepetitionDraw: return 0.5f; - case RepetitionWin: return 0.0f; - case RepetitionLose: return 1.0f; - case RepetitionSuperior: return 0.0f; - case RepetitionInferior: return 1.0f; - default: UNREACHABLE; - } - } - - // policyが計算されるのを待つ(他のスレッドが同じノードを先に展開した場合、nnの計算を待つ必要がある) - while (uct_node[current].evaled == 0) - this_thread::yield(); - - float result; - unsigned int next_index; - double score; - child_node_t *uct_child = uct_node[current].child; - - // 現在見ているノードをロック - LOCK_NODE(current); - // UCB値最大の手を求める - next_index = SelectMaxUcbChild(pos, current, depth); - // 選んだ手を着手 - StateInfo st; - pos->doMove(uct_child[next_index].move, st); - - // Virtual Lossを加算 - AddVirtualLoss(&uct_child[next_index], current); - // ノードの展開の確認 - if (uct_child[next_index].index == NOT_EXPANDED ) { - // ノードの展開中はロック - LOCK_EXPAND; - // ノードの展開 - // ノード展開処理の中でvalueを計算する - unsigned int child_index = ExpandNode(pos, depth + 1); - uct_child[next_index].index = child_index; - //cerr << "value evaluated " << result << " " << v << " " << *value_result << endl; - // ノード展開のロックの解除 - UNLOCK_EXPAND; - - // 現在見ているノードのロックを解除 - UNLOCK_NODE(current); - - // 詰みチェック(ValueNet計算中にチェック) - int isMate = 0; - if (!pos->inCheck()) { - if (mateMoveInOddPly(*pos, MATE_SEARCH_DEPTH)) { - isMate = 1; - } - } - else { - if (mateMoveInEvenPly(*pos, MATE_SEARCH_DEPTH - 1)) { - isMate = -1; - } - } - - // 千日手チェック - int isDraw = 0; - switch (pos->isDraw(16)) { - case NotRepetition: break; - case RepetitionDraw: isDraw = 2; break; 
// Draw - case RepetitionWin: isDraw = 1; break; - case RepetitionLose: isDraw = -1; break; - case RepetitionSuperior: isDraw = 1; break; - case RepetitionInferior: isDraw = -1; break; - default: UNREACHABLE; - } - - // valueが計算されるのを待つ - //cout << "wait value:" << child_index << ":" << uct_node[child_index].evaled << endl; - while (uct_node[child_index].evaled == 0) - this_thread::yield(); - - // 千日手の場合、ValueNetの値を使用しない(経路によって判定が異なるため上書きはしない) - if (isDraw != 0) { - uct_node[child_index].evaled = 2; - if (isDraw == 1) { - result = 0.0f; - } - else if (isDraw == -1) { - result = 1.0f; - } - else { - result = 0.5f; - } - - } - // 詰みの場合、ValueNetの値を上書き - else if (isMate == 1) { - uct_node[child_index].value_win = VALUE_WIN; - result = 0.0f; - } - else if (isMate == -1) { - uct_node[child_index].value_win = VALUE_LOSE; - result = 1.0f; - } - else { - // valueを勝敗として返す - result = 1 - uct_node[child_index].value_win; - } - } - else { - // 現在見ているノードのロックを解除 - UNLOCK_NODE(current); - - // 手番を入れ替えて1手深く読む - result = UctSearch(pos, uct_child[next_index].index, depth + 1); - } - - // 探索結果の反映 - UpdateResult(&uct_child[next_index], result, current); - - return 1 - result; -} - - -////////////////////////// -// Virtual Lossの加算 // -////////////////////////// -static void -AddVirtualLoss(child_node_t *child, unsigned int current) -{ - atomic_fetch_add(&uct_node[current].move_count, VIRTUAL_LOSS); - atomic_fetch_add(&child->move_count, VIRTUAL_LOSS); -} - - -////////////////////// -// 探索結果の更新 // -///////////////////// -static void -UpdateResult(child_node_t *child, float result, unsigned int current) -{ - atomic_fetch_add(&uct_node[current].win, result); - atomic_fetch_add(&uct_node[current].move_count, 1 - VIRTUAL_LOSS); - atomic_fetch_add(&child->win, result); - atomic_fetch_add(&child->move_count, 1 - VIRTUAL_LOSS); -} - -// ディリクレ分布 -void random_dirichlet(std::mt19937_64 &mt, float *x, const int size) { - const float dirichlet_alpha = 0.15f; - static std::gamma_distribution 
gamma(dirichlet_alpha, 1.0f); - - float sum_y = 0; - for (int i = 0; i < size; i++) { - float y = gamma(mt); - sum_y += y; - x[i] = y; - } - std::for_each(x, x + size, [sum_y](float &v) mutable { v /= sum_y; }); -} - -///////////////////////////////////////////////////// -// UCBが最大となる子ノードのインデックスを返す関数 // -///////////////////////////////////////////////////// -int -UCTSearcher::SelectMaxUcbChild(const Position *pos, const unsigned int current, const int depth) -{ - const child_node_t *uct_child = uct_node[current].child; - const int child_num = uct_node[current].child_num; - int max_child = 0; - const int sum = uct_node[current].move_count; - float q, u, max_value; - float ucb_value; - unsigned int max_index; - //const bool debug = GetDebugMessageMode() && current == current_root && sum % 100 == 0; - - max_value = -1; - - // UCB値最大の手を求める - for (int i = 0; i < child_num; i++) { - float win = uct_child[i].win; - int move_count = uct_child[i].move_count; - - // evaled - /*if (debug) { - cerr << i << ":"; - cerr << uct_node[current].move_count << " "; - cerr << setw(3) << uct_child[i].move.toUSI(); - cerr << ": move " << setw(5) << move_count << " policy " - << setw(10) << uct_child[i].nnrate << " "; - }*/ - if (move_count == 0) { - q = 0.5f; - u = 1.0f; - } - else { - q = win / move_count; - u = sqrtf(sum) / (1 + move_count); - } - - float rate = max(uct_child[i].nnrate, 0.01f); - // ランダムに確率を上げる - if (depth == 0 && rnd(*mt) <= 2) { - rate = (rate + 1.0f) / 2.0f; - } - else if (depth < 4 && depth % 2 == 0 && rnd(*mt) == 0) { - rate = std::min(rate * 1.5f, 1.0f); - } - - ucb_value = q + c_puct * u * rate; - - /*if (debug) { - cerr << " Q:" << q << " U:" << c_puct * u * rate << " UCB:" << ucb_value << endl; - }*/ - - if (ucb_value > max_value) { - max_value = ucb_value; - max_child = i; - } - } - - /*if (debug) { - cerr << "select node:" << current << " child:" << max_child << endl; - }*/ - - return max_child; -} - - -///////////////////////////////// -// 次のプレイアウト回数の設定 // 
-///////////////////////////////// -static void -CalculatePlayoutPerSec(double finish_time) -{ - if (finish_time != 0.0) { - po_per_sec = po_info.count / finish_time; - } - else { - po_per_sec = PLAYOUT_SPEED; - } -} - -static void -CalculateNextPlayouts(const Position *pos) -{ - if (pondering) { - po_info.num = uct_hash_size; - return; - } - - int color = pos->turn(); - - // 探索の時の探索回数を求める - if (mode == CONST_TIME_MODE) { - po_info.num = (int)(po_per_sec * const_thinking_time); - } - else if (mode == TIME_SETTING_MODE || - mode == TIME_SETTING_WITH_BYOYOMI_MODE) { - time_limit = remaining_time[color] / (16 + max(0, 30 - pos->gamePly())) + inc_time[color]; - if (mode == TIME_SETTING_WITH_BYOYOMI_MODE && - time_limit < (const_thinking_time)) { - time_limit = const_thinking_time; - } - po_info.num = (int)(po_per_sec * time_limit); - } -} - -void SetModelPath(const std::string path[max_gpu]) -{ - for (int i = 0; i < max_gpu; i++) { - if (path[i] == "") - model_path[i] = path[0]; - else - model_path[i] = path[i]; - } -} - -void UCTSearcherGroup::EvalNode() { - cudaSetDevice(gpu_id); - - if (nn == nullptr) { - nn = new NN(threads); - nn->load_model(model_path[gpu_id].c_str()); - } - - bool enough_batch_size = true; // 初回はルートノードのため待たない - while (true) { - LOCK_EXPAND; - if (running_threads == 0 && current_policy_value_batch_index == 0) { - UNLOCK_EXPAND; - break; - } - - if (running_threads == 0) { - UNLOCK_EXPAND; - this_thread::yield(); - } - - if (running_threads > 0 && (current_policy_value_batch_index == 0 || !enough_batch_size && current_policy_value_batch_index < running_threads * 0.9)) { - UNLOCK_EXPAND; - this_thread::sleep_for(chrono::milliseconds(1)); - enough_batch_size = true; - } - else { - enough_batch_size = false; - int policy_value_batch_size = current_policy_value_batch_index; - int policy_value_queue_index = current_policy_value_queue_index; - current_policy_value_batch_index = 0; - current_policy_value_queue_index = current_policy_value_queue_index ^ 
1; - UNLOCK_EXPAND; - //std::cout << policy_value_batch_size << std::endl; - - // predict - nn->foward(policy_value_batch_size, features1[policy_value_queue_index], features2[policy_value_queue_index], y1, y2); - - const float(*logits)[MAX_MOVE_LABEL_NUM * SquareNum] = reinterpret_cast(y1); - const float *value = reinterpret_cast(y2); - - for (int i = 0; i < policy_value_batch_size; i++, logits++, value++) { - const unsigned int index = policy_value_hash_index[policy_value_queue_index][i]; - - /*if (index == current_root) { - string str; - for (int sq = 0; sq < SquareNum; sq++) { - str += to_string((int)features1[policy_value_queue_index][i][0][0][sq]); - str += " "; - } - cout << str << endl; - }*/ - - LOCK_NODE(index); - - const int child_num = uct_node[index].child_num; - child_node_t *uct_child = uct_node[index].child; - Color color = (Color)(*uct_hash)[index].color; - - // 合法手一覧 - std::vector legal_move_probabilities; - legal_move_probabilities.reserve(child_num); - for (int j = 0; j < child_num; j++) { - Move move = uct_child[j].move; - const int move_label = make_move_label((u16)move.proFromAndTo(), color); - legal_move_probabilities.emplace_back((*logits)[move_label]); - } - - // Boltzmann distribution - softmax_tempature_with_normalize(legal_move_probabilities); - - for (int j = 0; j < child_num; j++) { - uct_child[j].nnrate = legal_move_probabilities[j]; - } - - uct_node[index].value_win = *value; - uct_node[index].evaled = 1; - UNLOCK_NODE(index); - } - } - } -} +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Message.h" +#include "UctSearch.h" +#include "Utility.h" +#include "mate.h" +#include "nn.h" + +#if defined (_WIN32) +#define NOMINMAX +#include +#else +#include +#include +#include +#include +#endif + +#include "cppshogi.h" + +using namespace std; + +#define LOCK_NODE(var) mutex_nodes[(var)].lock() +#define 
UNLOCK_NODE(var) mutex_nodes[(var)].unlock() +#define LOCK_EXPAND mutex_expand.lock(); +#define UNLOCK_EXPAND mutex_expand.unlock(); + + +//////////////// +// 大域変数 // +//////////////// + +// 持ち時間 +double remaining_time[ColorNum]; +double inc_time[ColorNum]; +double po_per_sec = PLAYOUT_SPEED; + +// UCTハッシュ +UctHash* uct_hash; + +// UCTのノード +uct_node_t *uct_node; + +// プレイアウト情報 +static po_info_t po_info; + +// 試行時間を延長するかどうかのフラグ +static bool extend_time = false; +// 探索対象の局面 +const Position *pos_root; +// 現在のルートのインデックス +unsigned int current_root; + +mutex mutex_nodes[uct_hash_size]; +mutex mutex_expand; // ノード展開を排他処理するためのmutex + +// 探索の設定 +enum SEARCH_MODE mode = TIME_SETTING_WITH_BYOYOMI_MODE; +// 1手あたりの試行時間 +double const_thinking_time = CONST_TIME; +// 1手当たりのプレイアウト数 +int playout = CONST_PLAYOUT; +// デフォルトの持ち時間 +double default_remaining_time = ALL_THINKING_TIME; + +bool pondering_mode = false; + +bool pondering = false; + +bool pondering_stop = false; + +double time_limit; + +// +bool reuse_subtree = true; + +ray_clock::time_point begin_time; + +// 投了する勝率の閾値 +float RESIGN_THRESHOLD = 0.01f; + +// モデルのパス +string model_path[max_gpu]; + +// ランダム +uniform_int_distribution rnd(0, 999); + +// 末端ノードでの詰み探索の深さ(奇数であること) +const int MATE_SEARCH_DEPTH = 7; + +// 詰み探索で詰みの場合のvalue_winの定数 +const float VALUE_WIN = FLT_MAX; +const float VALUE_LOSE = -FLT_MAX; + +//template +double atomic_fetch_add(std::atomic *obj, float arg) { + float expected = obj->load(); + while (!atomic_compare_exchange_weak(obj, &expected, expected + arg)) + ; + return expected; +} + +/////////////////////// +// 古いデータの削除 // +/////////////////////// +void +UctHash::delete_hash_recursively(Position &pos, const unsigned int index) { + node_hash[index].flag = true; + used++; + + child_node_t *child_node = uct_node[index].child; + for (int i = 0; i < uct_node[index].child_num; i++) { + if (child_node[i].index != NOT_EXPANDED && node_hash[child_node[i].index].flag == false) { + StateInfo st; + 
pos.doMove(child_node[i].move, st); + delete_hash_recursively(pos, child_node[i].index); + pos.undoMove(child_node[i].move); + } + } +} + +void +UctHash::DeleteOldHash(const Position* pos) +{ + // 現在の局面をルートとする局面以外を削除する + unsigned int root = FindSameHashIndex(pos->getKey(), pos->turn(), pos->gamePly()); + + used = 0; + for (unsigned int i = 0; i < uct_hash_size; i++) { + node_hash[i].flag = false; + } + + if (root != uct_hash_size) { + // 盤面のコピー + Position pos_copy(*pos); + delete_hash_recursively(pos_copy, root); + } + + enough_size = true; +} + +//////////// +// 関数 // +//////////// + +// Virtual Lossを加算 +static void AddVirtualLoss(child_node_t *child, unsigned int current); + +// 次のプレイアウト回数の設定 +static void CalculatePlayoutPerSec(double finish_time); +static void CalculateNextPlayouts(const Position *pos); + +// ルートの展開 +static unsigned int ExpandRoot(const Position *pos); + +// 思考時間を延長する処理 +static bool ExtendTime(void); + +// 候補手の初期化 +static void InitializeCandidate(child_node_t *uct_child, Move move); + +// 探索打ち切りの確認 +static bool InterruptionCheck(void); + +// 結果の更新 +static void UpdateResult(child_node_t *child, float result, unsigned int current); + +class UCTSearcher; +class UCTSearcherGroup { +public: + UCTSearcherGroup() : current_policy_value_queue_index(0), current_policy_value_batch_index(0), threads(0), running_threads(0), handle_eval(nullptr), nn(nullptr), y1(nullptr), y2(nullptr) { + features1[0] = features1[1] = nullptr; + features2[0] = features2[1] = nullptr; + policy_value_hash_index[0] = policy_value_hash_index[1] = nullptr; + } + ~UCTSearcherGroup() { + for (size_t i = 0; i < 2; i++) { + checkCudaErrors(cudaFreeHost(features1[i])); + checkCudaErrors(cudaFreeHost(features2[i])); + } + checkCudaErrors(cudaFreeHost(y1)); + checkCudaErrors(cudaFreeHost(y2)); + delete nn; + } + + void Initialize(const int new_thread, const int gpu_id); + void ClearEvalQueue(); + void QueuingNode(const Position *pos, unsigned int index); + void EvalNode(); + void Run(); 
+ void Join(); + + // 実行中の探索スレッド数 + atomic running_threads; +private: + // 使用するスレッド数 + int threads; + // GPUID + int gpu_id; + + // 2つのキューを交互に使用する + int policy_value_batch_maxsize; // スレッド数以上確保する + features1_t* features1[2]; + features2_t* features2[2]; + unsigned int* policy_value_hash_index[2]; + int current_policy_value_queue_index; + int current_policy_value_batch_index; + + // UCTSearcher + vector searchers; + thread* handle_eval; + + // neural network + NN* nn; + float* y1; + float* y2; +}; +UCTSearcherGroup* search_groups; + +class UCTSearcher { +public: + UCTSearcher(UCTSearcherGroup* grp, const int thread_id) : + grp(grp), + thread_id(thread_id), + mt(new std::mt19937_64(std::chrono::system_clock::now().time_since_epoch().count() + thread_id)) {} + UCTSearcher(UCTSearcher&& o) : + grp(grp), + thread_id(thread_id), + mt(move(o.mt)) {} + + // UCT探索 + void ParallelUctSearch(); + // UCT探索(1回の呼び出しにつき, 1回の探索) + float UctSearch(Position *pos, const unsigned int current, const int depth); + // ノードの展開 + unsigned int ExpandNode(Position *pos, const int depth); + // UCB値が最大の子ノードを返す + int SelectMaxUcbChild(const Position *pos, const unsigned int current, const int depth); + // スレッド開始 + void Run() { + grp->running_threads++; + handle = new thread([this]() { this->ParallelUctSearch(); }); + } + // スレッド終了待機 + void Join() { + handle->join(); + grp->running_threads--; + delete handle; + } + +private: + UCTSearcherGroup* grp; + // スレッド識別番号 + int thread_id; + // 乱数生成器 + unique_ptr mt; + // スレッドのハンドル + thread *handle; +}; + + +void UCTSearcherGroup::ClearEvalQueue() { + current_policy_value_queue_index = 0; + current_policy_value_batch_index = 0; +} + +///////////////////// +// 予測読みの設定 // +///////////////////// +void +SetPonderingMode(bool flag) +{ + pondering_mode = flag; +} + +//////////////////////// +// 探索モードの指定 // +//////////////////////// +void +SetMode(enum SEARCH_MODE new_mode) +{ + mode = new_mode; +} +SEARCH_MODE GetMode() +{ + return mode; +} + 
+/////////////////////////////////////// +// 1手あたりのプレイアウト数の指定 // +/////////////////////////////////////// +void +SetPlayout(int po) +{ + playout = po; +} + + +///////////////////////////////// +// 1手にかける試行時間の設定 // +///////////////////////////////// +void +SetConstTime(double time) +{ + const_thinking_time = time; +} + + +//////////////////////////////// +// 使用するスレッド数の指定 // +//////////////////////////////// +void SetThread(const int new_thread[max_gpu]) +{ + for (int i = 0; i < max_gpu; i++) { + if (new_thread[i] > 0) + search_groups[i].Initialize(new_thread[i], i); + } +} + +void GameOver() +{ +} + +// 投了の閾値設定(1000分率) +void SetResignThreshold(const int resign_threshold) +{ + RESIGN_THRESHOLD = (float)resign_threshold / 1000.0f; +} + +void +UCTSearcherGroup::Initialize(const int new_thread, const int gpu_id) +{ + this->gpu_id = gpu_id; + if (threads != new_thread) { + threads = new_thread; + + // キューを動的に確保する + policy_value_batch_maxsize = threads; + for (size_t i = 0; i < 2; i++) { + checkCudaErrors(cudaFreeHost(features1[i])); + checkCudaErrors(cudaFreeHost(features2[i])); + delete[] policy_value_hash_index[i]; + checkCudaErrors(cudaHostAlloc(&features1[i], sizeof(features1_t) * policy_value_batch_maxsize, cudaHostAllocPortable)); + checkCudaErrors(cudaHostAlloc(&features2[i], sizeof(features2_t) * policy_value_batch_maxsize, cudaHostAllocPortable)); + policy_value_hash_index[i] = new unsigned int[policy_value_batch_maxsize]; + } + + // UCTSearcher + searchers.clear(); + searchers.reserve(threads); + for (int i = 0; i < threads; i++) { + searchers.emplace_back(this, i); + } + + checkCudaErrors(cudaFreeHost(y1)); + checkCudaErrors(cudaFreeHost(y2)); + checkCudaErrors(cudaHostAlloc(&y1, MAX_MOVE_LABEL_NUM * (int)SquareNum * threads * sizeof(float), cudaHostAllocPortable)); + checkCudaErrors(cudaHostAlloc(&y2, threads * sizeof(float), cudaHostAllocPortable)); + } +} + +// スレッド開始 +void +UCTSearcherGroup::Run() +{ + if (threads > 0) { + // 探索用スレッド + for (int i = 0; i < 
threads; i++) { + searchers[i].Run(); + } + + // 評価用スレッド + handle_eval = new thread([this]() { this->EvalNode(); }); + } +} + +// スレッド終了待機 +void +UCTSearcherGroup::Join() +{ + if (threads > 0) { + // 探索用スレッド + for (int i = 0; i < threads; i++) { + searchers[i].Join(); + } + + // 評価用スレッド + handle_eval->join(); + delete handle_eval; + } +} + +////////////////////// +// 持ち時間の設定 // +////////////////////// +void +SetTime(double time) +{ + default_remaining_time = time; +} +void +SetRemainingTime(double time, Color c) +{ + remaining_time[c] = time; +} +void +SetIncTime(double time, Color c) +{ + inc_time[c] = time; +} + +////////////////////////// +// ノード再利用の設定 // +////////////////////////// +void +SetReuseSubtree(bool flag) +{ + reuse_subtree = flag; +} + +////////////////////////////////////// +// time_settingsコマンドによる設定 // +////////////////////////////////////// +void +SetTimeSettings(int main_time, int byoyomi, int stone) +{ + if (main_time == 0) { + const_thinking_time = (double)byoyomi * 0.85; + mode = CONST_TIME_MODE; + cerr << "Const Thinking Time Mode" << endl; + } + else { + if (byoyomi == 0) { + default_remaining_time = main_time; + mode = TIME_SETTING_MODE; + cerr << "Time Setting Mode" << endl; + } + else { + default_remaining_time = main_time; + const_thinking_time = ((double)byoyomi) / stone; + mode = TIME_SETTING_WITH_BYOYOMI_MODE; + cerr << "Time Setting Mode (byoyomi)" << endl; + } + } +} + +///////////////////////// +// UCT探索の初期設定 // +///////////////////////// +void +InitializeUctSearch() +{ + // UCTのノードのメモリを確保 + uct_hash = new UctHash(uct_hash_size); + uct_node = new uct_node_t[uct_hash_size]; + + if (uct_node == nullptr) { + cerr << "Cannot allocate memory !!" << endl; + cerr << "You must reduce tree size !!" 
<< endl; + exit(1); + } + + search_groups = new UCTSearcherGroup[max_gpu]; +} + +// UCT探索の終了処理 +void TerminateUctSearch() +{ + delete[] search_groups; +} + +//////////////////////// +// 探索設定の初期化 // +//////////////////////// +void +InitializeSearchSetting(void) +{ + // 持ち時間の初期化 + for (int i = 0; i < ColorNum; i++) { + remaining_time[i] = default_remaining_time; + } + + // 制限時間を設定 + // プレイアウト回数の初期化 + if (mode == CONST_PLAYOUT_MODE) { + time_limit = 100000.0; + po_info.num = playout; + extend_time = false; + } + else if (mode == CONST_TIME_MODE) { + time_limit = const_thinking_time; + po_info.num = 100000000; + extend_time = false; + } + else if (mode == TIME_SETTING_MODE || + mode == TIME_SETTING_WITH_BYOYOMI_MODE) { + time_limit = remaining_time[0]; + po_info.num = (int)(PLAYOUT_SPEED * time_limit); + extend_time = true; + } + po_per_sec = PLAYOUT_SPEED; +} + + +//////////// +// 終了 // +//////////// +void +FinalizeUctSearch(void) +{ + +} + +void +StopPondering(void) +{ + pondering_stop = true; +} + +///////////////////////////////////// +// UCTアルゴリズムによる着手生成 // +///////////////////////////////////// +Move +UctSearchGenmove(Position *pos, Move &ponderMove, bool ponder) +{ + Move move; + double finish_time; + + // ルート局面をグローバル変数に保存 + pos_root = pos; + + pondering = ponder; + pondering_stop = false; + + // 探索情報をクリア + po_info.count = 0; + + if (reuse_subtree) { + uct_hash->DeleteOldHash(pos); + } + else { + uct_hash->ClearUctHash(); + } + + // キューをクリア + for (int i = 0; i < max_gpu; i++) + search_groups[i].ClearEvalQueue(); + + // 探索開始時刻の記録 + begin_time = ray_clock::now(); + + // UCTの初期化 + current_root = ExpandRoot(pos); + + // 詰みのチェック + if (uct_node[current_root].child_num == 0) { + return Move::moveNone(); + } + else if (uct_node[current_root].value_win == VALUE_WIN) { + // 詰み + return mateMoveInOddPlyReturnMove(*pos, MATE_SEARCH_DEPTH); + } + else if (uct_node[current_root].value_win == VALUE_LOSE) { + // 自玉の詰み + return Move::moveNone(); + } + + // 前回から持ち込んだ探索回数を記録 + int 
pre_simulated = uct_node[current_root].move_count; + + // 探索回数の閾値を設定 + CalculateNextPlayouts(pos); + po_info.halt = po_info.num; + + // 探索時間とプレイアウト回数の予定値を出力 + PrintPlayoutLimits(time_limit, po_info.halt); + + // 探索スレッド開始 + for (int i = 0; i < max_gpu; i++) + search_groups[i].Run(); + + // 探索スレッド終了待機 + for (int i = 0; i < max_gpu; i++) + search_groups[i].Join(); + + // 着手が21手以降で, + // 時間延長を行う設定になっていて, + // 探索時間延長をすべきときは + // 探索回数を1.5倍に増やす + if (pos->gamePly() > 20 && + extend_time && + time_limit > const_thinking_time * 1.5 && + ExtendTime()) { + if (debug_message) cout << "ExtendTime" << endl; + po_info.halt = (int)(1.5 * po_info.halt); + time_limit *= 1.5; + // 探索スレッド開始 + for (int i = 0; i < max_gpu; i++) + search_groups[i].Run(); + + // 探索スレッド終了待機 + for (int i = 0; i < max_gpu; i++) + search_groups[i].Join(); + } + + // 探索にかかった時間を求める + finish_time = GetSpendTime(begin_time); + + const child_node_t* uct_child = uct_node[current_root].child; + + int max_count = 0; + unsigned int select_index = 0; + + // 探索回数最大の手を見つける + for (int i = 0; i < uct_node[current_root].child_num; i++) { + if (uct_child[i].move_count > max_count) { + select_index = i; + max_count = uct_child[i].move_count; + } + if (debug_message) cout << i << ":" << uct_child[i].move.toUSI() << " move_count:" << uct_child[i].move_count << " win_rate:" << uct_child[i].win / (uct_child[i].move_count + 0.0001f) << endl; + } + + // 選択した着手の勝率の算出 + float best_wp = uct_child[select_index].win / uct_child[select_index].move_count; + + if (best_wp <= RESIGN_THRESHOLD) { + move = Move::moveNone(); + } + else { + move = uct_child[select_index].move; + + // 歩、角、飛が成らない場合、強制的に成る + if (!move.isDrop() && !move.isPromotion() && + (move.pieceTypeTo() == Pawn || move.pieceTypeTo() == Bishop || move.pieceTypeTo() == Rook)) { + // 合法手に成る手があるか + for (int i = 0; i < uct_node[current_root].child_num; i++) { + if (uct_child[i].move.isPromotion() && uct_child[i].move.fromAndTo() == move.fromAndTo()) { + // 強制的に成る + move = 
uct_child[i].move; + break; + } + } + } + + int cp; + if (best_wp == 1.0f) { + cp = 30000; + } + else { + cp = int(-logf(1.0f / best_wp - 1.0f) * 756.0864962951762f); + } + + // PV表示 + string pv = move.toUSI(); + { + unsigned int best_index = select_index; + const child_node_t *best_node = uct_child; + + while (best_node[best_index].index != NOT_EXPANDED) { + const int best_node_index = best_node[best_index].index; + + best_node = uct_node[best_node_index].child; + max_count = 0; + for (int i = 0; i < uct_node[best_node_index].child_num; i++) { + if (best_node[i].move_count > max_count) { + best_index = i; + max_count = best_node[i].move_count; + } + } + + if (max_count < 80) + break; + + pv += " " + best_node[best_index].move.toUSI(); + + // ponderの着手 + if (pondering_mode && ponderMove == Move::moveNone()) + ponderMove = best_node[best_index].move; + } + } + + if (!pondering) + cout << "info nps " << int((uct_node[current_root].move_count - pre_simulated) / finish_time) << " time " << int(finish_time * 1000) << " nodes " << uct_node[current_root].move_count << " hashfull " << uct_hash->GetUctHashUsageRate() << " score cp " << cp << " pv " << pv << endl; + + // 次の探索でのプレイアウト回数の算出 + CalculatePlayoutPerSec(finish_time); + + if (!pondering) + remaining_time[pos->turn()] -= finish_time; + } + + // 最善応手列を出力 + //PrintBestSequence(pos, uct_node, current_root); + // 探索の情報を出力(探索回数, 勝敗, 思考時間, 勝率, 探索速度) + if (debug_message) PrintPlayoutInformation(&uct_node[current_root], &po_info, finish_time, pre_simulated); + + return move; +} + + +///////////////////// +// 候補手の初期化 // +///////////////////// +static void +InitializeCandidate(child_node_t *uct_child, Move move) +{ + uct_child->move = move; + uct_child->move_count = 0; + uct_child->win = 0; + uct_child->index = NOT_EXPANDED; + uct_child->nnrate = 0; +} + + +///////////////////////// +// ルートノードの展開 // +///////////////////////// +static unsigned int +ExpandRoot(const Position *pos) +{ + unsigned int index = 
uct_hash->FindSameHashIndex(pos->getKey(), pos->turn(), pos->gamePly()); + child_node_t *uct_child; + int child_num = 0; + + // 既に展開されていた時は, 探索結果を再利用する + if (index != uct_hash_size) { + PrintReuseCount(uct_node[index].move_count); + + return index; + } + else { + // 空のインデックスを探す + index = uct_hash->SearchEmptyIndex(pos->getKey(), pos->turn(), pos->gamePly()); + + assert(index != uct_hash_size); + + // ルートノードの初期化 + uct_node[index].move_count = 0; + uct_node[index].win = 0; + uct_node[index].child_num = 0; + uct_node[index].evaled = 0; + uct_node[index].value_win = 0.0f; + + uct_child = uct_node[index].child; + + // 候補手の展開 + for (MoveList ml(*pos); !ml.end(); ++ml) { + InitializeCandidate(&uct_child[child_num], ml.move()); + child_num++; + } + + // 子ノード個数の設定 + uct_node[index].child_num = child_num; + + // ノードをキューに追加 + search_groups[0].QueuingNode(pos, index); + + } + + return index; +} + + + +/////////////////// +// ノードの展開 // +/////////////////// +unsigned int +UCTSearcher::ExpandNode(Position *pos, const int depth) +{ + unsigned int index = uct_hash->FindSameHashIndex(pos->getKey(), pos->turn(), pos->gamePly() + depth); + child_node_t *uct_child; + + // 合流先が検知できれば, それを返す + if (index != uct_hash_size) { + return index; + } + + // 空のインデックスを探す + index = uct_hash->SearchEmptyIndex(pos->getKey(), pos->turn(), pos->gamePly() + depth); + + assert(index != uct_hash_size); + + // 現在のノードの初期化 + uct_node[index].move_count = 0; + uct_node[index].win = 0; + uct_node[index].child_num = 0; + uct_node[index].evaled = 0; + uct_node[index].value_win = 0.0f; + uct_child = uct_node[index].child; + + // 候補手の展開 + int child_num = 0; + for (MoveList ml(*pos); !ml.end(); ++ml) { + InitializeCandidate(&uct_child[child_num], ml.move()); + child_num++; + } + + // 子ノードの個数を設定 + uct_node[index].child_num = child_num; + + // ノードをキューに追加 + if (child_num > 0) { + grp->QueuingNode(pos, index); + } + else { + uct_node[index].value_win = 0.0f; + uct_node[index].evaled = 1; + } + + return index; +} + + 
+////////////////////////////////////// +// ノードをキューに追加 // +////////////////////////////////////// +void +UCTSearcherGroup::QueuingNode(const Position *pos, unsigned int index) +{ + //cout << "QueuingNode:" << index << ":" << current_policy_value_queue_index << ":" << current_policy_value_batch_index << endl; + //cout << pos->toSFEN() << endl; + + /* if (current_policy_value_batch_index >= policy_value_batch_maxsize) { + std::cout << "error" << std::endl; + }*/ + // set all zero + std::fill_n((float*)features1[current_policy_value_queue_index][current_policy_value_batch_index], sizeof(features1_t) / sizeof(float), 0.0f); + std::fill_n((float*)features2[current_policy_value_queue_index][current_policy_value_batch_index], sizeof(features2_t) / sizeof(float), 0.0f); + + make_input_features(*pos, &features1[current_policy_value_queue_index][current_policy_value_batch_index], &features2[current_policy_value_queue_index][current_policy_value_batch_index]); + policy_value_hash_index[current_policy_value_queue_index][current_policy_value_batch_index] = index; + current_policy_value_batch_index++; +} + + +////////////////////////// +// 探索打ち止めの確認 // +////////////////////////// +static bool +InterruptionCheck(void) +{ + if (pondering) + return pondering_stop; + + int max = 0, second = 0; + const int child_num = uct_node[current_root].child_num; + const int rest = po_info.halt - po_info.count; + const child_node_t *uct_child = uct_node[current_root].child; + + if (mode != CONST_PLAYOUT_MODE && + GetSpendTime(begin_time) * 10.0 < time_limit) { + return false; + } + + // 探索回数が最も多い手と次に多い手を求める + for (int i = 0; i < child_num; i++) { + if (uct_child[i].move_count > max) { + second = max; + max = uct_child[i].move_count; + } + else if (uct_child[i].move_count > second) { + second = uct_child[i].move_count; + } + } + + // 残りの探索を全て次善手に費やしても + // 最善手を超えられない場合は探索を打ち切る + if (max - second > rest) { + return true; + } + else { + return false; + } +} + + +/////////////////////////// +// 
思考時間延長の確認 // +/////////////////////////// +static bool +ExtendTime(void) +{ + int max = 0, second = 0; + const int child_num = uct_node[current_root].child_num; + const child_node_t *uct_child = uct_node[current_root].child; + + // 探索回数が最も多い手と次に多い手を求める + for (int i = 0; i < child_num; i++) { + if (uct_child[i].move_count > max) { + second = max; + max = uct_child[i].move_count; + } + else if (uct_child[i].move_count > second) { + second = uct_child[i].move_count; + } + } + + // 最善手の探索回数がが次善手の探索回数の + // 1.2倍未満なら探索延長 + if (max < second * 1.2) { + return true; + } + else { + return false; + } +} + + + +///////////////////////////////// +// 並列処理で呼び出す関数 // +// UCTアルゴリズムを反復する // +///////////////////////////////// +void +UCTSearcher::ParallelUctSearch() +{ + bool interruption = false; + bool enough_size = true; + + // 探索回数が閾値を超える, または探索が打ち切られたらループを抜ける + do { + // 探索回数を1回増やす + atomic_fetch_add(&po_info.count, 1); + // 盤面のコピー + Position pos(*pos_root); + //cout << pos.toSFEN() << ":" << pos.getKey() << endl; + // 1回プレイアウトする + UctSearch(&pos, current_root, 0); + //cout << "root:" << current_root << " move_count:" << uct_node[current_root].move_count << endl; + // 探索を打ち切るか確認 + interruption = InterruptionCheck(); + // ハッシュに余裕があるか確認 + enough_size = uct_hash->CheckRemainingHashSize(); + if (!pondering && GetSpendTime(begin_time) > time_limit) break; + } while (po_info.count < po_info.halt && !interruption && enough_size); + + return; +} + + +////////////////////////////////////////////// +// UCT探索を行う関数 // +// 1回の呼び出しにつき, 1プレイアウトする // +////////////////////////////////////////////// +float +UCTSearcher::UctSearch(Position *pos, const unsigned int current, const int depth) +{ + // 詰みのチェック + if (uct_node[current].child_num == 0) { + return 1.0f; // 反転して値を返すため1を返す + } + else if (uct_node[current].value_win == VALUE_WIN) { + // 詰み + return 0.0f; // 反転して値を返すため0を返す + } + else if (uct_node[current].value_win == VALUE_LOSE) { + // 自玉の詰み + return 1.0f; // 反転して値を返すため1を返す + } + + // 千日手チェック 
+ if (uct_node[current].evaled == 2) { + switch (pos->isDraw(16)) { + case NotRepetition: break; + case RepetitionDraw: return 0.5f; + case RepetitionWin: return 0.0f; + case RepetitionLose: return 1.0f; + case RepetitionSuperior: return 0.0f; + case RepetitionInferior: return 1.0f; + default: UNREACHABLE; + } + } + + // policyが計算されるのを待つ(他のスレッドが同じノードを先に展開した場合、nnの計算を待つ必要がある) + while (uct_node[current].evaled == 0) + this_thread::yield(); + + float result; + unsigned int next_index; + double score; + child_node_t *uct_child = uct_node[current].child; + + // 現在見ているノードをロック + LOCK_NODE(current); + // UCB値最大の手を求める + next_index = SelectMaxUcbChild(pos, current, depth); + // 選んだ手を着手 + StateInfo st; + pos->doMove(uct_child[next_index].move, st); + + // Virtual Lossを加算 + AddVirtualLoss(&uct_child[next_index], current); + // ノードの展開の確認 + if (uct_child[next_index].index == NOT_EXPANDED ) { + // ノードの展開中はロック + LOCK_EXPAND; + // ノードの展開 + // ノード展開処理の中でvalueを計算する + unsigned int child_index = ExpandNode(pos, depth + 1); + uct_child[next_index].index = child_index; + //cerr << "value evaluated " << result << " " << v << " " << *value_result << endl; + // ノード展開のロックの解除 + UNLOCK_EXPAND; + + // 現在見ているノードのロックを解除 + UNLOCK_NODE(current); + + // 詰みチェック(ValueNet計算中にチェック) + int isMate = 0; + if (!pos->inCheck()) { + if (mateMoveInOddPly(*pos, MATE_SEARCH_DEPTH)) { + isMate = 1; + } + } + else { + if (mateMoveInEvenPly(*pos, MATE_SEARCH_DEPTH - 1)) { + isMate = -1; + } + } + + // 千日手チェック + int isDraw = 0; + switch (pos->isDraw(16)) { + case NotRepetition: break; + case RepetitionDraw: isDraw = 2; break; // Draw + case RepetitionWin: isDraw = 1; break; + case RepetitionLose: isDraw = -1; break; + case RepetitionSuperior: isDraw = 1; break; + case RepetitionInferior: isDraw = -1; break; + default: UNREACHABLE; + } + + // valueが計算されるのを待つ + //cout << "wait value:" << child_index << ":" << uct_node[child_index].evaled << endl; + while (uct_node[child_index].evaled == 0) + this_thread::yield(); + + // 
// Dirichlet distribution
// Fills x[0..size) with one sample from a symmetric Dirichlet distribution
// with concentration 0.15: independent Gamma(0.15, 1) draws normalised to sum
// to 1. Does nothing when size <= 0.
//
// The distribution object is local to the call. It used to be a function
// static, i.e. mutable state shared by every caller, which is a data race when
// several threads sample at once with their own engines.
void random_dirichlet(std::mt19937_64 &mt, float *x, const int size) {
    if (size <= 0) {
        return;
    }

    const float dirichlet_alpha = 0.15f;
    std::gamma_distribution<float> gamma(dirichlet_alpha, 1.0f);

    float sum_y = 0;
    for (int i = 0; i < size; i++) {
        const float y = gamma(mt);
        sum_y += y;
        x[i] = y;
    }

    if (sum_y > 0) {
        std::for_each(x, x + size, [sum_y](float &v) { v /= sum_y; });
    }
    else {
        // Every draw underflowed to zero; dividing would produce NaN, so fall
        // back to the uniform distribution.
        std::fill(x, x + size, 1.0f / size);
    }
}
Position *pos, const unsigned int current, const int depth) +{ + const child_node_t *uct_child = uct_node[current].child; + const int child_num = uct_node[current].child_num; + int max_child = 0; + const int sum = uct_node[current].move_count; + float q, u, max_value; + float ucb_value; + unsigned int max_index; + //const bool debug = GetDebugMessageMode() && current == current_root && sum % 100 == 0; + + max_value = -1; + + // UCB値最大の手を求める + for (int i = 0; i < child_num; i++) { + float win = uct_child[i].win; + int move_count = uct_child[i].move_count; + + // evaled + /*if (debug) { + cerr << i << ":"; + cerr << uct_node[current].move_count << " "; + cerr << setw(3) << uct_child[i].move.toUSI(); + cerr << ": move " << setw(5) << move_count << " policy " + << setw(10) << uct_child[i].nnrate << " "; + }*/ + if (move_count == 0) { + q = 0.5f; + u = 1.0f; + } + else { + q = win / move_count; + u = sqrtf(sum) / (1 + move_count); + } + + float rate = max(uct_child[i].nnrate, 0.01f); + // ランダムに確率を上げる + if (depth == 0 && rnd(*mt) <= 2) { + rate = (rate + 1.0f) / 2.0f; + } + else if (depth < 4 && depth % 2 == 0 && rnd(*mt) == 0) { + rate = std::min(rate * 1.5f, 1.0f); + } + + ucb_value = q + c_puct * u * rate; + + /*if (debug) { + cerr << " Q:" << q << " U:" << c_puct * u * rate << " UCB:" << ucb_value << endl; + }*/ + + if (ucb_value > max_value) { + max_value = ucb_value; + max_child = i; + } + } + + /*if (debug) { + cerr << "select node:" << current << " child:" << max_child << endl; + }*/ + + return max_child; +} + + +///////////////////////////////// +// 次のプレイアウト回数の設定 // +///////////////////////////////// +static void +CalculatePlayoutPerSec(double finish_time) +{ + if (finish_time != 0.0) { + po_per_sec = po_info.count / finish_time; + } + else { + po_per_sec = PLAYOUT_SPEED; + } +} + +static void +CalculateNextPlayouts(const Position *pos) +{ + if (pondering) { + po_info.num = uct_hash_size; + return; + } + + int color = pos->turn(); + + // 探索の時の探索回数を求める + if (mode 
== CONST_TIME_MODE) { + po_info.num = (int)(po_per_sec * const_thinking_time); + } + else if (mode == TIME_SETTING_MODE || + mode == TIME_SETTING_WITH_BYOYOMI_MODE) { + time_limit = remaining_time[color] / (14 + max(0, 30 - pos->gamePly())) + inc_time[color]; + if (mode == TIME_SETTING_WITH_BYOYOMI_MODE && + time_limit < (const_thinking_time)) { + time_limit = const_thinking_time; + } + po_info.num = (int)(po_per_sec * time_limit); + } +} + +void SetModelPath(const std::string path[max_gpu]) +{ + for (int i = 0; i < max_gpu; i++) { + if (path[i] == "") + model_path[i] = path[0]; + else + model_path[i] = path[i]; + } +} + +void UCTSearcherGroup::EvalNode() { + cudaSetDevice(gpu_id); + + if (nn == nullptr) { + nn = new NN(threads); + nn->load_model(model_path[gpu_id].c_str()); + } + + bool enough_batch_size = true; // 初回はルートノードのため待たない + while (true) { + LOCK_EXPAND; + if (running_threads == 0 && current_policy_value_batch_index == 0) { + UNLOCK_EXPAND; + break; + } + + if (running_threads == 0) { + UNLOCK_EXPAND; + this_thread::yield(); + } + + if (running_threads > 0 && (current_policy_value_batch_index == 0 || !enough_batch_size && current_policy_value_batch_index < running_threads * 0.9)) { + UNLOCK_EXPAND; + this_thread::sleep_for(chrono::milliseconds(1)); + enough_batch_size = true; + } + else { + enough_batch_size = false; + int policy_value_batch_size = current_policy_value_batch_index; + int policy_value_queue_index = current_policy_value_queue_index; + current_policy_value_batch_index = 0; + current_policy_value_queue_index = current_policy_value_queue_index ^ 1; + UNLOCK_EXPAND; + //std::cout << policy_value_batch_size << std::endl; + + // predict + nn->foward(policy_value_batch_size, features1[policy_value_queue_index], features2[policy_value_queue_index], y1, y2); + + const float(*logits)[MAX_MOVE_LABEL_NUM * SquareNum] = reinterpret_cast(y1); + const float *value = reinterpret_cast(y2); + + for (int i = 0; i < policy_value_batch_size; i++, logits++, 
value++) { + const unsigned int index = policy_value_hash_index[policy_value_queue_index][i]; + + /*if (index == current_root) { + string str; + for (int sq = 0; sq < SquareNum; sq++) { + str += to_string((int)features1[policy_value_queue_index][i][0][0][sq]); + str += " "; + } + cout << str << endl; + }*/ + + LOCK_NODE(index); + + const int child_num = uct_node[index].child_num; + child_node_t *uct_child = uct_node[index].child; + Color color = (Color)(*uct_hash)[index].color; + + // 合法手一覧 + std::vector legal_move_probabilities; + legal_move_probabilities.reserve(child_num); + for (int j = 0; j < child_num; j++) { + Move move = uct_child[j].move; + const int move_label = make_move_label((u16)move.proFromAndTo(), color); + legal_move_probabilities.emplace_back((*logits)[move_label]); + } + + // Boltzmann distribution + softmax_tempature_with_normalize(legal_move_probabilities); + + for (int j = 0; j < child_num; j++) { + uct_child[j].nnrate = legal_move_probabilities[j]; + } + + uct_node[index].value_win = *value; + uct_node[index].evaled = 1; + UNLOCK_NODE(index); + } + } + } +}