-
Notifications
You must be signed in to change notification settings - Fork 0
/
player.hpp
194 lines (162 loc) · 4.88 KB
/
player.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#ifndef PLAYER_H
#define PLAYER_H
#include "engine.hpp"
#include <iostream>
#include <chrono>
#include <stdio.h>
#include <stdlib.h>
#include <unordered_map>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <semaphore.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdarg.h>
#include <cstdio>
#include <stdlib.h>
#include <string>
#include <inttypes.h>
#include <unistd.h>
#include <vector>
#include <random>
#include <math.h>
// typedef int (*foo_ptr_t)( int );
// Pointer to a function that takes a single int and returns nothing.
using fp = void (*)(int);
/* MRD I put some timers in param.cpp */
/**
 * Timing helper declared here and defined in another translation unit.
 *
 * @param x  a double-precision duration with the default (seconds) period
 * @return   a double-precision duration with a nanosecond period;
 *           presumably the same time span as `x` -- confirm against the
 *           definition
 */
auto cast_nano2(std::chrono::duration<double> x)
    -> std::chrono::duration<double, std::nano>;
/**
 * Abstract base class for anything that can choose a move.
 *
 * Concrete players (Rand, Human, Minimax, MonteCarlo in this header) derive
 * from it and must implement move() and cleanup().
 */
class Player
{
public:
    int color;  // side this player plays; value encoding is defined outside this header
    Engine *e;  // engine handed to the constructor; NOTE(review): looks non-owning -- confirm

    /**
     * @param col     colour/side for this player
     * @param engine  engine the player operates on
     */
    Player(int col, Engine *engine);

    // Virtual so that destroying a concrete player through a Player* is well
    // defined; without it such a delete is undefined behaviour.
    virtual ~Player() = default;

    // Presumably returns `color`; the definition lives in another file.
    int get_color();

    /**
     * Choose a move.
     * @param move_list  caller-supplied move buffer; layout is defined by the
     *                   engine (not visible in this header)
     * @return           an int identifying the selected move (encoding not
     *                   visible here)
     */
    virtual int move(int* move_list) = 0;

    // Player-specific teardown hook; every concrete player must provide one.
    virtual void cleanup() = 0;
};
/**
 * Concrete Player named "Rand".
 * NOTE(review): the name suggests random move selection; the implementation
 * is not visible in this header.
 */
class Rand : public Player
{
public:
    // Same parameters as the Player constructor.
    Rand(int col, Engine *engine);

    // --- Player interface ---
    void cleanup();
    int move(int *move_list);
};
/**
 * Concrete Player named "Human".
 * NOTE(review): presumably driven by interactive input -- confirm in the
 * implementation file.
 */
class Human : public Player
{
public:
    // Same parameters as the Player constructor.
    Human(int col, Engine *engine);

    // --- Player interface ---
    void cleanup();
    int move(int *move_list);

    // Turns a coordinate string into an int; the accepted text format and the
    // meaning of the returned value are defined in the implementation.
    int parse_coords(std::string seq);
};
/**
 * Concrete Player exposing a pair of colour-specific search routines that
 * take alpha/beta bounds, plus a stored search-limit value.
 */
class Minimax : public Player
{
public:
    /**
     * @param col           colour/side for this player
     * @param engine        engine the player operates on
     * @param search_limit  presumably stored in depth_search_limit -- confirm
     */
    Minimax(int col, Engine *engine, int search_limit);

    // --- Player interface ---
    int move(int *move_list);
    void cleanup();

    // --- search ---
    // One routine per colour; each receives a depth and the alpha/beta bounds.
    int minimax_white(int depth, double alpha, double beta);
    int minimax_black(int depth, double alpha, double beta);

    // --- helpers ---
    // Returns an int* derived from move_list.
    // NOTE(review): ownership of the returned buffer is not visible here.
    int *copy_move_list(int *move_list);
    // Maps a terminal code `term` to an int score (mapping defined elsewhere).
    int decode_terminal_score(int term);

private:
    int depth_search_limit;
    int node_count;
};
// struct impl of node
/**
 * Plain-data record for one node of the search tree used by MonteCarlo.
 * Field order is kept exactly as-is because object layout and any
 * aggregate initialisation depend on it.
 */
struct Node
{
    // --- identity and links ---
    U64   board_hash;
    Node *parent_node;
    Node *children_nodes;   // NOTE(review): presumably an array of num_children nodes -- confirm
    int   num_children;

    // --- state flags ---
    bool  expanded;
    bool  is_terminal;
    bool  is_pass;

    // --- move information ---
    int   color;
    int   move;

    // --- search statistics ---
    int   visits;
    float policy;              // initial policy value from the net run on the parent node
    float value;               // initial value-net result for this board position
    float calced_q;            // all sub-nodes add their value functions to this
    float total_action_value;  // updated while backing up statistics
};
// struct new_params
// {
// int size;
// std::string semaphore_name;
// std::string shared_memory_name;
// int permissions;
// };
/**
 * Concrete Player built around a tree of Node records and a model reached
 * through a semaphore plus two shared-memory regions.
 *
 * Only declarations live here; every definition is in another file.
 * Data members keep their original order because layout and initialisation
 * order depend on it.
 */
class MonteCarlo : public Player
{
public:
    /**
     * @param col        colour/side for this player
     * @param engine     engine the player operates on
     * @param m_path     model path
     * @param sims       simulation count (presumably stored in max_sims -- confirm)
     * @param training   training-mode flag
     * @param pSem       semaphore used for the model exchange
     * @param pSem_code  shared-memory pointer, "code" region
     * @param pSem_rest  shared-memory pointer, "rest" region
     */
    MonteCarlo(int col,
               Engine *engine,
               std::string m_path,
               int sims,
               bool training,
               sem_t *pSem,
               void *pSem_code,
               void *pSem_rest);

    // --- Player interface ---
    int move(int *move_list);
    void cleanup();

    // --- tree search ---
    Node *traverse_tree(Node *node, int p_color);
    void expand_node(Node *node);
    void expand_node(Node *node, int *move_list);
    void backup_stats(Node *node);
    Node *max_child_puct(Node *node);
    Node *max_child_visits(Node *node);
    float compute_puct(Node *node);
    int node_argmax(Node *node, int num_nodes);
    void calc_action_probs(Node *node);
    void add_dirichlet_noise(float epsilon, float alpha);

    // --- model and communication ---
    void load_board_state_to_int_arr_sender(int p_color);
    int send_and_recieve_model_data(int p_color);
    void fill_random_ints(int *ints_to_fill, int num_ints);
    int acquire_semaphore(sem_t *pSemaphore);
    int release_semaphore(sem_t *pSemaphore);

    // --- temporary functions ---
    int temp_value_calc();

    // --- helpers ---
    int color_multiplier(int p_color);
    int get_true_result();
    void init_default_node(Node *node);
    Node *create_default_node();
    int *generate_moves_wrapper(int p_color);
    void push_move_wrapper(int move, int p_color);

    // --- debug printing ---
    void print_node_info(Node *node);
    void print_best_graph(Node *node);
    void print_all_subnodes(Node *node);
    void print_all_subnodes_helper(Node *node, int depth);

    // --- accessors for saved results ---
    float *get_saved_action_probs();
    float get_saved_value();

    int no_decision;

private:
    std::string model_path;
    int max_sims;

    // Open concern from the original author: think about hash collisions --
    // with a collision, backprop will not reach the correct parent.
    std::unordered_map<U64, Node *> node_storage;
    U64 node_storage_counter;
    Node *curr_root;

    bool is_training;
    float explore_constant;
    float saved_value;
    float *saved_action_probs;
    bool print_on;

    // --- communication variables ---
    sem_t *pSemaphore;
    int rc;
    void *pSharedMemory_code;
    void *pSharedMemory_rest;
    // Disabled in the original:
    // int fd;
    // struct new_params params;

    // Sender flag: -1 is nothing, 0 is c sent, 1 is python sent.
    int32_t send_code;

    // --- data holders ---
    int num_ints_send;
    int num_floats_recieve;
    int32_t *int_arr_sender;
    float *float_arr_reciever;

    int temperature;
};
#endif