-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrps.cpp
87 lines (78 loc) · 2.23 KB
/
rps.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
const int ROCK = 0, PAPER = 1, SCISSORS = 2, NUM_ACTIONS = 3;
uniform_real_distribution<double> unif(0, 1);
default_random_engine re;
class RPSTrainer {
private:
double regretSum[NUM_ACTIONS], regretSumOpp[NUM_ACTIONS], strategy[NUM_ACTIONS], strategySum[NUM_ACTIONS], oppStrategy[NUM_ACTIONS], strategySumOpp[NUM_ACTIONS];
void getStrategy(double *s, double *r, double *sum){
double normalizingSum = 0;
for (int i = 0; i < NUM_ACTIONS; i++){
s[i] = r[i] > 0 ? r[i] : 0;
normalizingSum += s[i];
}
for (int i = 0; i < NUM_ACTIONS; i++){
if (normalizingSum > 0){
s[i] /= normalizingSum;
} else {
s[i] = 1.0 / NUM_ACTIONS;
}
sum[i] += s[i];
}
}
int getAction(double *s){
double r = unif(re);
int a = 0;
double cumulativeProbability = 0;
while (a < NUM_ACTIONS - 1){
cumulativeProbability += s[a];
if (r < cumulativeProbability){
break;
}
a++;
}
return a;
}
public:
void train(int iterations){
double actionUtility[NUM_ACTIONS] = {};
for (int i = 0; i < iterations; i++){
getStrategy(strategy, regretSum, strategySum);
getStrategy(oppStrategy, regretSumOpp, strategySumOpp);
int myAction = getAction(strategy);
int otherAction = getAction(oppStrategy);
actionUtility[otherAction] = 0;
actionUtility[otherAction == NUM_ACTIONS - 1 ? 0 : otherAction + 1] = 1;
actionUtility[otherAction == 0 ? NUM_ACTIONS - 1 : otherAction - 1] = -1;
for (int a = 0; a < NUM_ACTIONS; a++){
regretSum[a] += actionUtility[a] - actionUtility[myAction];
regretSumOpp[a] += actionUtility[otherAction] - actionUtility[a];
}
}
}
double* getAverageStrategy(){
static double avgStrategy[NUM_ACTIONS] = {};
double normalizingSum = 0;
for (int a = 0; a < NUM_ACTIONS; a++){
normalizingSum += strategySum[a];
}
for (int a = 0; a < NUM_ACTIONS; a++){
if (normalizingSum > 0){
avgStrategy[a] = strategySum[a] / normalizingSum;
} else {
avgStrategy[a] = 1.0 / NUM_ACTIONS;
}
}
return avgStrategy;
}
};
int main(){
RPSTrainer trainer;
trainer.train(1000000);
double *avg = trainer.getAverageStrategy();
for (int i = 0; i < NUM_ACTIONS; i++){
cout << avg[i] << " \n"[i == NUM_ACTIONS - 1];
}
}