-
Notifications
You must be signed in to change notification settings - Fork 0
/
RunEFR-EV.py
77 lines (66 loc) · 2.3 KB
/
RunEFR-EV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import time
import sys
import copy
import EFR
from multiprocessing import Pool
from open_spiel.python.algorithms import expected_game_score
from collections import defaultdict
import StoreTabularPolicy
import pyspiel
# Run configuration. The defaults below apply when the matching command-line
# argument is absent:
#   argv[1] -> FL_Runname     (prefix of the output CSV file name)
#   argv[2] -> FL_iterations  (number of solver iterations)
#   argv[3] -> FL_deviation   (stored only; nothing visible here reads it)
FL_game = 'leduc_poker'
FL_iterations = 2
FL_players = 2
FL_Runname = "Test run"
# Always bound, so a later reference cannot raise NameError when argv[3] is
# not supplied.
FL_deviation = None

if len(sys.argv) > 1:
    FL_Runname = sys.argv[1]

if len(sys.argv) > 2:
    try:
        FL_iterations = int(sys.argv[2])
    except ValueError:
        # Best effort: keep the default, but say so instead of failing silently.
        print(
            f"Ignoring non-integer iteration count {sys.argv[2]!r}; "
            f"using {FL_iterations}",
            file=sys.stderr,
        )
    else:
        print(FL_iterations)

if len(sys.argv) > 3:
    FL_deviation = sys.argv[3]
def iteration_update(efr_solver):
    """Run one policy-update step on ``efr_solver`` and return the solver.

    The solver is returned (rather than only mutated) so that a caller that
    maps this function over a collection of solvers gets the updated solvers
    back as the result.

    Args:
        efr_solver: Object exposing ``evaluate_and_update_policy()``.

    Returns:
        The same ``efr_solver`` object that was passed in.
    """
    efr_solver.evaluate_and_update_policy()
    # Progress marker: one line per solver update.
    print("done")
    return efr_solver
def average_cross_play_values(deviation_types, pair_value):
    """Return each deviation type's mean payoff over all cross-play pairings.

    Every ordered pair ``(i, h)`` with ``i != h`` is evaluated once, so each
    type is scored in both seats against every other type. Self-play pairings
    are skipped.

    Args:
        deviation_types: Sequence of deviation-set names.
        pair_value: Callable ``pair_value(i, h)`` returning an indexable pair
            ``(value for seat 0, value for seat 1)`` for the matchup in which
            type ``i`` takes seat 0 and type ``h`` takes seat 1.

    Returns:
        Dict mapping each name to its summed payoff divided by the number of
        games it took part in (0.0 when there are no opponents).
    """
    totals = defaultdict(float)
    for i, first in enumerate(deviation_types):
        for h, second in enumerate(deviation_types):
            if i == h:
                continue
            value = pair_value(i, h)
            totals[first] += value[0]
            totals[second] += value[1]

    # Each type plays 2 * (n - 1) games: one per seat against every other
    # type. Dividing by 2 * n would understate every average because the
    # skipped self-play games contribute nothing to the totals.
    games_per_type = 2 * (len(deviation_types) - 1)
    if games_per_type <= 0:
        return {name: 0.0 for name in deviation_types}
    return {name: totals[name] / games_per_type for name in deviation_types}


def main():
    """Train one EFR solver per deviation set and log cross-play values.

    For every iteration all solvers are updated in parallel, each pair of
    distinct solvers' average policies is evaluated against each other, and
    one ``[iteration, deviation set, average expected value]`` row is recorded
    per deviation set. The rows are written to ``<FL_Runname>EV.csv`` once all
    iterations have finished.
    """
    header = ['Iteration', 'Deviation Set', 'Average Expected Value']
    deviation_types = ["blind action", "informed action", "blind cf", "informed cf", "bps", "cfps", "csps", "tips", "bhv"]

    game = pyspiel.load_game(FL_game, {"players": FL_players})
    # Each solver gets its own copy of the game object.
    efr_solvers = [
        EFR.EFRSolver(copy.deepcopy(game), deviation_type)
        for deviation_type in deviation_types
    ]

    efr_data = []
    # One worker pool is shared by all iterations instead of being rebuilt
    # every time round the loop.
    with Pool(processes=8) as pool:
        for iteration in range(FL_iterations):
            # Batch update all EFR solvers; the updated solvers come back as
            # the map result and replace the previous list.
            efr_solvers = pool.map(iteration_update, efr_solvers)

            def pair_value(i, h, solvers=efr_solvers):
                # Bound via a default argument so the closure always sees this
                # iteration's solvers.
                return expected_game_score.policy_value(
                    game.new_initial_state(),
                    [solvers[i].average_policy(), solvers[h].average_policy()],
                )

            expected_values = average_cross_play_values(deviation_types, pair_value)
            print(expected_values)
            efr_data.extend(
                [iteration, deviation_type, expected_values[deviation_type]]
                for deviation_type in deviation_types
            )

    EFR_data = pd.DataFrame(efr_data, columns=header)
    EFR_data.to_csv(FL_Runname+'EV.csv', index=False)


# The guard keeps worker processes started with the "spawn" method from
# re-running the whole experiment when they import this module.
if __name__ == "__main__":
    main()