-
Notifications
You must be signed in to change notification settings - Fork 3
/
sweep_csv.py
61 lines (53 loc) · 2.21 KB
/
sweep_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import sys
import json
import gzip
import ec2config
from analyze_basic import analyze_basic
def usage():
    """Print a one-line usage message for this script to stdout.

    The script name is taken from ``sys.argv[0]`` instead of being
    hard-coded, so the message stays correct if the file is renamed.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Usage: %s experiment_name" % os.path.basename(sys.argv[0]))
def extract_data(experiment_name, name, data):
    """Sync one sweep log from S3 and return its analysis.

    Args:
        experiment_name: Experiment the log belongs to; used as a path
            component both remotely and locally.
        name: Unused by this function; kept for interface compatibility.
        data: Mapping that provides the log file name under ``'log_fn'``.

    Returns:
        Whatever ``analyze_basic`` returns for the local copy of the log.
    """
    # The same relative layout is used under the bucket and on local disk.
    relative_path = '/'.join(('sweep', experiment_name, data['log_fn']))
    remote_path = ec2config.s3_bucket + '/' + relative_path

    # presumably fetches the remote object to the local path -- the helper
    # lives in ec2config; confirm there.
    ec2config.s3_sync_file(remote_path, relative_path)
    return analyze_basic(relative_path)
def gen_csv(experiment_name):
    """Export the sweep results of one experiment to CSV and gzipped JSON.

    Selects every item of the sweep domain whose ``experiment_name`` or
    ``comment`` attribute equals *experiment_name* and walks the items in
    the reverse of the order the query returned them.  Items without a
    ``log_fn`` attribute are printed and otherwise skipped.  For the others
    the log is analysed via ``extract_data``, the item's own attributes are
    merged over the analysis result, and one row (columns as listed in
    ``cols``) is appended to ``<experiment_name>.csv`` in the current
    directory.  All merged records are finally written as JSON to
    ``sweep-summaries/<experiment_name>.json.gz``.

    Args:
        experiment_name: Value matched against the ``experiment_name`` and
            ``comment`` attributes; also used to name the output files.

    Raises:
        KeyError: If an item lacks ``tracefile`` or a merged record lacks
            one of the CSV columns.
        Exception: Anything other than ``IOError`` raised while processing
            an item propagates.  ``IOError`` for a single item is printed
            and that item is skipped.

    The connection obtained from ``ec2config.sdb_connect()`` is closed even
    when an error occurs.
    """
    cols = ['tracefile', 'num_tasks', 'task_time', 'num_objects_created',
            'object_created_size', 'norm_critical_path', 'num_nodes',
            'num_workers_per_node', 'object_transfer_time_cost', 'scheduler',
            'job_completion_time']
    summary_dir = 'sweep-summaries'

    sdb_conn = ec2config.sdb_connect()
    try:
        dom = sdb_conn.get_domain(ec2config.sdb_sweep_domain)

        # A single quote inside a quoted value has to be doubled, otherwise
        # a name containing "'" terminates the literal and breaks the query.
        # NOTE(review): assumes Amazon SimpleDB select syntax -- confirm.
        quoted_name = experiment_name.replace("'", "''")
        query = "select * from `{}` where (experiment_name='{}' or comment='{}')".format(
            ec2config.sdb_sweep_domain, quoted_name, quoted_name)
        rs = dom.select(query)

        all_stats = []
        with open('{}.csv'.format(experiment_name), 'w') as f:
            f.write(','.join(cols))
            f.write('\n')
            # Materialise the result set so it can be walked back to front.
            for r in reversed(list(rs)):
                try:
                    print('>>>> %s' % (r['tracefile'],))
                    print('%s %s' % (r.name, r))
                    if 'log_fn' not in r:
                        # Nothing to analyse for this item.
                        continue

                    print("extracting data")
                    stats = extract_data(experiment_name, r.name, r)

                    print("updating")
                    # Stored item attributes take precedence over analysed ones.
                    stats.update(r.items())
                    if 'env' in stats:
                        stats['env'] = json.loads(stats['env'])
                    # Always reported as -1, whatever the analysis produced.
                    stats['norm_critical_path'] = -1

                    line = ','.join(str(stats[col]) for col in cols)
                    print("writing output")
                    f.write(line)
                    f.write('\n')
                    all_stats.append(stats)
                except IOError as err:
                    # Best effort: report the failing item and carry on.
                    print(err)

        # Create the summary directory on demand so a missing directory does
        # not discard everything collected above.
        if not os.path.isdir(summary_dir):
            os.makedirs(summary_dir)
        summary_path = '{}/{}.json.gz'.format(summary_dir, experiment_name)
        with gzip.open(summary_path, 'wb') as f:
            # The file is opened in binary mode; json.dumps output is ASCII
            # by default, so encoding is a no-op on Python 2 and yields the
            # required bytes on Python 3.
            f.write(json.dumps(all_stats).encode('utf-8'))
    finally:
        sdb_conn.close()
if __name__ == '__main__':
    # Script entry point: exactly one positional argument, the experiment
    # name, is expected after the program name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        usage()
        sys.exit(1)
    experiment_name = cli_args[0]
    gen_csv(experiment_name)