path: root/src/dmclock/benchmark/data_parser.py
blob: c90d85fd9ab6f32ebe0aad558675e2fa222081f8 (plain)
#!/usr/bin/env python
#
# Parse the dmclock benchmark simulator output and write a space-separated
# .dat file with the average add-request and complete-request times for each
# config and heap type.

import re
import sys

class DataPoint:
  def __init__(self):
    self.nserver = 0
    self.nclient = 0
    self.heap_type = 0
    self.total_time_to_add_req = 0
    self.total_time_to_complete_req = 0
    self.config = ''

  def set_name(self, config, heap_type):
    self.config = config
    self.heap_type = heap_type

  def get_config(self):
    # e.g. "configs/dmc_sim_8_6.conf" -> "dmc_sim_8_6"
    return re.split(r"/|\.", self.config)[1]

  def __str__(self):
    return "s:%d, c:%d, h:%d, config:%s" % (self.nserver, self.nclient, self.heap_type, self.config)
# end DataPoint


def isFloat(elem):
  try:
    float(elem)
    return True
  except ValueError:
    return False
#end isFloat


def parse_config_params(fname):
  nclient = 0
  nserver = 0
  # read the server/client counts from the config file
  with open(fname, 'r') as f:
    for line in f:
      line = line.strip('\n \t')
      if not line:
        continue
      if line.startswith("client_count"):
        nclient += int(line.split('=')[-1])
      if line.startswith("server_count"):
        nserver += int(line.split('=')[-1])
  return [nserver, nclient]
# parse_config_params
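
# For illustration, a hypothetical snippet of the kind of config file
# parse_config_params() scans; only the client_count/server_count lines matter
# here, and the exact dmc_sim .conf layout may differ:
#
#   [global]
#   server_count = 8
#   client_count = 6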

def make_aggregate_data_point(dps, config, heap_type):
  # create a new aggregate point for this config/heap_type pair
  dp = DataPoint()
  dp.set_name(config, heap_type)

  # sum the times over all runs with this config and heap type
  num_run = 0
  for _dp in dps:
    if _dp.config == config and _dp.heap_type == heap_type:
      dp.nserver = _dp.nserver
      dp.nclient = _dp.nclient
      num_run                       += 1
      dp.total_time_to_add_req      += _dp.total_time_to_add_req
      dp.total_time_to_complete_req += _dp.total_time_to_complete_req

  # average (guard against a pair that never ran)
  if num_run:
    dp.total_time_to_add_req      /= num_run
    dp.total_time_to_complete_req /= num_run
  return dp

def parse_data_points(filename):
  dps = []         # one data-point per (config, heap_type) run
  dp = None
  state = 0
  configs = {}
  k_ways  = {}

  with open(filename, 'r') as f:
    for line in f:
      line = line.strip('\n \t')
      if not line:
        continue

      # e.g. "file_name:1:configs/dmc_sim_8_6.conf" starts a new run
      if line.startswith("file_name"):
        if dp:
          dps.append(dp)
          state = 0

        # new data-point
        dp = DataPoint()
        parts = line.split(':')
        fname = parts[-1]
        dp.heap_type = int(parts[1])
        if dp.heap_type not in k_ways:
          k_ways[dp.heap_type] = 1

        # remember the config file used by this run
        configs[fname] = 1

        dp.config = fname
        params = parse_config_params(fname)
        dp.nserver = params[0]
        dp.nclient = params[-1]

      elif line.startswith("average"):
        # only the 3rd and 4th "average" lines of a run (the last two) carry
        # the add-request and complete-request times we want
        r = [float(s) for s in line.split(' ') if isFloat(s)]
        state += 1
        if state == 3:
          dp.total_time_to_add_req = r[0]
        elif state == 4:
          dp.total_time_to_complete_req = r[0]

      else:
        pass

  # final entry
  if dp:
    dps.append(dp)

  # average the runs: one aggregate point per (config, heap_type) pair
  dps_avg = []
  for config in configs:
    data_per_config = []
    for k in k_ways:
      aggr_dp = make_aggregate_data_point(dps, config, k)
      data_per_config.append(aggr_dp)
    dps_avg.append(data_per_config)
  return dps_avg
# end parse_data_points
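
# For orientation, a sketch (with made-up values) of the kind of block in the
# simulator output that parse_data_points() consumes; the real log contains
# more lines than shown, and only the 3rd and 4th "average" lines of each
# block are used:
#
#   file_name:2:configs/dmc_sim_8_6.conf
#   ...
#   average ... 0.000123
#   average ... 0.000456
#   average ... 0.000789    <- time to add a request
#   average ... 0.001234    <- time to complete a request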


def create_header(num_cols):
  fields = ['nserver_nclient(config_file)', 'add_req', 'complete_req']
  header = fields[0]
  # add_req_{2, 3, ...} columns, one per heap type
  for i in range(num_cols):
    header = '%s %s_%i' % (header, fields[1], i + 2)
  # complete_req_{2, 3, ...} columns, one per heap type
  for i in range(num_cols):
    header = '%s %s_%i' % (header, fields[2], i + 2)
  # terminate the header line
  header = '%s\n' % (header)
  return header
# end create_header
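
# Example output of create_header(2), derived from the format strings above:
#
#   nserver_nclient(config_file) add_req_2 add_req_3 complete_req_2 complete_req_3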


def create_data_line(aggr_dp):
  # common info (server/client counts are the same across heap types)
  dp = aggr_dp[0]
  data_line = "s:%d_c:%d " % (dp.nserver, dp.nclient)
  num_cols = len(aggr_dp)
  # add_req times, one per heap type
  for i in range(num_cols):
    data_line = '%s %f' % (data_line, aggr_dp[i].total_time_to_add_req)
  # complete_req times, one per heap type
  for i in range(num_cols):
    data_line = '%s %f' % (data_line, aggr_dp[i].total_time_to_complete_req)
  # terminate the data line
  data_line = '%s\n' % (data_line)
  return data_line
# end create_data_line
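
# Example output of create_data_line() for two heap types (the times are
# made-up placeholders):
#
#   s:8_c:6  0.000123 0.000456 0.001234 0.002345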

    
def make_data(filename):
  # write the aggregated points as a space-separated table
  dps = parse_data_points(filename)
  if not dps:
    return
  print("total points: %d" % len(dps))
  with open('%s.dat' % (filename), 'w+') as f:
    # header first, then one line per config
    f.write(create_header(len(dps[0])))
    for aggr_dp in dps:
      f.write(create_data_line(aggr_dp))


def main(output_file):
  print(output_file)
  make_data(output_file)


if __name__ == "__main__":
  file_name = "result"
  if len(sys.argv) > 1:
    file_name = sys.argv[1].strip()
  main(file_name)
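
# Usage, as implied by the entry point above:
#
#   python data_parser.py <simulator_output>    # the file name defaults to "result"
#
# This writes the aggregated table to <simulator_output>.dat, ready for
# column-oriented plotting tools such as gnuplot.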