blob: 5397c718ff2be13411c70e144b1cad2139f587c0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
#!/usr/bin/env python
import sys
import os
import re
import urlparse
def usage():
    """Print the command-line usage summary for this script to stdout.

    The program name shown in the summary is the base name of
    ``sys.argv[0]``.
    """
    program_name = os.path.basename(sys.argv[0])
    message = """ usage: {program} inDir outDir
inDir: directory containing .ht files
outDir: target for the new files"""
    usage_text = message.format(program=program_name)
    print(usage_text)
def parseFile(filename):
    """Parse one ``.ht`` file into a dictionary of key/value pairs.

    Every non-empty line must consist of three whitespace-free fields
    followed by the remainder of the line, each separated by exactly one
    whitespace character.  The URL-unquoted second field becomes the
    dictionary key and the remainder of the line becomes the value.

    A line whose value is empty while its third field is not ``"0"`` is
    treated as incomplete: it is concatenated with the next non-empty
    line and parsed again (diagnostics about this are printed to stdout).
    An incomplete line still pending at end of file is dropped.

    Args:
        filename: path of the file to read.

    Returns:
        dict mapping the unquoted second field to the line's value.

    Raises:
        ValueError: if a non-empty line does not match the expected
            layout (previously this surfaced as a bare IndexError).
    """
    # Local import so the block works with either module layout.
    try:
        from urllib.parse import unquote
    except ImportError:  # Python 2
        from urlparse import unquote

    line_re = re.compile(r"^(\S+)\s(\S+)\s(\S+)\s((?:\s*\S*)+)$")
    pairs = {}
    pending = None  # incomplete line waiting to be joined with the next one

    # The context manager guarantees the handle is closed, even on error.
    with open(filename, "r") as handle:
        for raw_line in handle:
            line = raw_line.rstrip('\n')
            if not line:
                continue

            if pending is not None:
                print(filename)
                line = pending + line
                print(line)
                pending = None

            # On a match, split() yields ['', f1, f2, f3, rest, ''];
            # on a mismatch it yields just [line].
            fields = line_re.split(line)
            if len(fields) != 6:
                raise ValueError(
                    "%s: cannot parse line %r" % (filename, line))

            if fields[4] == "" and fields[3] != "0":
                # Value is missing although the third field is not "0":
                # assume the value continues on the next line.
                print(line)
                print(fields)
                pending = line
            else:
                pairs[unquote(fields[2])] = fields[4]

    return pairs
def parseFiles(dir):
    """Parse every ``.ht`` file found directly inside ``dir``.

    Each matching file is handed to ``parseFile`` and its name is printed
    once it has been parsed.

    Args:
        dir: directory to scan (not searched recursively).

    Returns:
        list of ``[file_name, parsed_dict]`` two-element lists, in the
        order ``os.listdir`` reported the files.
    """
    parsed = []
    for entry in os.listdir(dir):
        # Anything that is not a .ht file is ignored.
        if not entry.endswith(".ht"):
            continue
        contents = parseFile(os.path.join(dir, entry))
        print(entry)
        parsed.append([entry, contents])
    return parsed
def extractSharedEntries(strings):
    """Move entries common to all dictionaries into a ``shared.ht`` entry.

    An entry is shared when its key is present in every dictionary and
    maps to the same value in each of them.  Shared entries are removed
    from the individual dictionaries and collected in a new dictionary
    that is appended to ``strings`` as ``["shared.ht", shared_dict]``.

    Args:
        strings: list of ``[file_name, dict]`` pairs.  The list and the
            dictionaries in it are modified in place.

    Returns:
        The same ``strings`` list, with the shared entry appended.  For
        an empty input the result is ``[["shared.ht", {}]]``.
    """
    shared = {}

    # With no input there is no reference dictionary to compare against.
    if strings:
        reference = strings[0][1]
        for key, value in reference.items():
            in_all = True
            # No early exit: every dictionary holding a different value
            # gets its own diagnostic message.
            for pair in strings:
                entries = pair[1]
                if key not in entries:
                    in_all = False
                elif entries[key] != value:
                    print("Element with different values")
                    print(key)
                    in_all = False
            if in_all:
                shared[key] = value

    for pair in strings:
        for key in shared:
            pair[1].pop(key)

    strings.append(["shared.ht", shared])
    return strings
def writeOutFiles(dir, strings):
    """Write each dictionary to a ``.properties`` file inside ``dir``.

    The output file name is the entry's name with a trailing ``.ht``
    replaced by ``.properties``; names without that suffix are used
    unchanged.  Every entry is written as one ``key=value`` line.

    Args:
        dir: existing directory that receives the output files.
        strings: iterable of ``[file_name, dict]`` pairs.
    """
    for string in strings:
        file_name_base = string[0]
        # Replace only the suffix: str.replace would also rewrite a ".ht"
        # in the middle of the name (e.g. "my.html.ht").
        if file_name_base.endswith(".ht"):
            file_name_base = file_name_base[:-len(".ht")] + ".properties"
        file_name = os.path.join(dir, file_name_base)

        # The context manager closes the file even if a write fails.
        with open(file_name, "w") as out:
            for key, value in string[1].items():
                try:
                    # Build the whole line first so a failure cannot
                    # leave a partial "key=" line in the output.
                    out.write(key + "=" + value + "\n")
                except UnicodeDecodeError:
                    print(key)
                    print(value)
def main(args):
    """Run the conversion for the given command-line argument list.

    Args:
        args: argument list laid out like ``sys.argv``: program name,
            input directory, output directory.  For any other length the
            usage text is printed and the process exits with status 1.
    """
    expected_arg_count = 3
    if len(args) != expected_arg_count:
        usage()
        sys.exit(1)

    in_dir, out_dir = args[1], args[2]
    parsed = parseFiles(in_dir)
    with_shared = extractSharedEntries(parsed)
    writeOutFiles(out_dir, with_shared)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)
|