/
local.py
112 lines (94 loc) · 3.68 KB
/
local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import csv
import json
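# Processing (disabled): copies the first 1000 lines of treasury1.csv into
# treasury.csv, the file read by the functions below.
# with open('treasury1.csv', 'r') as f:
#     lines = f.readlines()
# with open('treasury.csv', 'w+') as f1:
#     l = 1000
#     for i in range(l):
#         # print "{} out of {} complete".format(i, l)
#         f1.write(lines[i])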
def intersect(results):
    """Return the items of the first list that also occur in every other list.

    Args:
        results: A sequence of lists. Items are compared with ``in``
            (equality), so they do not need to be hashable.

    Returns:
        ``[]`` when ``results`` is empty; the first list itself (not a copy)
        when it is the only one; otherwise a new list holding the items of
        ``results[0]``, in their original order and with duplicates kept,
        that are members of all remaining lists.
    """
    if not results:
        return []
    if len(results) == 1:
        return results[0]
    # Slice once instead of once per item, and let all() stop at the first
    # list that lacks the item.
    first, others = results[0], results[1:]
    return [item for item in first if all(item in other for other in others)]
# (label, low, high) inclusive bounds for the employee-count buckets;
# None means the bucket is unbounded on that side.
_EMPLOYEE_BUCKETS = (
    ('Under 10 employees', None, 9),
    ('Between 10 - 49 employees', 10, 49),
    ('Between 50 - 99 employees', 50, 99),
    # NOTE(review): "Over 100" is strictly greater than 100, so a count of
    # exactly 100 falls into no bucket -- confirm that this is intended.
    ('Over 100 employees', 101, None),
)


def _parse_int(text):
    """Return ``int(text)``, or None when text is not a valid integer."""
    try:
        return int(text)
    except (TypeError, ValueError):
        return None


def _value_matches(cat_name, cur_val, search_val):
    """Tell whether one cell value satisfies the search value for a column."""
    if cat_name == 'zipcode':
        zipcode = str(cur_val)
        # Presumably restores a leading zero that was dropped from 5- or
        # 9-digit zip codes; only the first two characters are compared.
        if len(zipcode) < 9 and len(zipcode) != 5:
            zipcode = '0' + zipcode
        return str(search_val[0:2]) == zipcode[0:2]
    if cat_name == 'principalnaicscode':
        # Compare the first four digits only. A malformed search value still
        # raises ValueError; a malformed cell simply does not match.
        wanted = int(str(search_val)[0:4])
        return wanted == _parse_int(str(cur_val)[0:4])
    if cat_name == 'numberofemployees':
        for label, low, high in _EMPLOYEE_BUCKETS:
            if search_val == label:
                count = _parse_int(cur_val)
                if count is None:
                    # Blank or non-numeric cell: treat as "no match".
                    return False
                return ((low is None or count >= low) and
                        (high is None or count <= high))
        return False
    return cur_val == search_val


def filter(query, limit):
    """Return the rows of ``treasury.csv`` that satisfy every query entry.

    Note: this function shadows the builtin ``filter``; the name is kept
    because callers use it.

    Args:
        query: Mapping of column name to search value. Keys that match no
            header column are ignored. The columns 'zipcode',
            'principalnaicscode' and 'numberofemployees' use the special
            comparisons in ``_value_matches``; every other column requires
            the cell to equal the search value.
        limit: Number of leading records of the file to consider. The header
            counts as one of them (the file is sliced with ``[0:limit]``).

    Returns:
        A list of rows (each a list of strings) in file order. Empty when
        the file has no records within ``limit`` or when no query key names
        a header column.

    Raises:
        ValueError: If a 'principalnaicscode' search value does not start
            with digits and at least one data row is examined.
    """
    # csv.reader (instead of splitting on ',') keeps quoted fields that
    # contain commas in one cell and drops the trailing line ending, so the
    # last column can be matched as well.
    with open('treasury.csv', 'r') as f:
        rows = list(csv.reader(f))[0:limit]
    if not rows:
        return []

    header = rows[0]
    criteria = [
        (column, name, query[key])
        for key in query
        for column, name in enumerate(header)
        if str(name) == str(key)
    ]
    if not criteria:
        return []

    # One pass over the rows: a row is kept only if it satisfies all
    # criteria, which is the intersection of the per-column matches.
    matches = []
    for row in rows[1:]:
        if all(column < len(row) and _value_matches(name, row[column], wanted)
               for column, name, wanted in criteria):
            matches.append(row)
    return matches
def extract(results, keys):
    """Serialize selected columns of the given rows as a JSON string.

    Column positions are looked up in the header (first record) of
    ``treasury.csv``.

    Args:
        results: Iterable of rows; each row is indexed by column position.
        keys: Column names to include. Names that are not in the header are
            ignored.

    Returns:
        A JSON object (indented by 2) that maps each row's position, as a
        string, to an object of ``{column name: value}`` for the selected
        columns.

    Raises:
        IndexError: If a row is shorter than a selected column position.
    """
    # Only the header is needed, so read just the first record. csv.reader
    # also drops the trailing line ending, which otherwise made the last
    # column impossible to select.
    with open('treasury.csv', 'r') as f:
        header = next(csv.reader(f), [])

    selected = [
        (index, name)
        for key in keys
        for index, name in enumerate(header)
        if key == name
    ]

    json_dict = {}
    for position, result in enumerate(results):
        json_dict[str(position)] = {
            name: result[index] for index, name in selected
        }
    # with open('templates/data.json', 'w') as outfile:
    #     json.dump(json_dict, outfile, indent=2)
    return json.dumps(json_dict, indent=2)
def filter_and_extract(query, keys, limit=1000):
    """Filter the data with ``query`` and return the ``keys`` columns as JSON.

    Passes ``query`` and ``limit`` to ``filter`` and hands the resulting rows,
    together with ``keys``, to ``extract``; the value returned by ``extract``
    is returned unchanged.
    """
    return extract(filter(query, limit), keys)