-
Notifications
You must be signed in to change notification settings - Fork 1
/
bdpify.py
111 lines (95 loc) · 3.8 KB
/
bdpify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import unicodecsv as csv
import uuid
import sys
import getopt
# Input columns (by index) that are copied through unchanged, keyed to the
# name of the BDP field each one ends up in
BDP_MAP = {
    3: 'adminID',
    4: 'admin',
    15: 'programID',
    16: 'program',
    21: 'fundID',
    22: 'fund',
    24: 'geocode',
}

# Functional classification (mapped to cofog) lives in these columns
COFOG_FIELDS = (5, 7, 9)

# Column holding the amount
AMOUNT = 26

# Mexican functional classification to cofog lookup; it starts out empty
# and is filled in by generate_cofog_map()
COFOG_MAP = {}
def generate_cofog_map(filename):
    '''
    Read in the Mexican functional classification to cofog map

    filename -- path of the csv file holding the mapping

    The first row of the file is treated as a header and skipped. For
    every other row, all columns but the last are joined with dots to
    form the key and the last column is stored as its value in the
    module-level COFOG_MAP. Blank lines are ignored.
    '''
    # unicodecsv works on byte streams, so the file is opened in binary mode
    with open(filename, 'rb') as cofog_map_file:
        reader = csv.reader(cofog_map_file)
        # Skip the header row; the default keeps an empty file from raising
        next(reader, None)
        for row in reader:
            # A blank line comes back as an empty row and maps nothing
            if not row:
                continue
            # We join the Mexican classification with dots
            mexican_id = '.'.join(row[:-1])
            COFOG_MAP[mexican_id] = row[-1]
def generate_output_headers():
    '''
    Build the header row of the output csv

    The manually built columns come first, followed by the BDP names of
    the mapped fields in ascending order of their input column index.
    '''
    # Columns we add ourselves (or derive from existing fields somehow)
    manual_headers = ['id', 'amount', 'cofog', 'functionalID', 'functional']
    # Columns copied straight from the input, ordered by column index
    mapped_headers = [BDP_MAP[column] for column in sorted(BDP_MAP)]
    return manual_headers + mapped_headers
def output_writer(filename, reader):
    '''
    Write the BDP version of every input row to a csv file

    filename -- path of the csv file to write (it is overwritten)
    reader -- iterable of input rows, each one a sequence of column values

    A header row (see generate_output_headers) is written first. Every
    output row then holds a generated id, the amount as a float, the cofog
    value, the dot-joined Mexican functional ids, the joined Mexican
    functional labels and finally the BDP_MAP columns ordered by input
    column index.

    Raises KeyError when the functional classification of a row is not in
    COFOG_MAP and ValueError when the amount can not be parsed as a number.
    '''
    # The mapped columns are the same for every row, so order them once
    mapped_fields = sorted(BDP_MAP)
    # unicodecsv emits encoded bytes, hence binary mode (which also keeps
    # the platform from translating the line endings the writer produces)
    with open(filename, 'wb') as output_file:
        writer = csv.writer(output_file)
        # Write the headers
        writer.writerow(generate_output_headers())
        for input_row in reader:
            # We need to grab the Mexican functional ids, concatenate them
            # on . (something we chose) and look them up in the cofog map
            # we have generated which will give us the cofog value
            mexican_ids = [input_row[field] for field in COFOG_FIELDS]
            functional_id = '.'.join(mexican_ids)
            # The label of each functional id sits in the column right
            # after the id itself
            mexican_labels = [input_row[field + 1] for field in COFOG_FIELDS]
            output_row = [
                # Initialize the row with a generated id
                uuid.uuid4().hex,
                # Amount is represented with , and . e.g. 4,321.56 but it
                # should just be represented as 4321.56 so we remove the
                # commas.
                float(input_row[AMOUNT].replace(',', '')),
                COFOG_MAP[functional_id],
                functional_id,
                ' - '.join(mexican_labels),
            ]
            # Then lastly we add the values of the mapped fields and write
            # out the row to the csv file
            output_row.extend(input_row[field_id] for field_id in mapped_fields)
            writer.writerow(output_row)
def input_reader(filename):
    '''
    Yield the usable data rows of the input csv file

    filename -- path of the csv file to read

    The first row is treated as a header and ignored. Rows that are empty
    or whose last column is an empty string are skipped; every other row
    is yielded as returned by the csv reader.
    '''
    # unicodecsv works on byte streams, so the file is opened in binary mode
    with open(filename, 'rb') as input_file:
        reader = csv.reader(input_file)
        # Ignore header row; the default keeps an empty file from raising
        # inside the generator
        next(reader, None)
        for input_row in reader:
            # A blank line comes back as an empty row, which has no last
            # column to inspect
            if not input_row or input_row[-1] == '':
                continue
            yield input_row
def parse_commandline():
    '''
    Parse command line variables
    -m cofog-mapping-file
    -o output-file

    Exactly one positional argument, the input file, is required.

    Returns a dictionary that always has the key 'input' and, only when
    the matching option was given, the keys 'output' and 'cofog mapping'.

    Exits the program with an error message and a non-zero status when
    the number of positional arguments is not one. An unknown option makes
    getopt raise getopt.GetoptError, which is not caught here.
    '''
    # Parse commandline using getopts
    myopts, args = getopt.getopt(sys.argv[1:], "o:m:")
    if len(args) != 1:
        # Handing the message to sys.exit prints it on stderr and makes
        # the exit status non-zero, so calling scripts can see the failure
        sys.exit("We only support one argument")

    output = {'input': args[0]}
    for option, value in myopts:
        if option == '-o':
            output['output'] = value
        elif option == '-m':
            output['cofog mapping'] = value
    return output
if __name__ == '__main__':
    options = parse_commandline()
    # Fall back to the default locations when -m / -o were not given
    cofog_map_path = options.get('cofog mapping', 'data/cofog_map.csv')
    output_path = options.get('output', 'output.csv')
    # The cofog map has to be filled in before any row is converted
    generate_cofog_map(cofog_map_path)
    # input_reader is a generator, so rows are read as the writer consumes
    # them
    output_writer(output_path, input_reader(options['input']))