/
extract_learning_outcomes.py
51 lines (30 loc) · 1.37 KB
/
extract_learning_outcomes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import csv
import subprocess
from lib.pdfdata import parse_fdf
from lib.handbookdata import get_module_list, TF_SUPPORT_FIELDS
if __name__ == "__main__":
    # Extract the "learning_outcomes" form field from every COMSC module
    # description PDF found under comparisons/shared_drive/<module code>/ and
    # write one CSV row per module to extracted/learning_outcomes.csv.
    extract_dir = os.path.join(os.getcwd(), 'extracted')
    # Create the output folder up front so the final write cannot fail on a
    # missing directory after all the PDFs have been processed.
    os.makedirs(extract_dir, exist_ok=True)
    output_file = os.path.join(extract_dir, 'learning_outcomes.csv')
    description_folder = os.path.join(os.getcwd(), 'comparisons', 'shared_drive')
    # Scratch file (in the current working directory) that pdftk dumps the
    # PDF form fields into; it is overwritten for every module.
    fdf_file = "md_data.fdf"

    keys = [
        "Module Code",
        "Learning Outcomes",
    ]

    # Collect all rows before touching the output file, so a failure part-way
    # through does not leave a truncated/empty CSV behind.
    module_data = []
    for module in get_module_list('COMSC'):
        mcode = module['moduleCode']
        module_folder = os.path.join(description_folder, mcode)
        if not os.path.exists(module_folder):
            # No description folder for this module: it gets no row at all.
            continue
        print(mcode)

        # Every module with a folder gets a row; "Learning Outcomes" is left
        # out (DictWriter writes it as an empty cell) when nothing is found.
        mdata = {"Module Code": mcode}
        module_data.append(mdata)

        pdf_file = os.path.join(module_folder, "%s_module_description.pdf" % mcode)
        result = subprocess.run(["pdftk", pdf_file, "dump_data_fields_utf8", "output", fdf_file])
        if result.returncode != 0:
            # Without this check the scratch file from the PREVIOUS module
            # would be parsed and its outcomes attributed to this module.
            print("pdftk failed for %s (exit code %d); no learning outcomes recorded"
                  % (mcode, result.returncode))
            continue

        with open(fdf_file, 'r', encoding='utf-8') as data_input:
            # parse_fdf is project-local; it is used here as returning a
            # mapping of field name -> value (it must support .get()).
            data = parse_fdf(data_input)
        if data.get("learning_outcomes"):
            # Stored exactly as parsed (no whitespace stripping).
            mdata["Learning Outcomes"] = data["learning_outcomes"]

    # newline='' is required by the csv module so that embedded newlines in
    # the outcomes text are quoted correctly and no extra blank rows appear on
    # Windows; utf-8 matches the encoding the FDF data was read with.
    with open(output_file, 'w', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(out, keys)
        writer.writeheader()
        writer.writerows(module_data)