# satp.py
import csv
from pattern.web import URL, DOM, plaintext, strip_between
from pattern.web import NODE, TEXT, COMMENT, ELEMENT, DOCUMENT
# 2013 data sheet: download the current "majorincidents" page and collect the
# plain text of its table cells into ``myarray`` (one string per <td>).
url = URL('http://www.satp.org/satporgtp/countries/pakistan/database/majorincidents.htm')
dom = DOM(url.download(cached=True))
tab = dom.by_tag('table')  # not referenced again in the visible script
# The first 11 <td> cells are skipped -- presumably layout/header cells that
# precede the data rows; TODO confirm against the live page.
myarray = [plaintext(cell.content) for cell in dom.by_tag('td')[11:]]
def chunks(l, n):
    """Split the sequence ``l`` into consecutive slices of length ``n``.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.

    Args:
        l: Any sliceable object with a length (list, tuple, string, ...).
        n: Chunk size; must be a positive integer.

    Returns:
        A list of slices of ``l`` in their original order. The list is
        empty when ``l`` is empty.

    Raises:
        ValueError: If ``n`` is zero or negative. A negative step would
            otherwise make ``range`` empty and silently discard all data.
    """
    if n <= 0:
        raise ValueError("chunk size n must be positive, got %r" % (n,))
    return [l[start:start + n] for start in range(0, len(l), n)]
# Group the flat list of cell texts into rows of five columns:
# serial number, date, incident description, killed, injured.
yes = chunks(myarray, 5)

# "wb" is the mode the Python 2 csv module expects for its output file.
# The with-block guarantees the file is closed even if a row fails.
with open("satpincidents2013.csv", "wb") as output:
    writer = csv.writer(output)
    for row in yes:
        # Index each column explicitly so an incomplete trailing chunk
        # raises IndexError instead of silently writing a ragged row.
        writer.writerow([row[0], row[1], row[2], row[3], row[4]])
# Data sheets before 2013 (2012 shown here): these pages are read
# paragraph by paragraph instead of cell by cell.
url12 = URL('http://www.satp.org/satporgtp/countries/pakistan/database/majorincidents2012.htm')
dom12 = DOM(url12.download(cached=True))
# The first <p> element is skipped; the rest are reduced to plain text.
array12 = [plaintext(para.content) for para in dom12.by_tag('p')[1:]]
# Break every paragraph apart at each ":" character.
new = [entry.split(":") for entry in array12]
# Parenthesised single argument prints the same under the Python 2 print statement.
print(new)