/
parsers.py
136 lines (107 loc) · 3.62 KB
/
parsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import logging
import StringIO
import threading
import urllib
import urlparse
import zipfile
from google.appengine.api import urlfetch
from xml.dom.minidom import parseString
from xml import sax
class Parser(object):
    """Base class for parsers that fetch a document and extract its points.

    Subclasses provide the SAX content handlers to try by overriding
    ``_getContentHandlers`` and may override ``_preProcess`` to transform the
    fetched body before it is handed to the SAX parser.
    """

    def parse(self, uri):
        """Fetch ``uri`` and return the points found in its content.

        Handlers are taken one at a time from the END of the list returned by
        ``_getContentHandlers`` until one of them yields a non-empty
        ``points`` list.

        Args:
          uri: Location of the document; passed to ``urlfetch.fetch``.

        Returns:
          The ``points`` of the first handler that produced any; the empty
          ``points`` of the last handler tried when none did; or None when
          the fetch did not return status 200 or no handlers were supplied.
        """
        result = urlfetch.fetch(uri)
        if result.status_code != 200:
            # TODO(moishel): raise parse exception
            # Until then, at least leave a trace instead of failing silently.
            logging.warning('Fetching %s returned HTTP status %s',
                            uri, result.status_code)
            return None

        content = self._preProcess(result.content)
        handlers = self._getContentHandlers()
        points = None
        while not points and handlers:
            handler = handlers.pop()
            sax.parseString(content, handler)
            points = handler.points
        return points

    def _preProcess(self, content):
        """Return the content to parse; the base implementation is a no-op."""
        return content

    def _getContentHandlers(self):
        """Return a list of fresh SAX content handlers exposing ``points``.

        Raises:
          NotImplementedError: always, in the base class; subclasses must
            override this hook.
        """
        raise NotImplementedError(
            '%s must implement _getContentHandlers' % type(self).__name__)
class GpxContentHandler(sax.handler.ContentHandler):
    """SAX handler that collects the coordinates of GPX point elements.

    Attributes:
      points: ``[lat, lon]`` pairs, in document order, holding the attribute
        values exactly as they appear in the document.
    """

    # GPX names its waypoint element 'wpt'. The previously matched spelling
    # 'waypt' is kept so documents that relied on it still parse the same.
    _POINT_ELEMENTS = ('trkpt', 'wpt', 'waypt', 'rtept')

    def __init__(self):
        sax.handler.ContentHandler.__init__(self)
        self.points = []

    def startElement(self, name, attrs):
        """Record the ``lat``/``lon`` attributes of every point element.

        Raises:
          KeyError: if a point element lacks a ``lat`` or ``lon`` attribute.
        """
        if name in self._POINT_ELEMENTS:
            self.points.append([attrs.getValue('lat'),
                                attrs.getValue('lon')])
class GpxParser(Parser):
    """Parser for GPX documents."""

    def _getContentHandlers(self):
        """Return a one-element list holding a fresh GpxContentHandler."""
        gpx_handler = GpxContentHandler()
        return [gpx_handler]
class CoordinateBlockContentHandler(sax.handler.ContentHandler):
    """SAX handler collecting points from ``<coordinates>`` text.

    Only ``<coordinates>`` elements nested inside the element named by
    ``self.bounding_el`` are considered. This class does not set
    ``bounding_el`` itself; it must be assigned (for example by a subclass
    constructor) before the handler is used.

    Attributes:
      in_linestring: True while inside the bounding element.
      in_coordinates: True while inside a relevant ``<coordinates>`` element.
      content: Text accumulated for the current ``<coordinates>`` element.
      points: ``[lat, lng]`` string pairs collected so far.
    """

    def __init__(self):
        sax.handler.ContentHandler.__init__(self)
        self.in_linestring = False
        self.in_coordinates = False
        self.content = ""
        self.points = []

    def startElement(self, name, attrs):
        """Track entry into the bounding element and its coordinates."""
        if name == self.bounding_el:
            self.in_linestring = True
        elif self.in_linestring and name == 'coordinates':
            self.in_coordinates = True

    def endElement(self, name):
        """Parse the buffered text when a relevant coordinates block ends."""
        if name == self.bounding_el:
            self.in_linestring = False
        elif name == 'coordinates':
            if self.in_coordinates and self.in_linestring:
                self.process_coordinates()
            self.in_coordinates = False
            self.content = ""

    def characters(self, content):
        """Buffer text; the parser may deliver one text node in pieces."""
        if self.in_coordinates and self.in_linestring:
            self.content += content

    def process_coordinates(self):
        """Append a ``[lat, lng]`` pair for every tuple in ``self.content``.

        Tuples are ``lng,lat[,alt]`` and may be separated by any whitespace
        (spaces, tabs or newlines), so the text is split on whitespace runs
        rather than on single spaces only. Tuples with fewer than two fields
        or with an empty lat/lng are skipped.
        """
        for coord in self.content.split():
            fields = coord.split(',')
            if len(fields) >= 2:
                lat = fields[1].strip()
                lng = fields[0].strip()
                if lat and lng:
                    self.points.append([lat, lng])
class KmlLineStringContentHandler(CoordinateBlockContentHandler):
    """Coordinate-block handler whose bounding element is ``LineString``."""

    def __init__(self):
        """Initialise the shared parsing state, then name the bounding tag."""
        CoordinateBlockContentHandler.__init__(self)
        # Element name the inherited start/end callbacks compare against.
        self.bounding_el = 'LineString'
class KmlLinearRingContentHandler(CoordinateBlockContentHandler):
    """Coordinate-block handler whose bounding element is ``LinearRing``."""

    def __init__(self):
        """Initialise the shared parsing state, then name the bounding tag."""
        CoordinateBlockContentHandler.__init__(self)
        # Element name the inherited start/end callbacks compare against.
        self.bounding_el = 'LinearRing'
class KmlTrackContentHandler(sax.handler.ContentHandler):
    """SAX handler collecting points from ``gx:coord`` inside ``gx:Track``.

    Attributes:
      in_track: True while inside a ``gx:Track`` element.
      in_coord: True while inside a ``gx:coord`` element of a track.
      content: Text accumulated for the current ``gx:coord`` element.
      points: ``[lat, lng]`` string pairs collected so far.
    """

    def __init__(self):
        sax.handler.ContentHandler.__init__(self)
        self.in_track = False
        self.in_coord = False
        self.content = ""
        self.points = []

    def startElement(self, name, attrs):
        """Track entry into ``gx:Track`` and its ``gx:coord`` children."""
        if name == 'gx:Track':
            self.in_track = True
        elif self.in_track and name == 'gx:coord':
            self.in_coord = True
            self.content = ""

    def endElement(self, name):
        """Parse the buffered text once a ``gx:coord`` element is complete."""
        if name == 'gx:Track':
            self.in_track = False
        elif name == 'gx:coord':
            if self.in_coord:
                self._process_coord()
            self.in_coord = False
            self.content = ""

    def characters(self, content):
        """Buffer text instead of parsing it immediately.

        A SAX parser may report one text node through several calls, so a
        single chunk is not guaranteed to hold the whole coordinate.
        """
        if self.in_coord:
            self.content += content

    def _process_coord(self):
        """Append ``[lat, lng]`` parsed from the buffered ``lng lat [alt]``."""
        # Split on any run of whitespace so padding, tabs or newlines around
        # the values do not produce empty fields.
        fields = self.content.split()
        if len(fields) >= 2:
            lat = fields[1]
            lng = fields[0]
            self.points.append([lat, lng])
class KmlParser(Parser):
    """Parser for KML documents."""

    def _getContentHandlers(self):
        """Return fresh handlers for LineString, gx:Track and LinearRing.

        The list is ordered LineString, Track, LinearRing; Parser.parse pops
        handlers from the end of the list it receives.
        """
        handlers = [KmlLineStringContentHandler()]
        handlers.append(KmlTrackContentHandler())
        handlers.append(KmlLinearRingContentHandler())
        return handlers
class KmzParser(KmlParser):
    """Parser for KMZ documents (zip archives containing a KML file)."""

    def _preProcess(self, content):
        """Return the bytes of the KML document stored in the KMZ archive.

        The first archive entry whose name ends in ``.kml`` (compared
        case-insensitively) is used, so resources such as images stored
        ahead of the document are skipped. When no entry has that extension
        the first entry is returned.

        Args:
          content: Raw bytes of the KMZ (zip) archive.

        Returns:
          The uncompressed bytes of the selected entry.

        Raises:
          IndexError: if the archive contains no entries.
          zipfile.BadZipfile: if ``content`` is not a valid zip archive.
        """
        archive = zipfile.ZipFile(StringIO.StringIO(content))
        try:
            names = archive.namelist()
            kml_names = [n for n in names if n.lower().endswith('.kml')]
            target = kml_names[0] if kml_names else names[0]
            return archive.read(target)
        finally:
            # Release the archive even when reading the entry fails.
            archive.close()