/
intersect_maps.py
234 lines (227 loc) · 9.86 KB
/
intersect_maps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import bisect
import json
import progress
import zoning
def calculate_stream_size(stream):
old_pos = stream.tell()
stream.seek(0, 2)
size = f.tell()
stream.seek(old_pos, 0)
return size
class NullFeatures(object):
def __init__(self, map1_len, map2_len):
self._mapping = map1_len * map2_len
self._max_mapping = self._mapping * map1_len + 1
self.regions = []
def add_null_region(self, fromn, fromi, ton, toi):
fromid = fromn * self._mapping + fromi
toid = ton * self._mapping + toi
bisect.insort_right(self.regions, (fromid, toid))
def is_null(self, n, i):
fid = n * self._mapping + i
j = bisect.bisect_right(self.regions, (fid, self._max_mapping))
if j == 0:
return False
else:
return self.regions[j - 1][1] >= fid
def load_save_file(stream, logger = None):
if hasattr(stream, "name"):
estimator = progress.TimeEstimator(logger, 0, calculate_stream_size(stream), precision = 1)
else:
estimator = None
save = {}
map1_len = None
map2_len = None
null_features = None
last_index = None
for line in stream:
if estimator is not None:
estimator.increment(len(line))
data = json.loads(line)
if map1_len is None:
map1_len = data["MAP1_LEN"]
map2_len = data["MAP2_LEN"]
save[None] = null_features = NullFeatures(map1_len, map2_len)
continue;
elif data[0] is None:
fromn, fromi = data[1]
ton, toi = data[2]
last_index = data[2]
null_features.add_null_region(fromn, fromi, ton, toi)
#for n in range(fromn, ton + 1):
# for i in range(fromi, map2_len):
# if n == ton and i == toi:
# break
# save[(n, i)] = None
elif data[2] is None:
save[(data[0], data[1])] = None
last_index = data[:2]
else:
features = []
for f in data[2:]:
if f is None:
features.append(None)
else:
features.append(zoning.parse_feature(f))
save[(data[0], data[1])] = features
last_index = data[:2]
save["LAST_INDEX"] = last_index
return save
class StateSaver(object):
def __init__(self, save_state_to, flush_interval = 50000):
self.stream = save_state_to
self.flush_interval = flush_interval
self.current_state_flush = 0
self.last_state_flush = 0
self.nulls_start = None
def record_map_sizes(self, map1_len, map2_len):
if self.stream is not None:
self.stream.write("%s\n" % json.dumps({"MAP1_LEN" : map1_len, "MAP2_LEN" : map2_len}))
def record(self, n, i, *args):
if self.stream is None:
return
self.current_state_flush += 1
flush = (self.current_state_flush - self.last_state_flush) >= self.flush_interval
if args:
if self.nulls_start:
line = "[null,[%d,%d],[%d,%d]]" % (self.nulls_start[0], self.nulls_start[1], n, i)
self.stream.write("%s\n" % line)
flush = True
self.nulls_start = None
a = []
for arg in args:
if arg is None:
a.append(None)
else:
a.append(arg.to_geo())
line = json.dumps([n, i] + a)
self.stream.write("%s\n" % line)
else:
if self.nulls_start is None:
self.nulls_start = [n, i]
#line = json.dumps([n, i, None])
if flush:
self.last_state_flush = self.current_state_flush
self.stream.flush()
def intersect(map1, map2, logger = None, previous_save = None, save_state_to = None, incremental_save_path = None, incremental_save_time = 600):
if logger is None:
logger = lambda m : None
map1 = zoning.ModifiableMap(map1)
map2 = zoning.ModifiableMap(map2)
estimator = progress.TimeEstimator(logger, 0, len(map1) * len(map2), precision = 2, interval = 3.0)
saver = StateSaver(save_state_to)
last_incremental_save = 0
if previous_save is not None:
logger("\r%s\rFast-forwarding using saved state...\n" % (' ' * 40))
last_n, last_i = previous_save["LAST_INDEX"]
estimator.end_value = (last_n - 1) * len(map2) + last_i
else:
saver.record_map_sizes(len(map1), len(map2))
for n, f1 in enumerate(map1):
if f1.geometry.is_empty:
continue
for i, f2 in enumerate(map2):
if previous_save is not None and n <= last_n:
if (n, i) in previous_save:
state = previous_save[(n,i)]
if state is None:
continue
map2[i] = state[0]
if state[1] is not None:
map2.append(state[1])
map1[n] = state[2]
estimator.increment()
if map1[n].geometry.is_empty:
estimator.increment(len(map2) - i)
break
continue
elif previous_save[None].is_null(n, i):
estimator.increment()
continue
elif n < last_n or (n == last_n and i <= last_i):
estimator.increment()
continue
elif n == last_n and i == last_i:
estimator.end_value = len(map1) * len(map2)
logger("\r%s\rDone.\n" % (' ' * 40))
estimator.update(n * len(map2) + i)
if f2.geometry.is_empty:
saver.record(n, i)
continue
try:
isect = f1.geometry.intersection(f2.geometry)
except Exception as e:
logger("\r%s\rError: %s\n" % (' ' * 40, e))
estimator.force_next_refresh()
continue
if isect.is_empty:
saver.record(n, i)
continue
area_delta = 10.0 # square meters
new_feature = zoning.ZoningFeature("%s->%s" % (f1.objectid, f2.objectid), f2.zoning, isect, f2.old_zoning + f1.zoning)
new_state = [None, None, None]
if new_feature.area() < area_delta:
# The intersection is less than area_delta square meters, so it's probably just floating point error.
# Skip it!
saver.record(n, i)
continue
elif f2.area() - area_delta < new_feature.area():
# the intersection is almost covering the entire preexisting area, so just assume that they're identical.
new_feature = zoning.ZoningFeature("%s->%s" % (f1.objectid, f2.objectid), f2.zoning, f2.geometry, f2.old_zoning + f1.zoning)
else:
# add a new feature containing the portion of f2 that does not intersect with f1
new_geom = f2.geometry.difference(new_feature.geometry)
if not new_geom.is_empty:
map2.append(zoning.ZoningFeature("%s.2" % f2.objectid, f2.zoning, new_geom, f2.old_zoning))
estimator.end_value = len(map1) * len(map2)
new_state[1] = map2[-1]
map2[i] = new_feature
new_state[0] = map2[i]
logger("\r%s\rPlot %s (%.02f acres) -> %s (%.02f acres) went from %s to %s\n" % (' ' * 40, f1.objectid, zoning.square_meters_to_acres(f1.area()), f2.objectid, zoning.square_meters_to_acres(new_feature.area()), f1.zoning, f2.zoning))
estimator.force_next_refresh()
# Delete the portion of overlap in f1 to hopefully speed up further comparisons:
# (This is making the assumption that the zoning regions in map2 are non-overlapping)
map1[n] = zoning.ZoningFeature(f1.objectid, f1.zoning, f1.geometry.difference(isect))
new_state[2] = map1[n]
saver.record(n, i, *new_state)
if incremental_save_path and estimator.get_time() - last_incremental_save >= incremental_save_time:
# do an incremental save once every incremental_save_time seconds
logger("\r%s\rDoing an incremental save to %s..." % (' ' * 40, incremental_save_path))
last_incremental_save = estimator.get_time()
with open(incremental_save_path, 'w') as f:
map2.save(f)
if map1[n].geometry.is_empty:
break
logger('\n')
return map2
if __name__ == "__main__":
import os
import sys
with open(sys.argv[1], 'r') as f1:
with open(sys.argv[2], 'r') as f2:
def logger(msg):
sys.stderr.write(msg)
sys.stderr.flush()
previous_save = None
save_state_to = None
if len(sys.argv) >= 4:
if os.path.exists(sys.argv[3]):
logger('Loading save state...\n')
with open(sys.argv[3], 'r') as f:
previous_save = load_save_file(f)
logger("\r%s\rLoaded.\n" % (' ' * 40))
save_state_to = open(sys.argv[3], 'a')
try:
intersected = intersect(zoning.ZoningMap(f1), zoning.ZoningMap(f2), logger = logger, previous_save = previous_save, save_state_to = save_state_to, incremental_save_path = "%s.incremental" % sys.argv[3])
finally:
if save_state_to is not None:
save_state_to.close()
intersected.save(sys.stdout)
os.unlink("%s.incremental" % sys.argv[3])
## Sanity check:
#import StringIO
#output = StringIO.StringIO()
#intersected.save(output)
#output.seek(0)
#list(zoning.ZoningMap(output))
#output.close()