Skip to content

Commit

Permalink
Merge pull request #658 from mapbox/coerce-feature-ids
Browse files Browse the repository at this point in the history
Add an option to convert stringified number feature IDs to numbers
  • Loading branch information
e-n-f committed Nov 7, 2018
2 parents a315064 + cc7c0cc commit 949468e
Show file tree
Hide file tree
Showing 15 changed files with 267 additions and 8 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
## 1.32.3

* Add an option to convert stringified number feature IDs to numbers
* Add an option to use a specified feature attribute as the feature ID

## 1.32.2

* Warn in tile-join if tilesets being joined have inconsistent maxzooms
Expand Down
4 changes: 3 additions & 1 deletion Makefile
Expand Up @@ -95,7 +95,9 @@ suffixes = json json.gz
rm $@.out $@.mbtiles

# Don't test overflow with geobuf, because it fails (https://github.com/mapbox/geobuf/issues/87)
geobuf-test: tippecanoe-json-tool $(addsuffix .checkbuf,$(filter-out tests/overflow/out/-z0.json,$(TESTS)))
# Don't test stringids with geobuf, because it fails
nogeobuf = tests/overflow/out/-z0.json $(wildcard tests/stringid/out/*.json)
geobuf-test: tippecanoe-json-tool $(addsuffix .checkbuf,$(filter-out $(nogeobuf),$(TESTS)))

# For quicker address sanitizer build, hope that regular JSON parsing is tested enough by parallel and join tests
fewer-tests: tippecanoe tippecanoe-decode geobuf-test raw-tiles-test parallel-test pbf-test join-test enumerate-test decode-test join-filter-test unit
Expand Down
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -352,6 +352,8 @@ resolution is obtained than by using a smaller _maxzoom_ or _detail_.
`sum`, `product`, `mean`, `max`, `min`, `concat`, or `comma`
to specify how the named _attribute_ is accumulated onto the attribute of the same name in a feature that does survive.
* `-pe` or `--empty-csv-columns-are-null`: Treat empty CSV columns as nulls rather than as empty strings.
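* `-aI` or `--convert-stringified-ids-to-numbers`: If a feature ID is the string representation of an integer (for example `"12345"`), convert it to a plain number to use as the feature ID. Strings that are not plain integers (such as `"12345.6789"` or `"1e5"`) or that are too large to represent are not converted, and a warning is printed.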
* `--use-attribute-for-id=`*name*: Use the attribute with the specified *name* as if it were specified as the feature ID. (If this attribute is a stringified number, you must also use `-aI` to convert it to a number.)

### Filtering features by attributes

Expand Down
47 changes: 41 additions & 6 deletions geojson.cpp
Expand Up @@ -113,6 +113,13 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
fprintf(stderr, "Warning: Can't represent non-integer feature ID %s\n", id->string);
warned_frac = true;
}
} else if (std::to_string(id_value) != id->string) {
static bool warned = false;

if (!warned) {
fprintf(stderr, "Warning: Can't represent too-large feature ID %s\n", id->string);
warned = true;
}
} else {
has_id = true;
}
Expand All @@ -125,13 +132,41 @@ int serialize_geojson_feature(struct serialization_state *sst, json_object *geom
}
}
} else {
static bool warned_nan = false;
bool converted = false;

if (additional[A_CONVERT_NUMERIC_IDS] && id->type == JSON_STRING) {
char *err = NULL;
id_value = strtoull(id->string, &err, 10);

if (!warned_nan) {
char *s = json_stringify(id);
fprintf(stderr, "Warning: Can't represent non-numeric feature ID %s\n", s);
free(s); // stringify
warned_nan = true;
if (err != NULL && *err != '\0') {
static bool warned_frac = false;

if (!warned_frac) {
fprintf(stderr, "Warning: Can't represent non-integer feature ID %s\n", id->string);
warned_frac = true;
}
} else if (std::to_string(id_value) != id->string) {
static bool warned = false;

if (!warned) {
fprintf(stderr, "Warning: Can't represent too-large feature ID %s\n", id->string);
warned = true;
}
} else {
has_id = true;
converted = true;
}
}

if (!converted) {
static bool warned_nan = false;

if (!warned_nan) {
char *s = json_stringify(id);
fprintf(stderr, "Warning: Can't represent non-numeric feature ID %s\n", s);
free(s); // stringify
warned_nan = true;
}
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions main.cpp
Expand Up @@ -76,6 +76,7 @@ size_t max_tile_size = 500000;
size_t max_tile_features = 200000;
int cluster_distance = 0;
long justx = -1, justy = -1;
std::string attribute_for_id = "";

int prevent[256];
int additional[256];
Expand Down Expand Up @@ -2541,6 +2542,8 @@ int main(int argc, char **argv) {
{"attribute-description", required_argument, 0, 'Y'},
{"accumulate-attribute", required_argument, 0, 'E'},
{"empty-csv-columns-are-null", no_argument, &prevent[P_EMPTY_CSV_COLUMNS], 1},
{"convert-stringified-ids-to-numbers", no_argument, &additional[A_CONVERT_NUMERIC_IDS], 1},
{"use-attribute-for-id", required_argument, 0, '~'},

{"Filtering features by attributes", 0, 0, 0},
{"feature-filter-file", required_argument, 0, 'J'},
Expand Down Expand Up @@ -2693,6 +2696,8 @@ int main(int argc, char **argv) {
fprintf(stderr, "%s: Can't parse bounding box --%s=%s\n", argv[0], opt, optarg);
exit(EXIT_FAILURE);
}
} else if (strcmp(opt, "use-attribute-for-id") == 0) {
attribute_for_id = optarg;
} else {
fprintf(stderr, "%s: Unrecognized option --%s\n", argv[0], opt);
exit(EXIT_FAILURE);
Expand Down
2 changes: 2 additions & 0 deletions main.hpp
Expand Up @@ -3,6 +3,7 @@

#include <stddef.h>
#include <atomic>
#include <string>

struct index {
long long start = 0;
Expand Down Expand Up @@ -46,6 +47,7 @@ extern size_t TEMP_FILES;
extern size_t max_tile_size;
extern size_t max_tile_features;
extern int cluster_distance;
extern std::string attribute_for_id;

int mkstemp_cloexec(char *name);
FILE *fopen_oflag(const char *name, const char *mode, int oflag);
Expand Down
4 changes: 4 additions & 0 deletions man/tippecanoe.1
Expand Up @@ -424,6 +424,10 @@ that are dropped, coalesced\-as\-needed, or clustered. The \fIoperation\fP may b
to specify how the named \fIattribute\fP is accumulated onto the attribute of the same name in a feature that does survive.
.IP \(bu 2
\fB\fC\-pe\fR or \fB\fC\-\-empty\-csv\-columns\-are\-null\fR: Treat empty CSV columns as nulls rather than as empty strings.
.IP \(bu 2
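\fB\fC\-aI\fR or \fB\fC\-\-convert\-stringified\-ids\-to\-numbers\fR: If a feature ID is the string representation of an integer (for example \fB\fC"12345"\fR), convert it to a plain number to use as the feature ID. Strings that are not plain integers (such as \fB\fC"12345.6789"\fR or \fB\fC"1e5"\fR) or that are too large to represent are not converted, and a warning is printed.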
.IP \(bu 2
\fB\fC\-\-use\-attribute\-for\-id=\fR\fIname\fP: Use the attribute with the specified \fIname\fP as if it were specified as the feature ID. (If this attribute is a stringified number, you must also use \fB\fC\-aI\fR to convert it to a number.)
.RE
.SS Filtering features by attributes
.RS
Expand Down
1 change: 1 addition & 0 deletions options.hpp
Expand Up @@ -23,6 +23,7 @@
#define A_EXTEND_ZOOMS ((int) 'e')
#define A_CLUSTER_DENSEST_AS_NEEDED ((int) 'C')
#define A_GENERATE_IDS ((int) 'i')
#define A_CONVERT_NUMERIC_IDS ((int) 'I')

#define P_SIMPLIFY ((int) 's')
#define P_SIMPLIFY_LOW ((int) 'S')
Expand Down
40 changes: 40 additions & 0 deletions serial.cpp
Expand Up @@ -182,6 +182,7 @@ static void write_geometry(drawvec const &dv, std::atomic<long long> *fpos, FILE
}
}

// called when generating the next zoom level
void serialize_feature(FILE *geomfile, serial_feature *sf, std::atomic<long long> *geompos, const char *fname, long long wx, long long wy, bool include_minzoom) {
serialize_byte(geomfile, sf->t, geompos, fname);

Expand Down Expand Up @@ -372,6 +373,7 @@ static long long scale_geometry(struct serialization_state *sst, long long *bbox
return geom.size();
}

// called from frontends
int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
struct reader *r = &(*sst->readers)[sst->segment];

Expand Down Expand Up @@ -573,6 +575,44 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
}

coerce_value(sf.full_keys[i], sf.full_values[i].type, sf.full_values[i].s, sst->attribute_types);

if (sf.full_keys[i] == attribute_for_id) {
if (sf.full_values[i].type != mvt_double && !additional[A_CONVERT_NUMERIC_IDS]) {
static bool warned = false;

if (!warned) {
fprintf(stderr, "Warning: Attribute \"%s\"=\"%s\" as feature ID is not a number\n", sf.full_keys[i].c_str(), sf.full_values[i].s.c_str());
warned = true;
}
} else {
char *err;
long long id_value = strtoull(sf.full_values[i].s.c_str(), &err, 10);

if (err != NULL && *err != '\0') {
static bool warned_frac = false;

if (!warned_frac) {
fprintf(stderr, "Warning: Can't represent non-integer feature ID %s\n", sf.full_values[i].s.c_str());
warned_frac = true;
}
} else if (std::to_string(id_value) != sf.full_values[i].s) {
static bool warned = false;

if (!warned) {
fprintf(stderr, "Warning: Can't represent too-large feature ID %s\n", sf.full_values[i].s.c_str());
warned = true;
}
} else {
sf.id = id_value;
sf.has_id = true;

sf.full_keys.erase(sf.full_keys.begin() + i);
sf.full_values.erase(sf.full_values.begin() + i);
continue;
}
}
}

}

if (!sst->filters) {
Expand Down
11 changes: 11 additions & 0 deletions tests/stringid/in.json
@@ -0,0 +1,11 @@
{ "type": "Feature", "id": 12345, "properties": {}, "geometry": { "type": "Point", "coordinates": [ 1, 1 ] } }
{ "type": "Feature", "id": "12345", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 2, 1 ] } }
{ "type": "Feature", "id": "12345.6789", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 3, 1 ] } }
{ "type": "Feature", "id": "9837489273489273894728943728903480989080938597489274389", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 4, 1 ] } }
{ "type": "Feature", "id": 9837489273489273894728943728903480989080938597489274389, "properties": {}, "geometry": { "type": "Point", "coordinates": [ 5, 1 ] } }
{ "type": "Feature", "id": "1e5", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 6, 1 ] } }
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27, "something": false }, "geometry": { "type": "Point", "coordinates": [ 7, 1 ] } }
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": "27", "something": false }, "geometry": { "type": "Point", "coordinates": [ 8, 1 ] } }
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27.5, "something": false }, "geometry": { "type": "Point", "coordinates": [ 9, 1 ] } }
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 98237489273489274389728497289472839478297489274892734892, "something": false }, "geometry": { "type": "Point", "coordinates": [ 10, 1 ] } }
{ "type": "Feature", "properties": { "yes": "no", "special": 27, "something": false }, "geometry": { "type": "Point", "coordinates": [ 11, 1 ] } }
38 changes: 38 additions & 0 deletions tests/stringid/out/-z0.json
@@ -0,0 +1,38 @@
{ "type": "FeatureCollection", "properties": {
"bounds": "1.000000,1.000000,11.000000,1.000000",
"center": "1.000000,1.000000,0",
"description": "tests/stringid/out/-z0.json.check.mbtiles",
"format": "pbf",
"json": "{\"vector_layers\": [ { \"id\": \"in\", \"description\": \"\", \"minzoom\": 0, \"maxzoom\": 0, \"fields\": {\"something\": \"Boolean\", \"special\": \"Mixed\", \"yes\": \"String\"} } ],\"tilestats\": {\"layerCount\": 1,\"layers\": [{\"layer\": \"in\",\"count\": 11,\"geometry\": \"Point\",\"attributeCount\": 3,\"attributes\": [{\"attribute\": \"something\",\"count\": 1,\"type\": \"boolean\",\"values\": [false]},{\"attribute\": \"special\",\"count\": 4,\"type\": \"mixed\",\"values\": [\"27\",27,27.5,9.823748927348929e+55],\"min\": 27,\"max\": 9.823748927348929e+55},{\"attribute\": \"yes\",\"count\": 1,\"type\": \"string\",\"values\": [\"no\"]}]}]}}",
"maxzoom": "0",
"minzoom": "0",
"name": "tests/stringid/out/-z0.json.check.mbtiles",
"type": "overlay",
"version": "2"
}, "features": [
{ "type": "FeatureCollection", "properties": { "zoom": 0, "x": 0, "y": 0 }, "features": [
{ "type": "FeatureCollection", "properties": { "layer": "in", "version": 2, "extent": 4096 }, "features": [
{ "type": "Feature", "id": 12345, "properties": { }, "geometry": { "type": "Point", "coordinates": [ 0.966797, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 1.933594, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 2.988281, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 3.955078, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 4.921875, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 5.976562, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27, "something": false }, "geometry": { "type": "Point", "coordinates": [ 6.943359, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": "27", "something": false }, "geometry": { "type": "Point", "coordinates": [ 7.998047, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27.5, "something": false }, "geometry": { "type": "Point", "coordinates": [ 8.964844, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 9.823748927348929e+55, "something": false }, "geometry": { "type": "Point", "coordinates": [ 9.931641, 1.054628 ] } }
,
{ "type": "Feature", "properties": { "yes": "no", "special": 27, "something": false }, "geometry": { "type": "Point", "coordinates": [ 10.986328, 1.054628 ] } }
] }
] }
] }
38 changes: 38 additions & 0 deletions tests/stringid/out/-z0_--use-attribute-for-id_special.json
@@ -0,0 +1,38 @@
{ "type": "FeatureCollection", "properties": {
"bounds": "1.000000,1.000000,11.000000,1.000000",
"center": "1.000000,1.000000,0",
"description": "tests/stringid/out/-z0_--use-attribute-for-id_special.json.check.mbtiles",
"format": "pbf",
"json": "{\"vector_layers\": [ { \"id\": \"in\", \"description\": \"\", \"minzoom\": 0, \"maxzoom\": 0, \"fields\": {\"something\": \"Boolean\", \"special\": \"Mixed\", \"yes\": \"String\"} } ],\"tilestats\": {\"layerCount\": 1,\"layers\": [{\"layer\": \"in\",\"count\": 11,\"geometry\": \"Point\",\"attributeCount\": 3,\"attributes\": [{\"attribute\": \"something\",\"count\": 1,\"type\": \"boolean\",\"values\": [false]},{\"attribute\": \"special\",\"count\": 3,\"type\": \"mixed\",\"values\": [\"27\",27.5,9.823748927348929e+55],\"min\": 27.5,\"max\": 9.823748927348929e+55},{\"attribute\": \"yes\",\"count\": 1,\"type\": \"string\",\"values\": [\"no\"]}]}]}}",
"maxzoom": "0",
"minzoom": "0",
"name": "tests/stringid/out/-z0_--use-attribute-for-id_special.json.check.mbtiles",
"type": "overlay",
"version": "2"
}, "features": [
{ "type": "FeatureCollection", "properties": { "zoom": 0, "x": 0, "y": 0 }, "features": [
{ "type": "FeatureCollection", "properties": { "layer": "in", "version": 2, "extent": 4096 }, "features": [
{ "type": "Feature", "id": 12345, "properties": { }, "geometry": { "type": "Point", "coordinates": [ 0.966797, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 1.933594, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 2.988281, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 3.955078, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 4.921875, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 5.976562, 1.054628 ] } }
,
{ "type": "Feature", "id": 27, "properties": { "yes": "no", "something": false }, "geometry": { "type": "Point", "coordinates": [ 6.943359, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": "27", "something": false }, "geometry": { "type": "Point", "coordinates": [ 7.998047, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27.5, "something": false }, "geometry": { "type": "Point", "coordinates": [ 8.964844, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 9.823748927348929e+55, "something": false }, "geometry": { "type": "Point", "coordinates": [ 9.931641, 1.054628 ] } }
,
{ "type": "Feature", "id": 27, "properties": { "yes": "no", "something": false }, "geometry": { "type": "Point", "coordinates": [ 10.986328, 1.054628 ] } }
] }
] }
] }
38 changes: 38 additions & 0 deletions tests/stringid/out/-z0_-aI.json
@@ -0,0 +1,38 @@
{ "type": "FeatureCollection", "properties": {
"bounds": "1.000000,1.000000,11.000000,1.000000",
"center": "1.000000,1.000000,0",
"description": "tests/stringid/out/-z0_-aI.json.check.mbtiles",
"format": "pbf",
"json": "{\"vector_layers\": [ { \"id\": \"in\", \"description\": \"\", \"minzoom\": 0, \"maxzoom\": 0, \"fields\": {\"something\": \"Boolean\", \"special\": \"Mixed\", \"yes\": \"String\"} } ],\"tilestats\": {\"layerCount\": 1,\"layers\": [{\"layer\": \"in\",\"count\": 11,\"geometry\": \"Point\",\"attributeCount\": 3,\"attributes\": [{\"attribute\": \"something\",\"count\": 1,\"type\": \"boolean\",\"values\": [false]},{\"attribute\": \"special\",\"count\": 4,\"type\": \"mixed\",\"values\": [\"27\",27,27.5,9.823748927348929e+55],\"min\": 27,\"max\": 9.823748927348929e+55},{\"attribute\": \"yes\",\"count\": 1,\"type\": \"string\",\"values\": [\"no\"]}]}]}}",
"maxzoom": "0",
"minzoom": "0",
"name": "tests/stringid/out/-z0_-aI.json.check.mbtiles",
"type": "overlay",
"version": "2"
}, "features": [
{ "type": "FeatureCollection", "properties": { "zoom": 0, "x": 0, "y": 0 }, "features": [
{ "type": "FeatureCollection", "properties": { "layer": "in", "version": 2, "extent": 4096 }, "features": [
{ "type": "Feature", "id": 12345, "properties": { }, "geometry": { "type": "Point", "coordinates": [ 0.966797, 1.054628 ] } }
,
{ "type": "Feature", "id": 12345, "properties": { }, "geometry": { "type": "Point", "coordinates": [ 1.933594, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 2.988281, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 3.955078, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 4.921875, 1.054628 ] } }
,
{ "type": "Feature", "properties": { }, "geometry": { "type": "Point", "coordinates": [ 5.976562, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27, "something": false }, "geometry": { "type": "Point", "coordinates": [ 6.943359, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": "27", "something": false }, "geometry": { "type": "Point", "coordinates": [ 7.998047, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 27.5, "something": false }, "geometry": { "type": "Point", "coordinates": [ 8.964844, 1.054628 ] } }
,
{ "type": "Feature", "id": 1234, "properties": { "yes": "no", "special": 9.823748927348929e+55, "something": false }, "geometry": { "type": "Point", "coordinates": [ 9.931641, 1.054628 ] } }
,
{ "type": "Feature", "properties": { "yes": "no", "special": 27, "something": false }, "geometry": { "type": "Point", "coordinates": [ 10.986328, 1.054628 ] } }
] }
] }
] }

0 comments on commit 949468e

Please sign in to comment.