Add compress function to return object with reduced memory usage #123

Open · wants to merge 3 commits into master
22 changes: 22 additions & 0 deletions README.md
@@ -59,6 +59,28 @@ var geojson = geobuf.decode(new Pbf(data));
Given a [Pbf](https://github.com/mapbox/pbf) object with Geobuf data, return a GeoJSON object. When loading Geobuf data over `XMLHttpRequest`, you need to set `responseType` to [`arraybuffer`](https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/responseType).


### compress

```js
var geojson = geobuf.compress(geobuf.decode(new Pbf(data)));
```

Given a GeoJSON object (or an array of GeoJSON objects), returns an equivalent object with lower memory usage (avoiding memory wasted on excess array capacity).
This may be useful if GeoJSON objects are kept around for a long time after creating them.

```js
// To additionally deduplicate identical arrays
// (may be unsafe if the geodata points are modified by callers)
var geojson = geobuf.compress(geobuf.decode(new Pbf(data)), new Map(), new Map());
// To reuse caches when deduplicating multiple geobuf objects:
// (may be unsafe if the geodata points are modified by callers)
var cache = new Map();
var numericArrayCache = new Map();
var geojson = geobuf.compress(geobuf.decode(new Pbf(data)), cache, numericArrayCache);
```

When `Map` is unavailable, this returns the original object without attempting compression.
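
When several buffers are decoded separately, the whole batch can be compressed in one call by passing an array, so identical points across features share a single array instance. A minimal sketch, assuming `buffers` is a hypothetical array of `ArrayBuffer`s containing Geobuf data:

```js
// Hypothetical: `buffers` is an array of ArrayBuffers with Geobuf data.
var features = buffers.map(function (data) {
    return geobuf.decode(new Pbf(data));
});
// Compressing the array walks every feature with shared caches,
// so equal points across features are deduplicated.
var compressedFeatures = geobuf.compress(features, new Map(), new Map());
```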

## Install

Node and Browserify:
113 changes: 113 additions & 0 deletions compress.js
@@ -0,0 +1,113 @@
'use strict';

if (typeof Map == 'undefined' || !Object.entries) {
module.exports = function compress(value) {
return value;
};
return;
}

/**
* @param {Array} array the array to check
* @returns {boolean} whether every element is a number (true for the empty array).
*/
function isNumericArray(array) {
for (var i = 0; i < array.length; i++) {
var v = array[i];
if (typeof (v) !== 'number') {
return false;
}
}
return true;
}
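// For example (illustrative only): isNumericArray([1.5, -2]) === true,
// isNumericArray([]) === true, and isNumericArray([[1], 2]) === false.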

/**
* @param {number[]} array numbers to encode, possibly including Infinity/NaN/-0
* @return {string} cache key uniquely identifying an array with those numbers.
*/
function createCacheKey(array) {
var parts = [];
for (var i = 0; i < array.length; i++) {
var v = array[i];
// String(-0) === '0', so emit '-0' explicitly to distinguish -0 from 0.
var representation = v === 0 && 1 / v < 0 ? '-0' : String(v);
parts.push(representation);
}
return parts.join(',');
}
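// For example (illustrative only): createCacheKey([1.5, -0, NaN]) === '1.5,-0,NaN',
// while createCacheKey([1.5, 0, NaN]) === '1.5,0,NaN', so 0 and -0 get distinct keys.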

/**
* Compress data returned by geobuf's decode function.
* Objects are modified in place.
*
* This is useful when the decoded polygons will be kept in memory for a long time.
* By default, the arrays created during decoding are allocated with extra capacity
* that won't be used. (In V8, an empty array currently starts with a capacity of
* 16 elements, which is wasteful for decoded points of length 2.)
*
* It can also optionally deduplicate identical points, which may be useful for
* collections of polygons that share points, as well as when calling compress
* multiple times with different objects.
*
* @param {any} value the value to compress.
* @param {Map} [cache] cache of values already seen; by default, a new cache is
* created for each external call to compress. Must support get/has/set.
* @param {null|Map} [numericArrayCache] if non-null, this will be used to deduplicate
* numeric arrays of any length, including empty arrays.
*
* This deduplication may be unsafe if callers would modify arrays.
* @return {any} value with all fields compressed.
*/
function compress(value, cache = new Map(), numericArrayCache = null) {
var i;
if (cache.has(value)) {
return cache.get(value);
}
if (Array.isArray(value)) {
// By default, v8 allocates an array with a capacity of 16 elements.
// This wastes memory for small arrays such as Points of length 2.
//
// The function slice is used because it is available in older JS versions
// and experimentally appears to trim the excess capacity.
var result = value.slice();
if (numericArrayCache && isNumericArray(result)) {
var cacheKey = createCacheKey(result);
var cachedEntry = numericArrayCache.get(cacheKey);
if (cachedEntry) {
cache.set(value, cachedEntry);
return cachedEntry;
}
// Reuse array instances such as [], [1.5, 1.5]
numericArrayCache.set(cacheKey, result);
cache.set(value, result);
// Nothing left to compress.
return result;
}
// Store this in the cache immediately to guard against infinite recursion on
// invalid inputs.
cache.set(value, result);
for (i = 0; i < result.length; i++) {
result[i] = compress(result[i], cache, numericArrayCache);
}
return result;
} else if (value && typeof value === 'object') {
// Compress fields of the object in place.
// Set this to the cache immediately to prevent infinite recursion on invalid data.
cache.set(value, value);
var entries = Object.entries(value);
for (i = 0; i < entries.length; i++) {
var entry = entries[i];
var field = entry[1];
var compressedValue = compress(field, cache, numericArrayCache);
if (field !== compressedValue) {
// Replace object field for this key with the compressed version
value[entry[0]] = compressedValue;
}
}
} else if (typeof value === 'string') {
// Deduplicate strings.
cache.set(value, value);
}
return value;
}
module.exports = compress;
1 change: 1 addition & 0 deletions index.js
@@ -2,3 +2,4 @@

exports.encode = require('./encode');
exports.decode = require('./decode');
exports.compress = require('./compress');
88 changes: 88 additions & 0 deletions test/validate.test.js
@@ -82,7 +82,95 @@ test('roundtrip a circle with potential accumulating error', function (t) {
t.end();
});

test('can compress memory', function (t) {
if (typeof Map === 'undefined') {
t.end();
return;
}
// Generate an invalid shape with duplicate points.
var feature = {
'type': 'MultiPolygon',
'coordinates': [[[]]]
};
var points = 16;
for (var i = 0; i <= points; i++) {
feature.coordinates[0][0].push([
Math.cos(Math.PI * 2.0 * (i % 4) / points),
Math.sin(Math.PI * 2.0 * (i % 4) / points)
]);
}
var roundTripped = geobuf.decode(new Pbf(geobuf.encode(feature, new Pbf())));
var originalJSON = JSON.stringify(roundTripped);
var compressedFeature = geobuf.compress(roundTripped);
var compressedJSON = JSON.stringify(compressedFeature);
var c = compressedFeature.coordinates;
t.same(compressedJSON, originalJSON);
t.same(c[0][0][0], c[0][0][4], 'should be points with equivalent data');
t.notStrictEqual(c[0][0][0], c[0][0][4], 'should not deduplicate different array instances by default');
t.same(c[0][0][0], [1, 0], 'should preserve value');
t.end();
});
test('can compress memory and deduplicate points', function (t) {
if (typeof Map === 'undefined') {
t.end();
return;
}
// Generate an invalid shape with duplicate points.
var feature = {
'type': 'MultiPolygon',
'coordinates': [[[]]]
};
var points = 12;
for (var i = 0; i <= points; i++) {
feature.coordinates[0][0].push([
Math.cos(Math.PI * 2.0 * (i % 4) / points),
Math.sin(Math.PI * 2.0 * (i % 4) / points)
]);
}
var roundTripped = geobuf.decode(new Pbf(geobuf.encode(feature, new Pbf())));
var originalJSON = JSON.stringify(roundTripped);
var compressedFeature = geobuf.compress(roundTripped, new Map(), new Map());
var compressedJSON = JSON.stringify(compressedFeature);
var polygon = compressedFeature.coordinates[0][0];
t.same(compressedJSON, originalJSON);
t.same(polygon[0], polygon[4], 'should be polygon with equivalent data');
t.strictEqual(polygon[0], polygon[4], 'should deduplicate different array instances when cache passed in');
t.strictEqual(polygon[0], polygon[8], 'should deduplicate different array instances when cache passed in');
t.same(polygon[0], [1, 0], 'should preserve value');
t.end();
});
test('compress should handle infinite numbers', function (t) {
var INF = 1 / 0;
// JSON.stringify doesn't support INF
var original = [[INF], [-INF], [0], [0], [INF]];
var compressedData = geobuf.compress(original, new Map(), new Map());
t.same([[INF], [-INF], [0], [0], [INF]], compressedData);
t.strictEqual(compressedData[2], compressedData[3]);
t.strictEqual(compressedData[0], compressedData[4]);
t.end();
});
test('compress should handle NaN', function (t) {
var original = [[0, Number.NaN], [0, Number.NaN], [0, null]];
var compressedData = geobuf.compress(original, new Map(), new Map());
t.strictEqual(compressedData[0][0], 0);
t.strictEqual(compressedData[0], compressedData[1]);
t.same(compressedData[2], [0, null]);
t.ok(Number.isNaN(compressedData[0][1])); // Note that NaN !== NaN
t.end();
});
test('compress should handle negative 0', function (t) {
var original = [[0], [0], [-0]];
var compressedData = geobuf.compress(original, new Map(), new Map());
// strictEqual uses https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/is
t.strictEqual(compressedData[0], compressedData[1]);
t.notStrictEqual(compressedData[0], compressedData[2]);
t.notStrictEqual(compressedData[0][0], compressedData[2][0]);
t.strictEqual(compressedData[0][0], 0);
t.strictEqual(compressedData[2][0], -0);
t.end();
});
function roundtripTest(geojson) {

return function (t) {
var buf = geobuf.encode(geojson, new Pbf());
var geojson2 = geobuf.decode(new Pbf(buf));