Skip to content

Commit

Permalink
Performance improvements for data point ingestion.
Browse files: browse the repository at this point in the history.
* Fixed bug when calculating performance estimates using a single point.
  • Loading branch information
audaciouscode committed Aug 15, 2017
1 parent 0ed5514 commit d755c47
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 12 deletions.
19 changes: 11 additions & 8 deletions management/commands/pdk_process_bundles.py
Expand Up @@ -2,6 +2,7 @@

import datetime
import json
import logging

from django.contrib.gis.geos import GEOSGeometry
from django.core.management.base import BaseCommand
Expand All @@ -23,15 +24,18 @@ def add_arguments(self, parser):
parser.add_argument('--count',
type=int,
dest='bundle_count',
default=10,
default=25,
help='Number of bundles to process in a single run')

@handle_lock
def handle(self, *args, **options):
to_delete = []

supports_json = install_supports_jsonfield()
default_tz = timezone.get_default_timezone()

for bundle in DataBundle.objects.filter(processed=False)[:options['bundle_count']]:
if install_supports_jsonfield() is False:
for bundle in DataBundle.objects.filter(processed=False).order_by('-recorded')[:options['bundle_count']]:
if supports_json is False:
bundle.properties = json.loads(bundle.properties)

for bundle_point in bundle.properties:
Expand All @@ -46,9 +50,9 @@ def handle(self, *args, **options):
if 'latitude' in bundle_point['passive-data-metadata'] and 'longitude' in bundle_point['passive-data-metadata']:
point.generated_at = GEOSGeometry('POINT(' + str(bundle_point['passive-data-metadata']['longitude']) + ' ' + str(bundle_point['passive-data-metadata']['latitude']) + ')')

point.created = datetime.datetime.fromtimestamp(bundle_point['passive-data-metadata']['timestamp'], tz=timezone.get_default_timezone())
point.created = datetime.datetime.fromtimestamp(bundle_point['passive-data-metadata']['timestamp'], tz=default_tz)

if install_supports_jsonfield():
if supports_json:
point.properties = bundle_point
else:
point.properties = json.dumps(bundle_point, indent=2)
Expand All @@ -57,9 +61,6 @@ def handle(self, *args, **options):

point.save()

if install_supports_jsonfield() is False:
bundle.properties = json.dumps(bundle.properties, indent=2)

bundle.processed = True
bundle.save()

Expand All @@ -68,3 +69,5 @@ def handle(self, *args, **options):

for bundle in to_delete:
bundle.delete()

logging.debug("%d unprocessed payloads remaining.", DataBundle.objects.filter(processed=False).count())
6 changes: 4 additions & 2 deletions models.py
Expand Up @@ -208,9 +208,11 @@ def update_performance_metadata(self):
generator['last_recorded'] = calendar.timegm(last_recorded.recorded.timetuple())
generator['first_created'] = calendar.timegm(first_point.created.timetuple())
generator['last_created'] = calendar.timegm(last_point.created.timetuple())

duration = (last_point.created - first_point.created).total_seconds()

if generator['points_count'] > 1:
generator['frequency'] = float(generator['points_count']) / (last_point.created - first_point.created).total_seconds()
if generator['points_count'] > 1 and duration > 0:
generator['frequency'] = float(generator['points_count']) / duration
else:
generator['frequency'] = 0

Expand Down
4 changes: 2 additions & 2 deletions views.py
Expand Up @@ -113,7 +113,7 @@ def add_data_bundle(request): # pylint: disable=too-many-statements

bundle.save()

call_command('pdk_process_bundles')
# call_command('pdk_process_bundles')

return response

Expand Down Expand Up @@ -156,7 +156,7 @@ def add_data_bundle(request): # pylint: disable=too-many-statements
data_file.content_file.save(value.name, value)
data_file.save()

call_command('pdk_process_bundles')
# call_command('pdk_process_bundles')

return response
elif request.method == 'OPTIONS':
Expand Down

0 comments on commit d755c47

Please sign in to comment.