Skip to content

Commit

Permalink
download non-tabular datasets with export.socrata #126
Browse files Browse the repository at this point in the history
Committed by nicklucius on May 6, 2017 — commit afd8d64 (parent 4efc592).
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Expand Up @@ -10,7 +10,7 @@ Description: Provides easier interaction with
format and manages throttling by 'Socrata'.
Users can upload data to Socrata portals directly
from R.
Version: 1.8.0-1
Version: 1.8.0-2
Date: 2017-05-05
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
Maintainer: "Tom Schenk Jr." <developers@cityofchicago.org>
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(export.socrata)
export(fieldName)
export(isFourByFour)
export(ls.socrata)
Expand All @@ -17,3 +18,4 @@ importFrom(jsonlite,fromJSON)
importFrom(mime,guess_type)
importFrom(plyr,rbind.fill)
importFrom(utils,read.csv)
importFrom(utils,write.csv)
43 changes: 32 additions & 11 deletions R/RSocrata.R
Expand Up @@ -469,6 +469,8 @@ write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email,
#' @param url - the base URL of a domain (e.g., "data.cityofchicago.org")
#' @return a Gzipped file with the four-by-four and timestamp of when the download began in filename
#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
#' @importFrom httr GET
#' @importFrom utils write.csv
#' @export
export.socrata <- function(url) {
dir.create(basename(url), showWarnings = FALSE) # Create directory based on URL
Expand All @@ -480,16 +482,35 @@ export.socrata <- function(url) {

# Download data
downloadUrl <- ls$distribution[[i]]$downloadURL[1] # Currently grabs CSV, which is the first element
d <- read.socrata(downloadUrl)

# Construct the filename output
downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore
downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename
filename <- httr::parse_url(ls$identifier[i])
filename$path <- substr(filename$path, 11, 19)
filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format, ".gz")

# Write file
write.csv(d, file = gzfile(filename))
if (grepl(".csv", downloadUrl)) {
d <- read.socrata(downloadUrl)

# Construct the filename output
default_format <- "csv"
downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore
downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename
filename <- httr::parse_url(ls$identifier[i])
filename$path <- substr(filename$path, 11, 19)
filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format, ".gz")

# Write file
write.csv(d, file = gzfile(filename))

} else {
response <- GET(downloadUrl)

# Construct the filename output
default_format <- response$headers$`content-disposition`
default_format <- strsplit(default_format, "filename=")[[1]][2]
downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore
downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename
filename <- httr::parse_url(ls$identifier[i])
filename$path <- substr(filename$path, 11, 19)
filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format)

# Write file
writeBin(response$content, filename)
}

}
}

0 comments on commit afd8d64

Please sign in to comment.