/
testing.Rmd
166 lines (134 loc) · 7.47 KB
/
testing.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
---
title: "Testing RWebData"
author: "Ulrich Matter"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option `echo = TRUE` for all chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
```
Install and load latest version of the package
========================================================
```{r eval=FALSE}
library(devtools)
install_github("umatter/rwebdata")
```
```{r}
library(RWebData)
```
Unit tests of data conversion in package examples
========================================================
```{r}
library(testthat)
# Example files (JSON, XML and YAML variants) that the converters are
# expected to handle.
cases <- c(
  "microcapital.json",
  "JSON_fiction.json",
  "microcapital.xml",
  "XML_fiction.xml",
  "microcapital.yaml",
  "YAML_fiction.yml"
)
# Look up every example file in the "exdata" directory of the RWebData
# package; the result is a list of paths in the same order as `cases`.
cases_paths <- lapply(
  X = cases,
  FUN = function(file_name) {
    system.file("exdata", file_name, package = "RWebData")
  }
)
# Unit tests of the three converters (silent if no errors occur).
test_that("Conversion of raw web data to list of data.frames works", {
  # Paths of the example files, by their position in `cases_paths`.
  json_micro <- cases_paths[[1]]
  json_fiction <- cases_paths[[2]]
  xml_micro <- cases_paths[[3]]
  xml_fiction <- cases_paths[[4]]
  yaml_micro <- cases_paths[[5]]
  yaml_fiction <- cases_paths[[6]]

  # Valid input: every converter has to return a list.
  expect_is(JSONtoDataFrame(json_micro), "list")
  expect_is(JSONtoDataFrame(json_fiction), "list")
  expect_is(XMLtoDataFrame(xml_micro), "list")
  expect_is(XMLtoDataFrame(xml_fiction), "list")
  expect_is(YAMLtoDataFrame(yaml_micro), "list")
  expect_is(YAMLtoDataFrame(yaml_fiction), "list")

  # The JSON and YAML versions of the same data must convert identically.
  expect_identical(YAMLtoDataFrame(yaml_micro), JSONtoDataFrame(json_micro))

  # The converted structure should look the same for all three raw formats.
  df_header <- ":'data.frame': 1 obs. of 2 variables:"
  expect_output(str(JSONtoDataFrame(json_micro)), df_header)
  expect_output(str(XMLtoDataFrame(xml_micro)), df_header)
  expect_output(str(YAMLtoDataFrame(yaml_micro)), df_header)

  # Invalid input: every converter has to raise an error.
  expect_error(JSONtoDataFrame("foobar"))
  expect_error(XMLtoDataFrame("foobar"))
  expect_error(YAMLtoDataFrame("foobar"))
})
# Run the examples attached to each converter's help topic so that their
# output can be inspected manually.
example(XMLtoDataFrame)
example(YAMLtoDataFrame)
example(JSONtoDataFrame)
```
Test basic functionality with openly accessible APIs
========================================================
```{r}
# Example calls to openly accessible web APIs (no registration required).
example_urls <- c(
  "https://projects.propublica.org/nonprofits/api/v1/organizations/142007220.json",
  "https://projects.propublica.org/free-the-files/stations/WEWS-TV.json",
  "http://projects.propublica.org/forensics/systems.json",
  "https://data.police.uk/api/crimes-street/all-crime?lat=52.629729&lng=-1.131592&date=2017-01",
  "https://health.data.ny.gov/resource/cnih-y5dw.json?date=2015-02-11T00%3A00%3A00",
  "https://data.consumerfinance.gov/resource/jhzv-w97w.json",
  "http://stats.oecd.org/restsdmx/sdmx.ashx/GetData/QNA/AUS.B1_GE+P31S14_S15+P3S13+P51+P52_P53+B11+P6+P7.GYSA+GPSA+CTQRGPSA.A/all?startTime=2016&endTime=2017",
  "http://ergast.com/api/f1/2013/1/results.json",
  "http://search.worldbank.org/api/v2/projects?format=json&qterm=Water&source=IBRD&srt=score&order=desc&kw=N",
  "http://api.worldbank.org/countries",
  "http://api.worldbank.org/countries?incomeLevel=LMC&format=xml",
  "http://api.worldbank.org/countries?incomeLevel=LMC&format=json",
  "https://api.github.com/users/hadley/orgs",
  "https://api.github.com/users/hadley/repos",
  "https://api.github.com/repos/hadley/ggplot2/commits",
  "http://citibikenyc.com/stations/json",
  "http://ergast.com/api/f1/2012/1/results.json",
  "https://zlzlap7j50.execute-api.us-east-1.amazonaws.com/prod",
  "https://en.wikipedia.org/w/api.php?action=query&titles=Internet|Switzerland&prop=revisions&format=json",
  "https://en.wikipedia.org/w/api.php?action=query&titles=Internet|Switzerland&prop=info&format=xml",
  "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20weather.forecast%20where%20woeid%20in%20(select%20woeid%20from%20geo.places(1)%20where%20text%3D%22nome%2C%20ak%22)&format=json&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys",
  "http://www.wikia.com/api/v1/Activity/LatestActivity?limit=10&namespaces=0&allowDuplicates=true",
  "https://graphical.weather.gov/xml/DWMLgen/schema/latest_DWMLByDay24hr.txt",
  "http://content.guardianapis.com/search?api-key=test",
  "https://api.discogs.com/artists/1/releases?page=2&per_page=75",
  "http://openlibrary.org/people/george08/lists.json",
  "https://oc-index.library.ubc.ca/collections/berkpost/items",
  "http://domesdaymap.co.uk/api/1.0/area/",
  "https://api.opensource.org/license/GPL-3.0",
  "https://api.opensource.org/licenses/copyleft",
  "https://www.openhumans.org/api/public-data/?username=mpball&limit=5"
)
# Unit tests of getTabularData: every example URL must yield an object whose
# printed class matches "data.frame" or "list".
test_that("Query, parse, and convert data from APIs works", {
  for (url in example_urls) {
    # Pass the URL as `info` so that a failing expectation names the API call
    # that caused it.
    expect_output(
      print(class(getTabularData(url))),
      "(data.frame|list)",
      info = url
    )
  }
})
# Inspect details: query every example URL and print a summary of the result,
# or report the URL if the call failed.
for (i in example_urls) {
  print(i)
  attempt <- try(getTabularData(i))
  # inherits() yields a single TRUE/FALSE even when the result carries several
  # classes, whereas `class(x) == "try-error"` would produce a condition of
  # length > 1 in that case.
  if (inherits(attempt, "try-error")) {
    cat(paste0("Error in call: ", i, "\n"))
  } else {
    print(summary(attempt))
  }
}
```
Unit test summary
===================
```{r echo=FALSE}
library(testthat)
library(knitr)
# Run all tests in unit_testing.R in one go, then show the collected results
# as a table.
results <- test_file(path = "unit_testing.R")
results_table <- as.data.frame(results)
kable(results_table)
```
APIs needing registration
========================================================
In addition to the examples listed above, RWebData has been tested with various APIs that need user registration. Examples of API-calls that work with the current version (replace `<YOUR-API-KEY>` with your personal key after registering with the respective API in order to run the example code):
`"http://openstates.org/api/v1/legislators/DCL000003/?apikey=<YOUR-API-KEY>"`
`"http://openstates.org/api/v1/legislators/?state=dc&apikey=<YOUR-API-KEY>"`
`"http://politicalpartytime.org/api/v1/event/?apikey=<YOUR-API-KEY>&limit=50&offset=50&format=json"`
`"http://politicalpartytime.org/api/v1/host/?apikey=<YOUR-API-KEY>&limit=50&offset=50&format=json"`
`"http://openstates.org/api/v1/bills/?state=dc&q=taxi&apikey=<YOUR-API-KEY>"`
`"http://api.legiscan.com/?key=<YOUR-API-KEY>&op=getMasterList&state=CA"`
`"https://api.foursquare.com/v2/venues/search?client_id=<YOUR-CLIENT-ID>&client_secret=<YOUR-CLIENT-SECRET>&v=20130815&ll=40.7,-74&query=sushi"`
`"http://api.votesmart.org/Measure.getMeasure?key=<YOUR-API-KEY>&measureId=1632"`
`"http://api.votesmart.org/CandidateBio.getBio?key=<YOUR-API-KEY>&candidateId=9490"`
`"https://maps.googleapis.com/maps/api/place/radarsearch/json?location=47.682398,8.822546&radius=50000&sensor=false&keyword=&name=&types=restaurant&key=<YOUR-API-KEY>"`
`"https://disqus.com/api/3.0/threads/set.json?&thread=ident:1.18176733&thread=ident:1.18176650&thread=ident:1.18176644&thread=ident:1.18176619&thread=ident:1.18175246&forum=nzz&api_key=<YOUR-API-KEY>"`
NOTE
=====
Some of the listed web APIs may be temporarily unreachable or may have been shut down. This list will be updated periodically to keep track of such changes and to add further examples. Even so, the list may not be 100% up to date at any given point in time, so it cannot be guaranteed that all of the examples shown work perfectly if changes are made on the server side.