diff --git a/.Rbuildignore b/.Rbuildignore index 87bee57..9eb942d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ man-roxygen/* ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^cran-comments\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 215b7e7..34e9178 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: dataverse -Version: 0.2.1.9002 +Version: 0.3.0 Title: Client for Dataverse 4 Repositories Authors@R: c( person( @@ -14,18 +14,18 @@ Authors@R: c( email = "thosjleeper@gmail.com", comment = c(ORCID = "0000-0003-4097-6326") ), - person( - "Philip", "Durbin", - role = c("aut"), - email = "philipdurbin@gmail.com", - comment = c(ORCID = "0000-0002-9528-9470") - ), person( "Shiro", "Kuriwaki", role = c("aut"), email = "shirokuriwaki@gmail.com", comment = c(ORCID = "0000-0002-5687-2647") ), + person( + "Philip", "Durbin", + role = c("aut"), + email = "philipdurbin@gmail.com", + comment = c(ORCID = "0000-0002-9528-9470") + ), person( "Sebastian", "Karcher", role=c("aut"), @@ -49,13 +49,15 @@ Suggests: haven, knitr, purrr, + rmarkdown, testthat, UNF, yaml -Description: Provides access to Dataverse version 4 APIs , - enabling data search, retrieval, and deposit. For Dataverse versions <= 4.0, - use the deprecated 'dvn' package . +Description: Provides access to Dataverse APIs (versions 4-5), + enabling data search, retrieval, and deposit. For Dataverse versions <= 3.0, + use the archived 'dvn' package . 
License: GPL-2 +LazyData: true URL: https://github.com/iqss/dataverse-client-r BugReports: https://github.com/iqss/dataverse-client-r/issues VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index a12718a..3b692a8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,10 @@ -# CHANGES TO dataverse 0.2.2 (upcoming) +# CHANGES TO dataverse 0.3.0 + +New Methods + +* Add new `get_dataframe_*()` methods (#48, #66) + +Small updates * Make filter queries (fq) work in `dataverse_search` (#36 @adam3smith) * Update maintainer to Will Beasley (wibeasley@hotmail.com) (#38) diff --git a/R/get_dataframe.R b/R/get_dataframe.R index 09c9101..c6350be 100644 --- a/R/get_dataframe.R +++ b/R/get_dataframe.R @@ -1,7 +1,9 @@ -#' Get file from dataverse and convert it into a dataframe or tibble +#' Download dataverse file as a dataframe #' -#' `get_dataframe_by_id`, if you know the numeric ID of the dataset, or instead -#' `get_dataframe_by_name` if you know the filename and doi. The dataset +#' Use `get_dataframe_by_name` if you know the name of the datafile and the DOI +#' of the dataset. Use `get_dataframe_by_doi` if you know the DOI of the datafile +#' itself. Use `get_dataframe_by_id` if you know the numeric ID of the +#' datafile. #' #' @rdname get_dataframe #' @@ -9,9 +11,9 @@ #' `"roster-bulls-1996.tab"`. #' @param .f The function to used for reading in the raw dataset. This user #' must choose the appropriate function: for example if the target is a .rds -#' file, then `.f` should be `readRDS` or `readr::read_`rds`. +#' file, then `.f` should be `readRDS` or `readr::read_rds`. #' @param original A logical, defaulting to TRUE. Whether to read the ingested, -#' archival version of the dataset if one exists. The archival versions are tab-delimited +#' archival version of the datafile if one exists. The archival versions are tab-delimited #' `.tab` files so if `original = FALSE`, `.f` is set to `readr::read_tsv`. 
#' If functions to read the original version is available, then `original = TRUE` #' with a specified `.f` is better. @@ -19,35 +21,36 @@ #' @inheritDotParams get_file #' #' @examples -#' #' # Retrieve data.frame from dataverse DOI and file name -#' df_from_rds_ingested <- +#' df_tab <- #' get_dataframe_by_name( #' filename = "roster-bulls-1996.tab", #' dataset = "doi:10.70122/FK2/HXJVJU", #' server = "demo.dataverse.org" #' ) #' -#' # Retrieve the same data.frame from dataverse + file DOI -#' df_from_rds_ingested_by_doi <- +#' # Retrieve the same file from file DOI +#' df_tab <- #' get_dataframe_by_doi( #' filedoi = "10.70122/FK2/HXJVJU/SA3Z2V", #' server = "demo.dataverse.org" #' ) #' +#' # Do not run when submitting to CRAN, because the whole +#' # example sometimes takes longer than 10 sec. +#' \dontrun{ #' # Retrieve ingested file originally a Stata dta #' df_from_stata_ingested <- #' get_dataframe_by_name( #' filename = "nlsw88.tab", #' dataset = "doi:10.70122/FK2/PPIAXE", #' server = "demo.dataverse.org" -#' ) -#' +#' ) #' #' # To use the original file version, or for non-ingested data, #' # please specify `original = TRUE` and specify a function in .f. #' -#' # A data.frame is still returned, but the +#' # Rds files are not ingested so original = TRUE and .f is required. 
#' if (requireNamespace("readr", quietly = TRUE)) { #' df_from_rds_original <- #' get_dataframe_by_name( @@ -56,19 +59,31 @@ #' server = "demo.dataverse.org", #' original = TRUE, #' .f = readr::read_rds -#' ) +#' ) #' } #' +#' # Get Stata file as original #' if (requireNamespace("haven", quietly = TRUE)) { -#' df_from_stata_original <- +#' df_stata_original <- #' get_dataframe_by_name( #' filename = "nlsw88.tab", #' dataset = "doi:10.70122/FK2/PPIAXE", #' server = "demo.dataverse.org", #' original = TRUE, #' .f = haven::read_dta -#' ) +#' ) #' } +#' +#' # Stata file as ingested file (less information than original) +#' df_stata_ingested <- +#' get_dataframe_by_name( +#' filename = "nlsw88.tab", +#' dataset = "doi:10.70122/FK2/PPIAXE", +#' server = "demo.dataverse.org" +#' ) +#' +#' } +#' #' @export get_dataframe_by_name <- function ( filename, diff --git a/R/get_file.R b/R/get_file.R index 7964f42..9754af9 100644 --- a/R/get_file.R +++ b/R/get_file.R @@ -1,20 +1,21 @@ #' @rdname files #' -#' @title Download File +#' @title Download dataverse file as a raw binary #' -#' @description Download Dataverse File(s). `get_file` is a general wrapper, -#' and can take either dataverse objects, file IDs, or a filename and dataverse. +#' @description Download Dataverse File(s). `get_file_*` +#' functions return a raw binary file, which cannot be readily analyzed in R. +#' To use the objects as dataframes, see the `get_dataframe_*` functions at +#' \link{get_dataframe_by_name} instead. +#' +#' @details This function provides access to data files from a Dataverse entry. +#' `get_file` is a general wrapper, +#' and can take either dataverse objects, file IDs, or a filename and dataverse. +#' Internally, all functions download each file by `get_file_by_id`. #' `get_file_by_name` is a shorthand for running `get_file` by #' specifying a file name (`filename`) and dataset (`dataset`). #' `get_file_by_doi` obtains a file by its file DOI, bypassing the #' `dataset` argument. 
#' -#' Internally, all functions download each file by `get_file_by_id`. `get_file_*` -#' functions return a raw binary file, which cannot be readily analyzed in R. -#' To use the objects as dataframes, see the `get_dataset_*` functions at \link{get_dataset} -#' -#' @details This function provides access to data files from a Dataverse entry. -#' #' @param file An integer specifying a file identifier; or a vector of integers #' specifying file identifiers; or, if used with the prefix \code{"doi:"}, a #' character with the file-specific DOI; or, if used without the prefix, a diff --git a/README.Rmd b/README.Rmd index 7f9a656..847ea41 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,5 +1,5 @@ --- -title: "R Client for Dataverse 4 Repositories" +title: "R Client for Dataverse Repositories" output: github_document --- @@ -11,9 +11,9 @@ Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") [![CRAN Version](https://www.r-pkg.org/badges/version/dataverse)](https://cran.r-project.org/package=dataverse) ![Downloads](https://cranlogs.r-pkg.org/badges/dataverse) [![Travis-CI Build Status](https://travis-ci.org/IQSS/dataverse-client-r.png?branch=master)](https://travis-ci.org/IQSS/dataverse-client-r) [![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) -[![Dataverse Project logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png "Dataverse Project")](https://dataverse.org) +[![Dataverse Project logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png)](https://dataverse.org) -The **dataverse** package provides access to [Dataverse 4](https://dataverse.org/) APIs, enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. 
**dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting. +The **dataverse** package provides access to [Dataverse](https://dataverse.org/) APIs (versions 4-5), enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. **dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 ("Dataverse Network") applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) **sword** package for data deposit and the **UNF** package for data fingerprinting. ### Getting Started @@ -32,7 +32,7 @@ library("dataverse") #### Keys -Some features of the Dataverse 4 API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the specific server installation of the Dataverse software, and an API key linked to that account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](https://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: +Some features of the Dataverse API are public and require no authentication. 
This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the specific server installation of the Dataverse software, and an API key linked to that account. Instructions for obtaining an account and setting up an API key are available in the [Dataverse User Guide](https://guides.dataverse.org/en/latest/user/account.html). (Note: if your key is compromised, it can be regenerated to preserve security.) Once you have an API key, this should be stored as an environment variable called `DATAVERSE_KEY`. It can be set within R using: ``` r Sys.setenv("DATAVERSE_KEY" = "examplekey12345") diff --git a/README.md b/README.md index 77cdbf4..bef59be 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -R Client for Dataverse 4 Repositories +R Client for Dataverse Repositories ================ [![CRAN @@ -9,14 +9,13 @@ Status](https://travis-ci.org/IQSS/dataverse-client-r.png?branch=master)](https: [![codecov.io](https://codecov.io/github/IQSS/dataverse-client-r/coverage.svg?branch=master)](https://codecov.io/github/IQSS/dataverse-client-r?branch=master) [![Dataverse Project -logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png -"Dataverse Project")](https://dataverse.org) +logo](https://dataverse.org/files/dataverseorg/files/dataverse_project_logo-hp.png)](https://dataverse.org) The **dataverse** package provides access to -[Dataverse 4](https://dataverse.org/) APIs, enabling data search, -retrieval, and deposit, thus allowing R users to integrate public data -sharing into the reproducible research workflow. **dataverse** is the -next-generation iteration of [the **dvn** +[Dataverse](https://dataverse.org/) APIs (versions 4-5), enabling data +search, retrieval, and deposit, thus allowing R users to integrate +public data sharing into the reproducible research workflow. 
+**dataverse** is the next-generation iteration of [the **dvn** package](https://cran.r-project.org/package=dvn), which works with Dataverse 3 (“Dataverse Network”) applications. **dataverse** includes numerous improvements for data search, retrieval, and deposit, including @@ -35,7 +34,7 @@ library("dataverse") #### Keys -Some features of the Dataverse 4 API are public and require no +Some features of the Dataverse API are public and require no authentication. This means in many cases you can search for and retrieve data without a Dataverse account for that a specific Dataverse installation. But, other features require a Dataverse account for the @@ -53,12 +52,13 @@ Sys.setenv("DATAVERSE_KEY" = "examplekey12345") #### Server -Because [there are many Dataverse installations](https://dataverse.org/), -all functions in the R client require specifying what server -installation you are interacting with. This can be set by default with -an environment variable, `DATAVERSE_SERVER`. This should be the -Dataverse server, without the “https” prefix or the “/api” URL path, -etc. For example, the Harvard Dataverse can be used by setting: +Because [there are many Dataverse +installations](https://dataverse.org/), all functions in the R client +require specifying what server installation you are interacting with. +This can be set by default with an environment variable, +`DATAVERSE_SERVER`. This should be the Dataverse server, without the +“https” prefix or the “/api” URL path, etc. For example, the Harvard +Dataverse can be used by setting: ``` r Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu") @@ -99,7 +99,7 @@ nlsw <- ## Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE. 
- ## + ## ## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────── ## cols( ## idcode = col_double(), @@ -164,7 +164,8 @@ nlsw_original <- ) ``` -Note that even though the file prefix is “.tab”, we use `read_dta`. +Note that even though the file prefix is “.tab”, we use +`haven::read_dta`. Of course, when the dataset is not ingested (such as a Rds file), users would always need to specify an `.f` argument for the specific file. @@ -183,7 +184,7 @@ class(nlsw_tsv$race) # tab ingested version only has numeric data attr(nlsw_original$race, "labels") # original dta has value labels ``` - ## white black other + ## white black other ## 1 2 3 #### Reading a dataset as a binary file. @@ -220,7 +221,7 @@ get_dataset( ) ``` - ## Dataset (182162): + ## Dataset (182162): ## Version: 1.1, RELEASED ## Release Date: 2020-12-30T00:00:24Z ## License: CC0 @@ -256,13 +257,12 @@ subsequent pages, specify `start`. ### Data Archiving -Dataverse provides two - basically unrelated - workflows for managing -(adding, documenting, and publishing) datasets. The first is built on -[SWORD v2.0](http://swordapp.org/sword-v2/). This means that to create a -new dataset listing, you will have to first initialize a dataset entry with -some metadata, add one or more files to the dataset, and then publish -it. This looks something like the following: - +Dataverse provides two - basically unrelated - workflows for managing +(adding, documenting, and publishing) datasets. The first is built on +[SWORD v2.0](http://swordapp.org/sword-v2/). This means that to create a +new dataset listing, you will have to first initialize a dataset entry +with some metadata, add one or more files to the dataset, and then +publish it. 
This looks something like the following: ``` r # retrieve your service document @@ -324,6 +324,6 @@ Scott Chamberlain’s [oai](https://cran.r-project.org/package=oai), which offer metadata download from any web repository that is compliant with the [Open Archives Initiative](http://www.openarchives.org/) standards. Additionally, [rdryad](https://cran.r-project.org/package=rdryad) uses -OAIHarvester to interface with [Dryad](http://datadryad.org/). The +OAIHarvester to interface with [Dryad](https://datadryad.org/stash). The [rfigshare](https://cran.r-project.org/package=rfigshare) package works in a similar spirit to **dataverse** with . diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..b674f34 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,27 @@ +Description +----------------------------------------------- + +This submission includes new features and updates to stay compliant with R checks. + +A second change is that I am now the package maintainer, taking over from Thomas J. Leeper (thosjleeper@gmail.com). See https://github.com/IQSS/dataverse-client-r/issues/42 and https://github.com/IQSS/dataverse-client-r/issues/21. + +The first submission on Jan 17/18 was rejected because the CRAN check had three notes that one documentation example exceeded 10 seconds. In response, I've added a `dontrun{}` block on most of that example. + +Thank you for taking the time to review my submission, and please tell me if there's something else I should do for CRAN. -Will Beasley + + +Test environments +----------------------------------------------- + +1. Local Ubuntu, R 4.0.3 +1. Local Win10, R 4.0.3 Patched +1. [r-hub](https://builder.r-hub.io/status/dataverse_0.3.0.tar.gz-905624c45a92467eb688858acab1a13) +1. [win-builder](https://win-builder.r-project.org/xYyWrC1uFjXH), development version. +1. 
[Travis CI](https://travis-ci.org/github/IQSS/dataverse-client-r), Ubuntu 18.04 LTS + + +R CMD check results +----------------------------------------------- + +* No ERRORs or WARNINGs on any builds. +* One NOTE about the new package maintainer diff --git a/docs/404.html b/docs/404.html index c20f9e2..48ccbc9 100644 --- a/docs/404.html +++ b/docs/404.html @@ -71,7 +71,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/ISSUE_TEMPLATE.html b/docs/ISSUE_TEMPLATE.html index c52c35a..9da4736 100644 --- a/docs/ISSUE_TEMPLATE.html +++ b/docs/ISSUE_TEMPLATE.html @@ -71,7 +71,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/PULL_REQUEST_TEMPLATE.html b/docs/PULL_REQUEST_TEMPLATE.html index 6de5746..97afc15 100644 --- a/docs/PULL_REQUEST_TEMPLATE.html +++ b/docs/PULL_REQUEST_TEMPLATE.html @@ -71,7 +71,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/articles/A-introduction.html b/docs/articles/A-introduction.html index 0bdf2f6..0c8a287 100644 --- a/docs/articles/A-introduction.html +++ b/docs/articles/A-introduction.html @@ -31,7 +31,7 @@ dataverse - 0.2.1.9002 + 0.3.0 @@ -121,7 +121,7 @@

2017-06-13

Quick Start

-

Dataverse has some terminology that is worth quickly reviewing before showing how to work with Dataverse in R. Dataverse is an application that can be installed in many places. As a result, dataverse can work with any instllation but you need to specify which installation you want to work with. This can be set by default with an environment variable, DATAVERSE_SERVER:

+

Dataverse has some terminology that is worth quickly reviewing before showing how to work with Dataverse in R. Dataverse is an application that can be installed in many places. As a result, dataverse can work with any installation but you need to specify which installation you want to work with. This can be set by default with an environment variable, DATAVERSE_SERVER:

 library("dataverse")
 Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
@@ -130,7 +130,7 @@

You can search for and retrieve data without a Dataverse account for that specific Dataverse installation. For example, to search for data files or datasets that mention “ecological inference”, we can just do:

 dataverse_search("ecological inference")[c("name", "type", "description")]
-

The search vignette describes this functionality in more detail. To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - get_file() - can retrieve the files as raw vectors:

+

The search vignette describes this functionality in more detail. To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - get_file() - can retrieve the files as raw vectors:

 get_dataset()
 dataset_files()
diff --git a/docs/articles/B-search.html b/docs/articles/B-search.html
index dff2407..978a49b 100644
--- a/docs/articles/B-search.html
+++ b/docs/articles/B-search.html
@@ -31,7 +31,7 @@
       
       
         dataverse
-        0.2.1.9002
+        0.3.0
       
     
diff --git a/docs/articles/C-retrieval.html b/docs/articles/C-retrieval.html index b5258d7..33fb551 100644 --- a/docs/articles/C-retrieval.html +++ b/docs/articles/C-retrieval.html @@ -31,7 +31,7 @@ dataverse - 0.2.1.9002 + 0.3.0

@@ -102,7 +102,7 @@

2017-06-15

-

This vignette shows how to download data from Dataverse using the dataverse package. We’ll focus on a Dataverse repository that contains supplemental files for Jamie Monogan’s book Political Analysis Using R, which is stored at Harvard University’s IQSS Dataverse Network:

+

This vignette shows how to download data from Dataverse using the dataverse package. We’ll focus on a Dataverse repository that contains supplemental files for Political Analysis Using R, which is stored at Harvard University’s IQSS Dataverse Network:

Monogan, Jamie, 2015, “Political Analysis Using R: Example Code and Data, Plus Data for Practice Problems”, doi:10.7910/DVN/ARKOTI, Harvard Dataverse, V1, UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==

@@ -294,14 +294,14 @@

Reusing Files and Reproducing Analysis

-

To reproduce the analysis, we can simply run the code file either as a system() call or directly in R using source() (note this particular file begins with an rm() call so you may want to run it in a new enviroment):

+

To reproduce the analysis, we can simply run the code file either as a system() call or directly in R using source() (note this particular file begins with an rm() call so you may want to run it in a new environment):

 # Option 1
 system("Rscript chapter03.R")
 
 # Option 2
 source("chapter03.R", local=new.env())
-

Any well-produced set of analysis reproduction files, like this one, should run without error once the data and code are in-hand. Troubleshooting anlaysis files is beyond the scope of this vignette, but common sources are

+

Any well-produced set of analysis reproduction files, like this one, should run without error once the data and code are in-hand. Troubleshooting analysis files is beyond the scope of this vignette, but common sources of error are

  1. The working directory is not set the same as the author intended. This could affect code files not finding the relative position of datasets or of other code files.
  2. Your local machine hasn’t downloaded or installed all the necessary datasets and packages.
  3. diff --git a/docs/articles/D-archiving.html b/docs/articles/D-archiving.html index 4316f7c..c43c1b3 100644 --- a/docs/articles/D-archiving.html +++ b/docs/articles/D-archiving.html @@ -31,7 +31,7 @@ dataverse - 0.2.1.9002 + 0.3.0
@@ -109,7 +109,7 @@

2017-06-15

SWORD-based Data Archiving

-

The main data archiving (or “deposit”) workflow for Dataverse is built on SWORD v2.0. This means that to create a new dataset listing, you will have first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

+

The main data archiving (or “deposit”) workflow for Dataverse is built on SWORD v2.0. This means that to create a new dataset listing, you will have to first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

 # retrieve your service document
 d <- service_document()
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 682428b..13d032e 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -71,7 +71,7 @@
       
       
         dataverse
-        0.2.1.9002
+        0.3.0
       
     
diff --git a/docs/authors.html b/docs/authors.html index 9ccabe4..26db6b5 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -71,7 +71,7 @@ dataverse - 0.2.1.9002 + 0.3.0
@@ -135,11 +135,11 @@

Citation

Source: inst/CITATION -

Thomas J. Leeper (). dataverse: R Client for Dataverse 4. R package version 0.2.1.9002.

+

Thomas J. Leeper (). dataverse: R Client for Dataverse 4. R package version 0.3.0.

@Manual{,
   title = {dataverse: R Client for Dataverse 4},
   author = {Thomas J. Leeper},
-  note = {R package version 0.2.1.9002},
+  note = {R package version 0.3.0},
 }
@@ -96,7 +96,7 @@ -

Dataverse Project logo

+

Dataverse Project logo

The dataverse package provides access to Dataverse APIs (versions 4-5), enabling data search, retrieval, and deposit, thus allowing R users to integrate public data sharing into the reproducible research workflow. dataverse is the next-generation iteration of the dvn package, which works with Dataverse 3 (“Dataverse Network”) applications. dataverse includes numerous improvements for data search, retrieval, and deposit, including use of the (currently in development) sword package for data deposit and the UNF package for data fingerprinting.

@@ -138,7 +138,7 @@

)

## Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.
 
-##
+## 
 ## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────
 ## cols(
 ##   idcode = col_double(),
@@ -180,7 +180,7 @@ 

original = TRUE, server = "demo.dataverse.org" )

-

Note that even though the file prefix is “.tab”, we use read_dta.

+

Note that even though the file extension is “.tab”, we use haven::read_dta.

Of course, when the dataset is not ingested (such as a Rds file), users would always need to specify an .f argument for the specific file.

Note the difference between nlsw_tsv and nlsw_original. nlsw_original preserves the data attributes like value labels, whereas nlsw_tsv has dropped this or left this in file metadata.

@@ -188,7 +188,7 @@ 

## [1] "numeric"
 attr(nlsw_original$race, "labels") # original dta has value labels
-
## white black other
+
## white black other 
 ##     1     2     3
@@ -214,7 +214,7 @@

dataset = "10.70122/FK2/PPIAXE", server = "demo.dataverse.org" )

-
## Dataset (182162):
+
## Dataset (182162): 
 ## Version: 1.1, RELEASED
 ## Release Date: 2020-12-30T00:00:24Z
 ## License: CC0
@@ -241,7 +241,7 @@ 

Data Archiving

-

Dataverse provides two - basically unrelated - workflows for managing (adding, documenting, and publishing) datasets. The first is built on SWORD v2.0. This means that to create a new dataset listing, you will have first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

+

Dataverse provides two - basically unrelated - workflows for managing (adding, documenting, and publishing) datasets. The first is built on SWORD v2.0. This means that to create a new dataset listing, you will have to first initialize a dataset entry with some metadata, add one or more files to the dataset, and then publish it. This looks something like the following:

 # retrieve your service document
 d <- service_document()
@@ -287,7 +287,7 @@ 

Other Installations

-

Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik’s OAIHarvester and Scott Chamberlain’s oai, which offer metadata download from any web repository that is compliant with the Open Archives Initiative standards. Additionally, rdryad uses OAIHarvester to interface with Dryad. The rfigshare package works in a similar spirit to dataverse with https://figshare.com/.

+

Users interested in downloading metadata from archives other than Dataverse may be interested in Kurt Hornik’s OAIHarvester and Scott Chamberlain’s oai, which offer metadata download from any web repository that is compliant with the Open Archives Initiative standards. Additionally, rdryad uses OAIHarvester to interface with Dryad. The rfigshare package works in a similar spirit to dataverse with https://figshare.com/.

diff --git a/docs/news/index.html b/docs/news/index.html index d9988b7..bfbe74e 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -71,7 +71,7 @@ dataverse - 0.2.1.9002 + 0.3.0

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index e7f6cff..ca6d720 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -6,7 +6,7 @@ articles: B-search: B-search.html C-retrieval: C-retrieval.html D-archiving: D-archiving.html -last_built: 2021-01-17T17:13Z +last_built: 2021-01-18T17:11Z urls: reference: https://IQSS.github.io/dataverse-client-r/reference article: https://IQSS.github.io/dataverse-client-r/articles diff --git a/docs/reference/add_dataset_file.html b/docs/reference/add_dataset_file.html index 7400029..92c9607 100644 --- a/docs/reference/add_dataset_file.html +++ b/docs/reference/add_dataset_file.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0
diff --git a/docs/reference/add_file.html b/docs/reference/add_file.html index cd64134..187c17d 100644 --- a/docs/reference/add_file.html +++ b/docs/reference/add_file.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/create_dataset.html b/docs/reference/create_dataset.html index 2ffe7e9..4e81278 100644 --- a/docs/reference/create_dataset.html +++ b/docs/reference/create_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 @@ -201,7 +201,7 @@

Value

An object of class “dataverse_dataset”.

Details

-

create_dataset creates a Dataverse dataset. In Dataverse, a “dataset” is the lowest-level structure in which to organize files. For example, a Dataverse dataset might contain the files used to reproduce a published article, including data, analysis code, and related materials. Datasets can be organized into “Dataverse” objects, which can be further nested within other Dataverses. For someone creating an archive, this would be the first step to producing said archive (after creating a Dataverse, if one does not already exist). Once files and metadata have been added, the dataset can be publised (i.e., made public) using publish_dataset.

+

create_dataset creates a Dataverse dataset. In Dataverse, a “dataset” is the lowest-level structure in which to organize files. For example, a Dataverse dataset might contain the files used to reproduce a published article, including data, analysis code, and related materials. Datasets can be organized into “Dataverse” objects, which can be further nested within other Dataverses. For someone creating an archive, this would be the first step to producing said archive (after creating a Dataverse, if one does not already exist). Once files and metadata have been added, the dataset can be published (i.e., made public) using publish_dataset.

update_dataset updates a Dataverse dataset that has already been created using create_dataset. This creates a draft version of the dataset or modifies the current draft if one is already in-progress. It does not assign a new version number to the dataset nor does it make it publicly visible (which can be done with publish_dataset).

See also

diff --git a/docs/reference/create_dataverse.html b/docs/reference/create_dataverse.html index 5d5b83e..19b3d97 100644 --- a/docs/reference/create_dataverse.html +++ b/docs/reference/create_dataverse.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/dataset_atom.html b/docs/reference/dataset_atom.html index 3936166..9d8b128 100644 --- a/docs/reference/dataset_atom.html +++ b/docs/reference/dataset_atom.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/dataset_versions.html b/docs/reference/dataset_versions.html index 67c54e1..9d497f7 100644 --- a/docs/reference/dataset_versions.html +++ b/docs/reference/dataset_versions.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/dataverse.html b/docs/reference/dataverse.html index 841223f..dafc0cd 100644 --- a/docs/reference/dataverse.html +++ b/docs/reference/dataverse.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/dataverse_metadata.html b/docs/reference/dataverse_metadata.html index 9d88281..e99ddf9 100644 --- a/docs/reference/dataverse_metadata.html +++ b/docs/reference/dataverse_metadata.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/dataverse_search.html b/docs/reference/dataverse_search.html index 943ee2a..62f6fc4 100644 --- a/docs/reference/dataverse_search.html +++ b/docs/reference/dataverse_search.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/delete_dataset.html b/docs/reference/delete_dataset.html index 5c726c7..6de0994 100644 --- a/docs/reference/delete_dataset.html +++ b/docs/reference/delete_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/delete_dataverse.html b/docs/reference/delete_dataverse.html index f8ebedf..6ef7158 100644 --- a/docs/reference/delete_dataverse.html +++ b/docs/reference/delete_dataverse.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git 
a/docs/reference/delete_file.html b/docs/reference/delete_file.html index 580e178..501260b 100644 --- a/docs/reference/delete_file.html +++ b/docs/reference/delete_file.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/delete_sword_dataset.html b/docs/reference/delete_sword_dataset.html index 4b81c64..df44fe8 100644 --- a/docs/reference/delete_sword_dataset.html +++ b/docs/reference/delete_sword_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/files.html b/docs/reference/files.html index add2fbe..fb70ed1 100644 --- a/docs/reference/files.html +++ b/docs/reference/files.html @@ -80,7 +80,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/get_dataframe.html b/docs/reference/get_dataframe.html index 2703a77..1260a63 100644 --- a/docs/reference/get_dataframe.html +++ b/docs/reference/get_dataframe.html @@ -73,7 +73,7 @@ dataverse - 0.2.1.9002 + 0.3.0 @@ -223,8 +223,7 @@

Arg

Examples

-
-# Retrieve data.frame from dataverse DOI and file name +
# Retrieve data.frame from dataverse DOI and file name df_from_rds_ingested <- get_dataframe_by_name( filename = "roster-bulls-1996.tab", @@ -263,34 +262,17 @@

Examp #> experience_years = col_double(), #> college = col_character() #> )

+# Do not run when submitting to CRAN, because the whole +# example sometimes takes longer than 10 sec. +if (FALSE) { # Retrieve ingested file originally a Stata dta df_from_stata_ingested <- get_dataframe_by_name( filename = "nlsw88.tab", dataset = "doi:10.70122/FK2/PPIAXE", server = "demo.dataverse.org" - ) -
#> Downloading ingested version of data with readr::read_tsv. To download the original version and remove this message, set original = TRUE.
#> -#> ── Column specification ──────────────────────────────────────────────────────── -#> cols( -#> idcode = col_double(), -#> age = col_double(), -#> race = col_double(), -#> married = col_double(), -#> never_married = col_double(), -#> grade = col_double(), -#> collgrad = col_double(), -#> south = col_double(), -#> smsa = col_double(), -#> c_city = col_double(), -#> industry = col_double(), -#> occupation = col_double(), -#> union = col_double(), -#> wage = col_double(), -#> hours = col_double(), -#> ttl_exp = col_double(), -#> tenure = col_double() -#> )
+ ) + # To use the original file version, or for non-ingested data, # please specify `original = TRUE` and specify a function in .f. @@ -304,7 +286,7 @@

Examp server = "demo.dataverse.org", original = TRUE, .f = readr::read_rds - ) + ) } if (requireNamespace("haven", quietly = TRUE)) { @@ -315,8 +297,10 @@

Examp server = "demo.dataverse.org", original = TRUE, .f = haven::read_dta - ) + ) } +} +

diff --git a/docs/reference/get_dataset.html b/docs/reference/get_dataset.html index 8907f9f..919a17f 100644 --- a/docs/reference/get_dataset.html +++ b/docs/reference/get_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/get_dataverse.html b/docs/reference/get_dataverse.html index 74a8ae7..ffa84a8 100644 --- a/docs/reference/get_dataverse.html +++ b/docs/reference/get_dataverse.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/get_facets.html b/docs/reference/get_facets.html index 694ae54..1368bba 100644 --- a/docs/reference/get_facets.html +++ b/docs/reference/get_facets.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/get_file_metadata.html b/docs/reference/get_file_metadata.html index 9eaf7d6..eea4df1 100644 --- a/docs/reference/get_file_metadata.html +++ b/docs/reference/get_file_metadata.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/get_user_key.html b/docs/reference/get_user_key.html index c665dca..6bc588a 100644 --- a/docs/reference/get_user_key.html +++ b/docs/reference/get_user_key.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/index.html b/docs/reference/index.html index af54628..fcaa10f 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -71,7 +71,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/initiate_sword_dataset.html b/docs/reference/initiate_sword_dataset.html index 77a6ea5..40ed417 100644 --- a/docs/reference/initiate_sword_dataset.html +++ b/docs/reference/initiate_sword_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/is_ingested.html b/docs/reference/is_ingested.html index 1ff70da..78874e9 100644 --- a/docs/reference/is_ingested.html +++ b/docs/reference/is_ingested.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/list_datasets.html 
b/docs/reference/list_datasets.html index 22424db..386f6db 100644 --- a/docs/reference/list_datasets.html +++ b/docs/reference/list_datasets.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/publish_dataset.html b/docs/reference/publish_dataset.html index aa139bc..378c020 100644 --- a/docs/reference/publish_dataset.html +++ b/docs/reference/publish_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/publish_dataverse.html b/docs/reference/publish_dataverse.html index 5d1bd92..e313f9d 100644 --- a/docs/reference/publish_dataverse.html +++ b/docs/reference/publish_dataverse.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/publish_sword_dataset.html b/docs/reference/publish_sword_dataset.html index 194f299..6031218 100644 --- a/docs/reference/publish_sword_dataset.html +++ b/docs/reference/publish_sword_dataset.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/service_document.html b/docs/reference/service_document.html index 695d136..877311b 100644 --- a/docs/reference/service_document.html +++ b/docs/reference/service_document.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/docs/reference/set_dataverse_metadata.html b/docs/reference/set_dataverse_metadata.html index 4f97254..2fccb38 100644 --- a/docs/reference/set_dataverse_metadata.html +++ b/docs/reference/set_dataverse_metadata.html @@ -72,7 +72,7 @@ dataverse - 0.2.1.9002 + 0.3.0 diff --git a/for-developers/developer-tasks.R b/for-developers/developer-tasks.R index 9b61d04..ade6b61 100644 --- a/for-developers/developer-tasks.R +++ b/for-developers/developer-tasks.R @@ -33,6 +33,6 @@ lintr::lint("R/add_dataset_file.R") # devtools::check(force_suggests = FALSE) devtools::check(cran=T) # devtools::check_rhub(email="wibeasley@hotmail.com") -# devtools::build_win(version="R-devel") #CRAN submission policies encourage the development version +# 
devtools::check_win_devel() #CRAN submission policies encourage the development version # devtools::revdep_check(pkg="REDCapR", recursive=TRUE) # devtools::release(check=FALSE) #Careful, the last question ultimately uploads it to CRAN, where you can't delete/reverse your decision. diff --git a/man/create_dataset.Rd b/man/create_dataset.Rd index 948af53..cb31ca3 100644 --- a/man/create_dataset.Rd +++ b/man/create_dataset.Rd @@ -51,7 +51,7 @@ An object of class \dQuote{dataverse_dataset}. Create or update dataset within a Dataverse } \details{ -\code{create_dataset} creates a Dataverse dataset. In Dataverse, a \dQuote{dataset} is the lowest-level structure in which to organize files. For example, a Dataverse dataset might contain the files used to reproduce a published article, including data, analysis code, and related materials. Datasets can be organized into \dQuote{Dataverse} objects, which can be further nested within other Dataverses. For someone creating an archive, this would be the first step to producing said archive (after creating a Dataverse, if one does not already exist). Once files and metadata have been added, the dataset can be publised (i.e., made public) using \code{\link{publish_dataset}}. +\code{create_dataset} creates a Dataverse dataset. In Dataverse, a \dQuote{dataset} is the lowest-level structure in which to organize files. For example, a Dataverse dataset might contain the files used to reproduce a published article, including data, analysis code, and related materials. Datasets can be organized into \dQuote{Dataverse} objects, which can be further nested within other Dataverses. For someone creating an archive, this would be the first step to producing said archive (after creating a Dataverse, if one does not already exist). Once files and metadata have been added, the dataset can be published (i.e., made public) using \code{\link{publish_dataset}}. 
\code{update_dataset} updates a Dataverse dataset that has already been created using \code{\link{create_dataset}}. This creates a draft version of the dataset or modifies the current draft if one is already in-progress. It does not assign a new version number to the dataset nor does it make it publicly visible (which can be done with \code{\link{publish_dataset}}). } diff --git a/man/files.Rd b/man/files.Rd index 7137ad5..6eb2c6a 100644 --- a/man/files.Rd +++ b/man/files.Rd @@ -5,7 +5,7 @@ \alias{get_file_by_name} \alias{get_file_by_id} \alias{get_file_by_doi} -\title{Download File} +\title{Download dataverse file as a raw binary} \usage{ get_file( file, @@ -107,19 +107,20 @@ function. To load datasets into the R environment dataframe, see \link{get_dataframe_by_name}. } \description{ -Download Dataverse File(s). \code{get_file} is a general wrapper, +Download Dataverse File(s). \verb{get_file_*} +functions return a raw binary file, which cannot be readily analyzed in R. +To use the objects as dataframes, see the \verb{get_dataset_*} functions at +\link{get_dataset} instead. +} +\details{ +This function provides access to data files from a Dataverse entry. +\code{get_file} is a general wrapper, and can take either dataverse objects, file IDs, or a filename and dataverse. +Internally, all functions download each file by \code{get_file_by_id}. \code{get_file_by_name} is a shorthand for running \code{get_file} by specifying a file name (\code{filename}) and dataset (\code{dataset}). \code{get_file_by_doi} obtains a file by its file DOI, bypassing the \code{dataset} argument. - -Internally, all functions download each file by \code{get_file_by_id}. \verb{get_file_*} -functions return a raw binary file, which cannot be readily analyzed in R. -To use the objects as dataframes, see the \verb{get_dataset_*} functions at \link{get_dataset} -} -\details{ -This function provides access to data files from a Dataverse entry. 
} \examples{ \dontrun{ diff --git a/man/get_dataframe.Rd b/man/get_dataframe.Rd index 6f4f4c1..f60ec63 100644 --- a/man/get_dataframe.Rd +++ b/man/get_dataframe.Rd @@ -4,7 +4,7 @@ \alias{get_dataframe_by_name} \alias{get_dataframe_by_id} \alias{get_dataframe_by_doi} -\title{Get file from dataverse and convert it into a dataframe or tibble} +\title{Download dataverse file as a dataframe} \usage{ get_dataframe_by_name( filename, @@ -28,10 +28,10 @@ for example \code{"doi:10.70122/FK2/HXJVJU"}. Alternatively, an object of class \item{.f}{The function to used for reading in the raw dataset. This user must choose the appropriate function: for example if the target is a .rds -file, then \code{.f} should be \code{readRDS} or \code{readr::read_}rds`.} +file, then \code{.f} should be \code{readRDS} or \code{readr::read_rds}.} \item{original}{A logical, defaulting to TRUE. Whether to read the ingested, -archival version of the dataset if one exists. The archival versions are tab-delimited +archival version of the datafile if one exists. The archival versions are tab-delimited \code{.tab} files so if \code{original = FALSE}, \code{.f} is set to \code{readr::read_tsv}. If functions to read the original version is available, then \code{original = TRUE} with a specified \code{.f} is better.} @@ -68,39 +68,41 @@ or globally using \code{Sys.setenv("DATAVERSE_SERVER" = "dataverse.example.com") \code{"10.70122/FK2/PPIAXE/MHDB0O"} or \code{"doi:10.70122/FK2/PPIAXE/MHDB0O"}} } \description{ -\code{get_dataframe_by_id}, if you know the numeric ID of the dataset, or instead -\code{get_dataframe_by_name} if you know the filename and doi. The dataset +Use \code{get_dataframe_by_name} if you know the name of the datafile and the DOI +of the dataset. Use \code{get_dataframe_by_doi} if you know the DOI of the datafile +itself. Use \code{get_dataframe_by_id} if you know the numeric ID of the +datafile. 
} \examples{ - # Retrieve data.frame from dataverse DOI and file name -df_from_rds_ingested <- +df_tab <- get_dataframe_by_name( filename = "roster-bulls-1996.tab", dataset = "doi:10.70122/FK2/HXJVJU", server = "demo.dataverse.org" ) -# Retrieve the same data.frame from dataverse + file DOI -df_from_rds_ingested_by_doi <- +# Retrieve the same file from file DOI +df_tab <- get_dataframe_by_doi( filedoi = "10.70122/FK2/HXJVJU/SA3Z2V", server = "demo.dataverse.org" ) +# Do not run when submitting to CRAN, because the whole +# example sometimes takes longer than 10 sec. +\dontrun{ # Retrieve ingested file originally a Stata dta df_from_stata_ingested <- get_dataframe_by_name( filename = "nlsw88.tab", dataset = "doi:10.70122/FK2/PPIAXE", server = "demo.dataverse.org" - ) - + ) # To use the original file version, or for non-ingested data, # please specify `original = TRUE` and specify a function in .f. -# A data.frame is still returned, but the if (requireNamespace("readr", quietly = TRUE)) { df_from_rds_original <- get_dataframe_by_name( @@ -109,17 +111,29 @@ if (requireNamespace("readr", quietly = TRUE)) { server = "demo.dataverse.org", original = TRUE, .f = readr::read_rds - ) + ) } +# Get Stata file as original if (requireNamespace("haven", quietly = TRUE)) { - df_from_stata_original <- + df_stata_original <- get_dataframe_by_name( filename = "nlsw88.tab", dataset = "doi:10.70122/FK2/PPIAXE", server = "demo.dataverse.org", original = TRUE, .f = haven::read_dta - ) + ) } + +# Stata file as ingested file (less information than original) +df_stata_ingested <- + get_dataframe_by_name( + filename = "nlsw88.tab", + dataset = "doi:10.70122/FK2/PPIAXE", + server = "demo.dataverse.org" + ) + +} + } diff --git a/tests/testthat/tests-get_dataframe-original-basketball.R b/tests/testthat/tests-get_dataframe-original-basketball.R index c26c900..8741c98 100644 --- a/tests/testthat/tests-get_dataframe-original-basketball.R +++ 
b/tests/testthat/tests-get_dataframe-original-basketball.R @@ -1,6 +1,21 @@ # See https://demo.dataverse.org/dataverse/dataverse-client-r # https://doi.org/10.70122/FK2/HXJVJU +# standarize_string <- function (x) { +# substring(x, 1, 10) +# } +standarize_string <- function (x, start = 1, stop = nchar(x)) { + x %>% + base::iconv( + x = ., + from = "latin1", + to = "ASCII//TRANSLIT", + sub = "?" + ) %>% + sub("KukoA?,SF", "Kukoc,SF") %>% + substring(start, stop) +} + test_that("roster-by-name", { expected_ds <- retrieve_info_dataset("dataset-basketball/expected-metadata.yml") expected_file <- expected_ds$roster$raw_value @@ -16,7 +31,23 @@ test_that("roster-by-name", { expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) expect_equal(nchar( actual ), nchar( expected_file )) - expect_equal(actual, expected_file) + # actual <- standarize_string(actual) + # expected_file <- standarize_string(expected_file) + # expect_equal(actual, expected_file) + expect_equal(standarize_string(actual, 0001, 0100), standarize_string(expected_file, 0001, 0100)) + expect_equal(standarize_string(actual, 0101, 0200), standarize_string(expected_file, 0101, 0200)) + expect_equal(standarize_string(actual, 0201, 0300), standarize_string(expected_file, 0201, 0300)) + expect_equal(standarize_string(actual, 0301, 0400), standarize_string(expected_file, 0301, 0400)) + expect_equal(standarize_string(actual, 0401, 0500), standarize_string(expected_file, 0401, 0500)) + expect_equal(standarize_string(actual, 0501, 0600), standarize_string(expected_file, 0501, 0600)) + expect_equal(standarize_string(actual, 0601, 0700), standarize_string(expected_file, 0601, 0700)) + expect_equal(standarize_string(actual, 0701, 0800), standarize_string(expected_file, 0701, 0800)) + expect_equal(standarize_string(actual, 0801, 0900), standarize_string(expected_file, 0801, 0900)) + expect_equal(standarize_string(actual, 0901, 1000), standarize_string(expected_file, 0901, 1000)) + 
expect_equal(standarize_string(actual, 1001, 1085), standarize_string(expected_file, 1001, 1085)) + + + expect_equal(standarize_string(actual), standarize_string(expected_file)) }) test_that("roster-by-doi", { @@ -33,6 +64,9 @@ test_that("roster-by-doi", { expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) expect_equal(nchar( actual ), nchar( expected_file )) + actual <- standarize_string(actual) + expected_file <- standarize_string(expected_file) + expect_equal(actual, expected_file) }) @@ -50,6 +84,9 @@ test_that("roster-by-id", { expect_equal(substr(actual, 1, 30), substr(expected_file, 1, 30)) expect_equal(nchar( actual ), nchar( expected_file )) + actual <- standarize_string(actual) + expected_file <- standarize_string(expected_file) + expect_equal(actual, expected_file) }) diff --git a/vignettes/A-introduction.Rmd b/vignettes/A-introduction.Rmd index e8377e9..e6c6bab 100644 --- a/vignettes/A-introduction.Rmd +++ b/vignettes/A-introduction.Rmd @@ -57,7 +57,7 @@ You can search for and retrieve data without a Dataverse account for that a spec dataverse_search("ecological inference")[c("name", "type", "description")] ``` -The [search vignette](search.html) describes this functionality in more detail. To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - `get_file()` - can retrieve the files as raw vectors: +The [search vignette](B-search.html) describes this functionality in more detail. 
To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - `get_file()` - can retrieve the files as raw vectors: ```R get_dataset() diff --git a/vignettes/A-introduction.Rmd2 b/vignettes/A-introduction.Rmd2 index 4105d1a..63f630d 100644 --- a/vignettes/A-introduction.Rmd2 +++ b/vignettes/A-introduction.Rmd2 @@ -56,7 +56,7 @@ You can search for and retrieve data without a Dataverse account for that a spec dataverse_search("ecological inference")[c("name", "type", "description")] ``` -The [search vignette](search.html) describes this functionality in more detail. To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - `get_file()` - can retrieve the files as raw vectors: +The [search vignette](B-search.html) describes this functionality in more detail. To retrieve a data file, we need to investigate the dataset being returned and look at what files it contains using a variety of functions, the last of which - `get_file()` - can retrieve the files as raw vectors: ```R get_dataset() diff --git a/vignettes/C-retrieval.Rmd b/vignettes/C-retrieval.Rmd index f41eae2..ffe6db9 100644 --- a/vignettes/C-retrieval.Rmd +++ b/vignettes/C-retrieval.Rmd @@ -17,7 +17,7 @@ vignette: > -This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [Jamie Monogan](https://spia.uga.edu/faculty-member/jamie-monogan/)'s book [*Political Analysis Using R*](https://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): +This vignette shows how to download data from Dataverse using the dataverse package. 
We'll focus on a Dataverse repository that contains supplemental files for [*Political Analysis Using R*](https://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): > Monogan, Jamie, 2015, "Political Analysis Using R: Example Code and Data, Plus Data for Practice Problems", [doi:10.7910/DVN/ARKOTI](https://doi.org/10.7910/DVN/ARKOTI), Harvard Dataverse, V1, UNF:6:+itU9hcUJ8I9E0Kqv8HWHg== diff --git a/vignettes/C-retrieval.Rmd2 b/vignettes/C-retrieval.Rmd2 index a6c5230..493b2cd 100644 --- a/vignettes/C-retrieval.Rmd2 +++ b/vignettes/C-retrieval.Rmd2 @@ -20,7 +20,7 @@ options(width = 120) knitr::opts_chunk$set(results = "hold") ``` -This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [Jamie Monogan](https://spia.uga.edu/faculty-member/jamie-monogan/)'s book [*Political Analysis Using R*](https://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): +This vignette shows how to download data from Dataverse using the dataverse package. We'll focus on a Dataverse repository that contains supplemental files for [*Political Analysis Using R*](https://www.springer.com/gb/book/9783319234458), which is stored at Harvard University's [IQSS Dataverse Network](https://dataverse.harvard.edu/): > Monogan, Jamie, 2015, "Political Analysis Using R: Example Code and Data, Plus Data for Practice Problems", [doi:10.7910/DVN/ARKOTI](https://doi.org/10.7910/DVN/ARKOTI), Harvard Dataverse, V1, UNF:6:+itU9hcUJ8I9E0Kqv8HWHg==