Skip to content

Commit

Permalink
new fn wikidata_query()
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Blätte authored and Andreas Blätte committed Sep 21, 2023
1 parent 885c7b4 commit a1bfc2c
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 2 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Expand Up @@ -22,7 +22,8 @@ Suggests:
markdown (>= 1.5),
rmarkdown,
knitr,
SPARQL
SPARQL,
WikidataQueryServiceR
VignetteBuilder: knitr
SystemRequirements: docker
LazyData: true
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -1,6 +1,8 @@
# Generated by roxygen2: do not edit by hand

export(as_annotation)
export(dbpedia_get_wikidata_uris)
export(wikidata_query)
exportMethods(get_dbpedia_links)
import(methods)
importFrom(RcppCWB,cl_struc2str)
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
@@ -1,6 +1,9 @@
## dbpedia v0.0.1.9001
## dbpedia v0.0.1.9001-v0.0.1.9002

* New auxiliary function `as_chunks()`.
* New function `dbpedia_get_wikidata_uris()`.
* New function `wikidata_query()` as high-level wrapper for
`WikidataQueryServiceR::query_wikidata()`.

## dbpedia v0.0.1

Expand Down
74 changes: 74 additions & 0 deletions R/wikidata.R
Expand Up @@ -9,6 +9,7 @@
#' @param wait A numeric value passed into `Sys.sleep()` to slow down sequence
#' of requests (and avoid denial of service). Defaults to 100.
#' @param progress Whether to show progress bar (`logical` value).
#' @export
#' @examples
#' \donttest{
#' dbpedia_ids <- c(
Expand Down Expand Up @@ -83,5 +84,78 @@ dbpedia_get_wikidata_uris <- function(x, optional, endpoint, limit = 100, wait =

if (progress) cli_progress_done()

do.call(rbind, retval_li)
}


#' Query Wikidata endpoint for additional information.
#'
#' This is a wrapper for `WikidataQueryServiceR::query_wikidata()` to get
#' additional information for known wikidata IDs. The IDs in `x` are split
#' into chunks of at most `limit` elements (using `as_chunks()`), one SPARQL
#' query is sent per chunk, and the per-chunk results are combined with
#' `rbind()`.
#'
#' The query asks for the German label (`lang(?label) = "de"`) of every item
#' and, optionally, the value of property `id`.
#'
#' @return The row-wise combination of the results returned by
#'   `WikidataQueryServiceR::query_wikidata()` (a `tibble`). The first column
#'   is renamed to `wikidata_id`.
#' @param x A vector of wikidata ids (`character` vector), without the `wd:`
#'   prefix, which is added internally.
#' @param id Wikidata ID for information to retrieve (length-one `character`
#'   vector), e.g. `"P439"`.
#' @param limit Maximum number of wikidata IDs to be sent to endpoint at a
#'   time (single positive number).
#' @param progress Whether to show progress information (`logical` value).
#' @param wait A single non-negative numeric value passed into `Sys.sleep()`
#'   before every request - slow down requests to avoid denial of service.
#'   Use `0` to disable waiting.
#' @export
#' @examples
#' \donttest{
#' wikidata_ids <- c("Q1741365", "Q3840", "Q437")
#' wikidata_query(
#'   wikidata_ids,
#'   id = "P439", # German municipality key
#'   wait = 0,
#'   limit = 2,
#'   progress = TRUE
#' )
#' }
wikidata_query <- function(x, id, limit = 100L, wait = 1, progress = FALSE) {

  # Validate arguments first so that malformed input is reported independently
  # of which optional packages happen to be installed.
  stopifnot(
    is.vector(x), is.character(x),
    is.character(id), length(id) == 1L,
    is.numeric(limit), length(limit) == 1L, limit > 0,
    # wait = 0 is legitimate (no throttling) and is used in the example
    is.numeric(wait), length(wait) == 1L, wait >= 0,
    is.logical(progress), length(progress) == 1L
  )

  # WikidataQueryServiceR is an optional dependency (listed under Suggests).
  if (!requireNamespace("WikidataQueryServiceR", quietly = TRUE)) {
    stop("R package WikidataQueryServiceR required but not available. ")
  }

  # sprintf() template: first %s = space-separated "wd:<id>" items,
  # second %s = the property to retrieve.
  template <- 'SELECT ?item ?label ?key ?keyLabel
WHERE {
VALUES ?item { %s }
OPTIONAL { ?item wdt:%s ?key . }
?item rdfs:label ?label
filter(lang(?label) = "de")
SERVICE wikibase:label { bd:serviceParam wikibase:language "de". }
}'

  chunks <- as_chunks(x = x, size = limit)
  retval_li <- vector("list", length(chunks))

  if (progress) {
    cli_progress_bar("Tasks", total = length(chunks), type = "tasks")
  }

  # seq_along() skips the loop entirely if there are no chunks, whereas
  # 1L:length(chunks) would iterate over c(1, 0).
  for (i in seq_along(chunks)) {
    # Only update the progress bar if one has been created above.
    if (progress) cli_progress_update()

    query <- sprintf(
      template,
      paste0("wd:", chunks[[i]], collapse = " "),
      id
    )

    Sys.sleep(wait)

    retval_li[[i]] <- WikidataQueryServiceR::query_wikidata(
      sparql_query = query,
      format = "simple"
    )
    colnames(retval_li[[i]])[1] <- "wikidata_id"
  }

  if (progress) cli_progress_done()

  do.call(rbind, retval_li)
}
38 changes: 38 additions & 0 deletions man/wikidata_query.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a1bfc2c

Please sign in to comment.