Skip to content

Commit

Permalink
experimental argument types_src for get_dbpedia_uris() #27
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Blätte authored and Andreas Blätte committed Feb 27, 2024
1 parent 8eae01e commit f4dc779
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 11 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Expand Up @@ -5,12 +5,12 @@ export(as_annotation)
export(as_subcorpus)
export(dbpedia_get_wikidata_uris)
export(dbpedia_spotlight_status)
export(map_types_to_class)
export(namespaced_xpath)
export(sparql_query)
export(wikidata_query)
export(xml_enrich)
exportMethods(add_wikidata_uris)
exportMethods(entity_types_map)
exportMethods(get_annotation_table)
exportMethods(get_dbpedia_uris)
import(methods)
Expand Down
44 changes: 42 additions & 2 deletions R/dbpedia.R
Expand Up @@ -354,7 +354,19 @@ setGeneric("get_dbpedia_uris", function(x, ...) standardGeneric("get_dbpedia_uri
#' types = "Company",
#' api = "http://api.dbpedia-spotlight.org/en/annotate"
#' )
setMethod("get_dbpedia_uris", "character", function(x, language = getOption("dbpedia.lang"), max_len = 5600L, confidence = 0.35, api = getOption("dbpedia.endpoint"), types = character(), support = 20, verbose = TRUE){
setMethod(
"get_dbpedia_uris", "character",
function(
x,
language = getOption("dbpedia.lang"),
max_len = 5600L,
confidence = 0.35,
api = getOption("dbpedia.endpoint"),
types = character(),
support = 20,
types_src = c("DBpedia", "Wikidata"),
verbose = TRUE
){

if (nchar(x) > max_len){
if (verbose) cli_alert_warning(
Expand Down Expand Up @@ -425,6 +437,28 @@ setMethod("get_dbpedia_uris", "character", function(x, language = getOption("dbp
)
}
)]

if (length(types_src) > 0L){
src_all <- unique(unlist(lapply(resources_min[["types"]], names)))
src_unused <- setdiff(src_all, types_src)
if (length(src_unused) > 0L)
cli_alert_info(
"dropping available types from: {paste(src_unused, collapse = ' / ')}"
)
for (src in types_src){
types_vec <- unlist(lapply(
lapply(resources_min[["types"]], `[[`, src),
function(x){
if (is.null(x))
NA_character_
else
sprintf("|%s|", paste(x, collapse = "|"))
}
))

resources_min[, (paste(src, "type", sep = "_")) := types_vec]
}
}

resources_min
})
Expand Down Expand Up @@ -475,6 +509,11 @@ setMethod("get_dbpedia_uris", "AnnotatedPlainTextDocument", function(x, language
#' vector is empty (default), no restrictions are applied.
#' @param support The number of indegrees at Wikidata. Useful for limiting the
#' number of results by excluding insignificant entities.
#' @param types_src A `character` vector specifying knowledge bases as sources
#' for entity types. If provided, columns following the pattern '(src)_type'
#' (e.g. "DBpedia_type") with entity types (`NA` if not available) will be
#' added to the table. Values are wrapped and separated by vertical bars.
#' `types_src` defaults to "DBpedia" and "Wikidata".
#' @param verbose A `logical` value - whether to display messages.
#' @param progress A `logical` value - whether to show progress.
#' @param s_attribute A length-one `character` vector indicating a s-attribute.
Expand Down Expand Up @@ -765,7 +804,8 @@ setMethod(
verbose = if (progress) FALSE else verbose
)[, "doc" := docname]
}
)
),
fill = TRUE
)

if (progress) cli_progress_done(.envir = env)
Expand Down
13 changes: 9 additions & 4 deletions R/entity_types.R
Expand Up @@ -12,19 +12,24 @@
#' @param other A `character` vector with the name of the class of all types not
#' matched by the `mapping_vector`.
#' @param verbose A `logical` value - whether to display messages.
#' @param ... Further arguments.
#' @importFrom data.table is.data.table
#' @importFrom cli format_error cli_alert_info
#' @details If there is more than one match between the retrieved types and the
#' `mapping_vector`, unique classes are sorted alphabetically and collapsed.
#' @return Function adds classes to input data.table by reference.
#' @exportMethod generic
#' @exportMethod entity_types_map
#' @rdname entity_types_map
setGeneric("entity_types_map", function(x, ...)
standardGeneric("entity_types_map"))
setGeneric(
  # S4 generic dispatching on `x`; further arguments are passed on via `...`.
  # The definition is kept as a bare standardGeneric() call (no braces) so
  # that a standard generic function is created.
  name = "entity_types_map",
  def = function(x, ...) standardGeneric("entity_types_map")
)


#' @rdname entity_types_map
#' @examples
#' library(quanteda)
#'
#' inaugural_paragraphs <- data_corpus_inaugural %>%
#' corpus_subset(Year == 2009) %>% # limit to Barack Obama 2009
#' corpus_reshape(to = "paragraphs")
Expand Down Expand Up @@ -116,4 +121,4 @@ setMethod(

x[, class := entity_types_map(x = x[["types"]])]
x
}
})
44 changes: 40 additions & 4 deletions man/map_types_to_class.Rd → man/entity_types_map.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/get_dbpedia_uris.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f4dc779

Please sign in to comment.