Commit
pipes dropped for oldrel compatibility #38
Andreas Blätte authored and committed Feb 26, 2024
1 parent 12d01fc commit a52d1fb
Showing 7 changed files with 75 additions and 62 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -57,6 +57,7 @@ importFrom(tibble,as_tibble)
importFrom(utils,URLencode)
importFrom(xml2,read_xml)
importFrom(xml2,xml_attr)
importFrom(xml2,xml_children)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_set_attrs)
importFrom(xml2,xml_text)
65 changes: 32 additions & 33 deletions R/dbpedia.R
@@ -155,6 +155,7 @@ as_annotation <- function(x){
#' @param feature_tag ...
#' @importFrom stringi stri_c
#' @importFrom NLP Annotation
#' @importFrom xml2 xml_children
to_annotation = function(nodes, xml, token_tags, feature_tag) {

if (inherits(nodes, "xml_nodeset")) {
@@ -169,8 +170,10 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {

} else {

token_elements <- nodes |>
xml2::xml_find_all(xpath = namespaced_xpath(xml = xml, tags = token_tags))
token_elements <- xml2::xml_find_all(
nodes,
xpath = namespaced_xpath(xml = xml, tags = token_tags)
)

# make token annotation data

@@ -210,9 +213,10 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {
# data.frame split to rows

token_feat_dataframe <- data.frame(word = toks, id = tok_ids)
token_feat_list <- split(token_feat_dataframe, seq(nrow(token_feat_dataframe))) |>
unname()

token_feat_list <- unname(
split(token_feat_dataframe, seq(nrow(token_feat_dataframe)))
)

token_annotation <- NLP::Annotation(
seq_along(tok_ids), # IDs must be integer, which is a bit unfortunate
rep("word", length(tok_ids)),
@@ -224,49 +228,45 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {
# and add feature elements if chosen

if (!is.null(feature_tag)) {
feature_elements <- nodes |>
xml2::xml_find_all(xpath = namespaced_xpath(xml = xml, tags = feature_tag))
feature_elements <- xml2::xml_find_all(
nodes,
xpath = namespaced_xpath(xml = xml, tags = feature_tag)
)
} else {
feature_elements <- NULL
}

if (length(feature_elements) > 0) {


feature_ids <- sapply(feature_elements, function(element) {
xml2::xml_find_first(element,
xpath = namespaced_xpath(xml = xml, tags = token_tags)) |>
xml2::xml_attr("id")
}
)
feature_ids <- sapply(
feature_elements,
function(element) {
el <- xml2::xml_find_first(
element,
xpath = namespaced_xpath(xml = xml, tags = token_tags)
)
xml2::xml_attr(el, "id")
})

feature_ids <- sprintf("%s_%s", feature_ids, feature_tag)

# get attributes of features. The feature (<name>) element has no ID of its
# own, so the first word ID is used (assuming that there are no overlaps).
feature_kinds <- xml2::xml_attr(feature_elements, "type")
feature_texts <- sapply(feature_elements, function(feat) {
xml2::xml_children(feat) |>
xml2::xml_text() |>
paste(collapse = " ")
}
feature_texts <- sapply(
feature_elements,
function(feat) paste(xml_text(xml_children(feat)), collapse = " ")
)

# get spans for features

entity_spans <- sapply(feature_elements, function(element) {
child_id <- element |>
xml2::xml_children() |>
xml2::xml_attr("id")

entity_spans <- t(sapply(feature_elements, function(element) {
child_id <- xml_attr(xml_children(element), "id")
child_idx <- which(tok_ids %in% child_id)
child_start <- min(start_positions[child_idx])
child_end <- max(end_positions[child_idx])

matrix(c(child_start, child_end), nrow = 1, ncol = 2)

}
) |> t()
matrix(c(child_start, child_end), nrow = 1L, ncol = 2L)
}))


feature_annotation <- NLP::Annotation(
@@ -300,7 +300,7 @@ to_annotation = function(nodes, xml, token_tags, feature_tag) {

# make string
word_with_ws <- paste(toks, ifelse(is.na(tok_joins), " ", ""), sep = "")
s <- stringi::stri_c(word_with_ws, collapse = "") |> trimws()
s <- trimws(stringi::stri_c(word_with_ws, collapse = ""))

# add segment id as metadata (should work if segment is NULL as the TEI has
# an ID as well).
@@ -705,13 +705,12 @@ setMethod("get_dbpedia_uris", "subcorpus_bundle", function(x, language = getOpti
#'
#' # Process quanteda corpus
#' library(quanteda)
#' uritab <- data_char_ukimmig2010 |>
#' corpus() |>
#' uritab <- data_char_ukimmig2010 %>%
#' corpus() %>%
#' get_dbpedia_uris(
#' verbose = FALSE,
#' config = httr::config(http_version = 1.1)
#' )
#'
#' @rdname get_dbpedia_uris
setMethod(
"get_dbpedia_uris",
19 changes: 8 additions & 11 deletions R/utils.R
@@ -222,23 +222,20 @@ map_types_to_class <- function(x, mapping_vector, other = "MISC", verbose = TRUE
# types is a list of lists. Transform to a single character vector.
type_list <- unlist(types, recursive = FALSE)

types_with_class <- lapply(seq_along(type_list), function(i) {
types_with_class_raw <- lapply(seq_along(type_list), function(i) {
list_name <- names(type_list)[[i]]
list_elements <- type_list[[i]]
paste0(list_name, ":", list_elements)
}) |>
unlist() |>
intersect(mapping_vector)
})
types_with_class <- intersect(unlist(types_with_class_raw), mapping_vector)

if (length(types_with_class) > 0) {
if (length(types_with_class) > 0L) {
match_idx <- which(mapping_vector %in% types_with_class)

class_name <- mapping_vector |>
names() |>
_[match_idx] |>
unique() |>
sort() |>
paste(collapse = "|")
class_name <- paste(
sort(unique(names(mapping_vector)[match_idx])),
collapse = "|"
)

} else {
class_name <- other
15 changes: 11 additions & 4 deletions R/wikidata.R
@@ -263,10 +263,17 @@ setGeneric(
#'
#' httr::set_config(httr::config(ssl_verifypeer = 0L))
#'
#' uritab <- data_char_ukimmig2010 |>
#' corpus() |>
#' get_dbpedia_uris(progress = TRUE) %>%
#' add_wikidata_uris(endpoint = "https://dbpedia.org/sparql/", progress = TRUE, chunksize = 100) %>%
#' uritab <- data_char_ukimmig2010 %>%
#' corpus() %>%
#' get_dbpedia_uris(
#' progress = TRUE,
#' config = httr::config(http_version = 1.1)
#' ) %>%
#' add_wikidata_uris(
#' endpoint = "https://dbpedia.org/sparql/",
#' progress = TRUE,
#' chunksize = 100
#' ) %>%
#' wikidata_query(id = "P31")
#' }
#'
17 changes: 10 additions & 7 deletions R/xml.R
@@ -26,11 +26,12 @@ xml_enrich <- function(xml,
) {

# get all nodes which might contain entities
nodes <- xml |>
xml2::xml_find_all(xpath = namespaced_xpath(xml = xml, tags = token_tags))
nodes <- xml2::xml_find_all(
xml,
xpath = namespaced_xpath(xml = xml, tags = token_tags)
)

node_ids <- nodes |>
xml2::xml_attr("id")
node_ids <- xml2::xml_attr(nodes, "id")

# for each annotation, extract identified words

@@ -46,9 +47,11 @@ xml_enrich <- function(xml,
# if there is no feature tag, pre-annotated named entities weren't
# provided. Add identified named entities to tokens.

annotation_id <- annotation_dt[i, ][["original_id"]] |>
strsplit(split = "\\|") |>
unlist()
annotation_id <- unlist(strsplit(
  annotation_dt[i, ][["original_id"]],
  split = "\\|"
))

# there could be additional values such as the type?
nodes_idx <- which(node_ids %in% annotation_id)
5 changes: 2 additions & 3 deletions man/get_dbpedia_uris.Rd

Some generated files are not rendered by default.

15 changes: 11 additions & 4 deletions man/wikidata_uris.Rd

Some generated files are not rendered by default.
