Skip to content

Commit

Permalink
further examples moved to dontrun #273
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Blätte authored and Andreas Blätte committed Oct 29, 2023
1 parent 55c6ac9 commit 49803e0
Show file tree
Hide file tree
Showing 12 changed files with 78 additions and 54 deletions.
3 changes: 2 additions & 1 deletion R/S4classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,7 @@ setClass("press_subcorpus", contains = "subcorpus")
#' @rdname phrases-class
#' @aliases phrases-class
#' @examples
#' \dontrun{
#' # Workflow to create document-term-matrix with phrases
#'
#' obs <- corpus("GERMAPARLMINI") %>%
Expand All @@ -1034,7 +1035,7 @@ setClass("press_subcorpus", contains = "subcorpus")
#'
#' grep("erneuerbaren_Energien", colnames(dtm))
#' grep("verpasste_Chancen", colnames(dtm))
#'
#' }
setClass(
"phrases",
contains = "regions"
Expand Down
4 changes: 2 additions & 2 deletions R/cooccurrences.R
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ setMethod("cooccurrences", "context", function(.Object, method = "ll", verbose =
#'
#' # compute cooccurrences for a set of partitions
#' # (example not run by default to save time on test machines)
#' \donttest{
#' \dontrun{
#' pb <- partition_bundle("GERMAPARLMINI", s_attribute = "speaker")
#' ps <- count(pb, query = "Deutschland")[Deutschland >= 25][["partition"]]
#' pb_min <- pb[ps]
Expand Down Expand Up @@ -830,7 +830,7 @@ setMethod("Cooccurrences", "subcorpus", function(
#' @exportMethod as.simple_triplet_matrix
#' @rdname all-cooccurrences-class
#' @examples
#' \donttest{
#' \dontrun{
#' # takes too much time on CRAN test machines
#' use(pkg = "RcppCWB", corpus = "REUTERS")
#' X <- Cooccurrences("REUTERS", p_attribute = "word", left = 2L, right = 2L)
Expand Down
27 changes: 14 additions & 13 deletions R/decode.R
Original file line number Diff line number Diff line change
Expand Up @@ -264,28 +264,29 @@ as.AnnotatedPlainTextDocument <- function(x, p_attributes = NULL, s_attributes =
#' use(pkg = "RcppCWB", corpus = "REUTERS")
#'
#' # Decode corpus as data.table
#' dt <- decode("GERMAPARLMINI", to = "data.table")
#' dt <- decode("REUTERS", to = "data.table")
#'
#' # Decode corpus selectively
#' dt <- decode("GERMAPARLMINI", to = "data.table", p_attributes = "word", s_attributes = "party")
#' dt <- decode(
#' "REUTERS",
#' to = "data.table",
#' p_attributes = "word",
#' s_attributes = "id"
#' )
#'
#' # Decode a subcorpus
#' dt <- corpus("GERMAPARLMINI") %>%
#' subset(speaker == "Angela Dorothea Merkel") %>%
#' decode(s_attributes = c("speaker", "party", "date"), to = "data.table")
#'
#' # Decode subcorpus selectively
#' corpus("GERMAPARLMINI") %>%
#' subset(speaker == "Angela Dorothea Merkel") %>%
#' decode(to = "data.table", p_attributes = "word", s_attributes = "party")
#' dt <- corpus("REUTERS") %>%
#' subset(id %in% c("127", "144")) %>%
#' decode(s_attributes = "id", to = "data.table")
#'
#' # Decode partition
#' P <- partition("REUTERS", places = "kuwait", regex = TRUE)
#' dt <- decode(P, to = "data.table")
#' dt <- partition("REUTERS", places = "kuwait", regex = TRUE) %>%
#' decode(to = "data.table")
#'
#' # Previous versions of polmineR offered an option to decode a single
#' # s-attribute. This is how you could proceed to get a table with metadata.
#' dt <- decode(P, s_attribute = "id", decode = FALSE, to = "data.table")
#' dt <- partition("REUTERS", places = "kuwait", regex = TRUE) %>%
#' decode(s_attribute = "id", decode = FALSE, to = "data.table")
#' dt[, "word" := NULL]
#' dt[,{list(cpos_left = min(.SD[["cpos"]]), cpos_right = max(.SD[["cpos"]]))}, by = "id"]
#'
Expand Down
2 changes: 2 additions & 0 deletions R/features.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ NULL
#' Manning, Christopher D.; Schuetze, Hinrich (1999): \emph{Foundations of Statistical Natural Language
#' Processing}. MIT Press: Cambridge, Mass., pp. 151-189 (ch. 5).
#' @examples
#' \dontrun{
#' use("polmineR")
#'
#' kauder <- partition(
Expand All @@ -115,6 +116,7 @@ NULL
#' terms_kauder <- features(kauder_count, all_count, included = TRUE)
#' top100 <- subset(terms_kauder, rank_chisquare <= 100)
#' head(top100)
#' }
#'
#' # get matrix with features (dontrun to keep time for examples short)
#' \dontrun{
Expand Down
30 changes: 19 additions & 11 deletions R/token_stream.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,34 @@ NULL
#' @examples
#' use(pkg = "RcppCWB", corpus = "REUTERS")
#'
#' # Decode first words of GERMAPARLMINI corpus (first sentence)
#' get_token_stream(0:9, corpus = "GERMAPARLMINI", p_attribute = "word")
#' # Decode first words of REUTERS corpus (first sentence)
#' get_token_stream(0:20, corpus = "REUTERS", p_attribute = "word")
#'
#' # Decode first sentence and collapse tokens into single string
#' get_token_stream(0:9, corpus = "GERMAPARLMINI", p_attribute = "word", collapse = " ")
#' get_token_stream(0:20, corpus = "REUTERS", p_attribute = "word", collapse = " ")
#'
#' # Decode regions defined by two-column integer matrix
#' region_matrix <- matrix(c(0L,9L,10L,25L), ncol = 2, byrow = TRUE)
#' get_token_stream(region_matrix, corpus = "GERMAPARLMINI", p_attribute = "word", encoding = "latin1")
#' region_matrix <- matrix(c(0L,20L,21L,38L), ncol = 2, byrow = TRUE)
#' get_token_stream(
#' region_matrix,
#' corpus = "REUTERS",
#' p_attribute = "word",
#' encoding = "latin1"
#' )
#'
#' # Use argument 'beautify' to remove surplus whitespace
#' \dontrun{
#' get_token_stream(
#' region_matrix,
#' corpus = "GERMAPARLMINI",
#' p_attribute = "word",
#' encoding = "latin1",
#' collapse = " ", beautify = TRUE
#' )
#' }
#'
#' # Decode entire corpus (corpus object / specified by corpus ID)
#' fulltext <- get_token_stream("GERMAPARLMINI", p_attribute = "word")
#' corpus("GERMAPARLMINI") %>%
#' corpus("REUTERS") %>%
#' get_token_stream(p_attribute = "word") %>%
#' head()
#'
Expand All @@ -90,9 +96,11 @@ NULL
#' head()
#'
#' # Decode partition_bundle
#' \dontrun{
#' pb_tokstr <- corpus("REUTERS") %>%
#' split(s_attribute = "id") %>%
#' get_token_stream(p_attribute = "word")
#' }
setGeneric("get_token_stream", function(.Object, ...) standardGeneric("get_token_stream"))


Expand Down Expand Up @@ -236,7 +244,7 @@ setMethod("get_token_stream", "regions", function(.Object, p_attribute = "word",
#' @importFrom stringi stri_c
#' @importFrom RcppCWB region_matrix_to_ids cl_lexicon_size
#' @examples
#' \donttest{
#' \dontrun{
#' # Get token stream for partition_bundle
#' pb <- partition_bundle("REUTERS", s_attribute = "id")
#' ts_list <- get_token_stream(pb)
Expand Down Expand Up @@ -404,9 +412,9 @@ setOldClass("String")
#' Decode as String.
#'
#' @examples
#' use("polmineR")
#' p <- partition("GERMAPARLMINI", date = "2009-11-10", speaker = "Angela Dorothea Merkel")
#' s <- as(p, "String")
#' corpus("REUTERS") %>%
#' subset(id == "127") %>%
#' as("String")
#' @name partition_to_string
setAs(from = "slice", to = "String", def = function(from){
word <- get_token_stream(from, p_attribute = "word")
Expand Down
2 changes: 1 addition & 1 deletion man/all-cooccurrences-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/cooccurrences.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 14 additions & 13 deletions man/decode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/features.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 16 additions & 8 deletions man/get_token_stream-method.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/partition_to_string.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/phrases-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 49803e0

Please sign in to comment.