Skip to content

Commit

Permalink
registry_file_parse()
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Blätte authored and Andreas Blätte committed Jul 9, 2023
1 parent 2571090 commit 1cf8997
Showing 1 changed file with 15 additions and 16 deletions.
31 changes: 15 additions & 16 deletions vignettes/vignette.Rmd
Expand Up @@ -14,6 +14,7 @@ editor_options:
```{r load_libraries}
library(polmineR)
library(duplicates)
library(cwbtools)
```


Expand Down Expand Up @@ -78,24 +79,22 @@ annodata <- duplicates_as_annotation_data(


```{r encode, eval = FALSE}
library(cwbtools)
# Parse the registry entry of the REUTERS2 corpus. The encoding loop below
# reads the data directory (`home`), the registry directory (`registry`) and
# the charset property from the returned object.
regdata <- registry_file_parse(
  corpus = "REUTERS2",
  registry = registry()
)

# Encode each annotation column of `annodata` as a structural attribute of
# REUTERS2. Both attributes use the same regions, given by the
# cpos_left / cpos_right columns.
for (s_attr in c("is_duplicate", "duplicates")) {
  s_attribute_encode(
    # What is encoded: the column values (as character) and their regions.
    s_attribute = s_attr,
    values = as.character(annodata[[s_attr]]),
    region_matrix = as.matrix(annodata[, c("cpos_left", "cpos_right")]),
    # Where it is encoded: corpus id plus locations taken from the registry.
    corpus = "REUTERS2",
    data_dir = regdata$home,
    registry_dir = regdata$registry,
    encoding = regdata$properties["charset"],
    # How it is encoded.
    method = "R",
    delete = TRUE,
    verbose = TRUE
  )
}
# for (s_attr in c("is_duplicate", "duplicates")){
# s_attribute_encode(
# values = as.character(annodata[[s_attr]]),
# data_dir = regdata$home,
# s_attribute = s_attr,
# corpus = "REUTERS2",
# region_matrix = as.matrix(annodata[, c("cpos_left", "cpos_right")]),
# method = "R",
# registry_dir = regdata$registry,
# encoding = regdata$properties["charset"],
# delete = TRUE,
# verbose = TRUE
# )
# }
# RcppCWB::cl_load_corpus("REUTERS2", registry = polmineR::registry())
Expand Down

0 comments on commit 1cf8997

Please sign in to comment.