text recognition error in speech #13

KevinGlock · 2019-10-04T14:03:43Z

Interjections are not recognised in the following speech:

## partition for the speech by Stephan Mayer on 2016-09-23
p <- partition(
  "GERMAPARL",
  speaker = "Stephan Mayer",
  date = "2016-09-23",
  encoding = "UTF-8"
)

## show the text of the partition
read(p)

The text was updated successfully, but these errors were encountered:

KevinGlock · 2019-10-04T14:17:42Z

I saw that the error occurs in other speeches too.
Here is the workflow to find them.

## load libraries

library("polmineR")
library("magrittr")
library("data.table")

use("GermaParl")


## create partitions

## CDU/CSU speakers with the role "mp" or "government", years 2012 to 2016,
## restricted to interjection = FALSE.
## `FALSE` is spelled out because `F` is an ordinary variable that can be
## reassigned, whereas `FALSE` is a reserved word.
coi_cdu16 <- partition(
  "GERMAPARL",
  parliamentary_group = "CDU/CSU",
  year = 2012:2016,
  interjection = FALSE,
  role = c("mp", "government")
)


## as partition bundles

## split the partition by the s-attribute "date"
pb2 <- partition_bundle(coi_cdu16, s_attribute = "date")

## split every per-date partition again by "agenda_item"; the result is a
## plain list with one partition bundle per date
## (`FALSE` instead of the reassignable shorthand `F`)
nested2 <- lapply(
  pb2@objects,
  function(x) partition_bundle(x, s_attribute = "agenda_item", verbose = FALSE)
)


## flatten the list of per-date partition bundles into one object

debates2 <- flatten(nested2)

## name every element "<date>_<name>", using the value of its "date"
## s-attribute and its own name
names(debates2) <- paste(
  blapply(debates2, function(part) s_attributes(part, "date")),
  blapply(debates2, function(part) name(part)),
  sep = "_"
)
## CQP queries: every element is a regular expression wrapped in double
## quotes. The umlauts and the sharp s are written as real UTF-8 characters;
## the mis-encoded byte sequences ("Ã¼", "Ã¤", "ÃY") that were here before
## cannot match German text.

## q1: citizenship vocabulary
q1 <- c('"[Mm]ehrstaat.*"', '".*[Ss]taatsbürger.*"', '".*[Ss]taatsangeh.*rig.*"',
        '".*[Ss]taatszugeh.*rig.*"', '"[Ss]taatenlos.*"', '"[Aa]us.*bürger.*"',
        '"[Ee]in.*bürger.*"', '"Pass"', '"Paß"',
        '"Blutsrecht.*"', '"Geburtsrecht.*"', '"Geburtsprinzip.*"',
        '"[Ii]us soli"', '"[Ii]us sanguinis"', '"[Jj]us soli"', '"[Jj]us sanguinis"',
        '"[Dd]oppel.* [Ss]taat.*"', '"Abstammungsrecht.*"', '"Abstammungsprinzip.*"')

## q2: dual citizenship vocabulary
q2 <- c('"[Dd]oppelstaat.*"', '"[Mm]ehrstaat.*"',
        '"[Dd]oppel.* [Ss]taat.*"', '"Doppelpass.*"', '"Doppelpaß.*"',
        '"[Oo]ptionspflicht.*"', '"[Oo]ptionszwang.*"', '"Optionsmodell.*"')

## q3: migration and asylum vocabulary
## (the second, redundant '"Aufnahme.*"' entry has been dropped)
q3 <- c('".*[Aa]syl.*"', '".*[Ff]lucht.*"', '".*[Ff]lücht.*"', '".*[Mm]igra.*"',
        '".*[Ee]in.*wander.*"', '".*[Gg]renz.*"', '"[Ff]amilienzusammen.*"', '".*[Aa]us.*bürger.*"',
        '".*[Aa]b.*schie.*"', '".*[Aa]b.*schob.*"', '".*[Ee]in.*bürger.*"', '".*[Aa]us.*sied.*"',
        '"Aufnahme.*"', '"[Vv]isa.*"', '"[Vv]isum.*"', '"Loyalitätskonflikt"', '"Identitätsfeststellung"',
        '"Rückführung.*"', '".*[Aa]usländ.*"', '".*[Rr]usslanddeutsch.*"',
        '"[Aa]ufenthalt.*"', '"Rückübernahme.*"', '"Ehegattennachzug"', '"Duldung.*"',
        '"Residenzpflicht"', '"Regelanfrage"', '".*Vertreib.*"', '".*Vertrieb.*"', '"AZR"')

## q4: all queries combined
q4 <- c(q1, q2, q3)


## erase the enclosing quotation marks, so that the bare regular
## expressions can be used to highlight the protocols

q1_regex <- sub(pattern = '^\\"(.*?)\\"$', replacement = '\\1', x = q1)

q2_regex <- sub(pattern = '^\\"(.*?)\\"$', replacement = '\\1', x = q2)

q3_regex <- sub(pattern = '^\\"(.*?)\\"$', replacement = '\\1', x = q3)

q4_regex <- sub(pattern = '^\\"(.*?)\\"$', replacement = '\\1', x = q4)

## count the q2 queries in debates2, then sort the resulting table by its
## "TOTAL" column in descending order (order = -1L)
## (`TRUE` instead of the reassignable shorthand `T`)
dt6 <- count(
  debates2,
  query = q2,
  regex = TRUE,
  fill = TRUE,
  cqp = TRUE
) %>%
  setorderv(cols = "TOTAL", order = -1L)

## keep the debates whose TOTAL count in dt6 is at least 4
debates_dual2 <- debates2[[subset(dt6, TOTAL >= 4)[["partition"]]]]

## read the sixth of these debates with the query matches highlighted
## (`TRUE` instead of the reassignable shorthand `T`)
debates_dual2[[6]] %>%
  read() %>%
  highlight(
    orange = q4_regex,
    lightgreen = q1_regex,
    red = q2_regex,
    regex = TRUE
  )

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

text recognition error in speech #13

text recognition error in speech #13

KevinGlock commented Oct 4, 2019

KevinGlock commented Oct 4, 2019

text recognition error in speech #13

text recognition error in speech #13

Comments

KevinGlock commented Oct 4, 2019

KevinGlock commented Oct 4, 2019