/
DefaultOutputterMain.R
48 lines (30 loc) · 1.35 KB
/
DefaultOutputterMain.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Visualising results in a tabular format
library(tidyverse)
# Tidy the TRACER score table and export it as a tab-separated file.
#
# Input : "tracerdataset-tracerdataset.score.expanded.csv" (tab-separated, no
#         header row, so read.csv names the columns V1, V2, ...).
# Output: "tracer_scores_teatro.csv" (tab-separated, with a header row).
#
# Columns as used below: V1/V2 are IDs whose first two digits (after
# zero-padding to 7 characters) identify the work; V4 is the score; V5/V6 are
# the two compared values; V7 flags matches within the same work.
# NOTE(review): column meanings come from the original comments -- confirm
# against the TRACER output documentation.
df <- read.csv(
  "tracerdataset-tracerdataset.score.expanded.csv",
  header = FALSE,
  sep = "\t",
  stringsAsFactors = FALSE # keep text columns comparable with `==` on R < 4.0
)

# "The .score file lists results bidirectionally and thus redundantly".
# Remove duplicates by keeping only the even-numbered rows.
# NOTE(review): this assumes every pair and its mirror sit on adjacent rows.
df <- df %>%
  filter(row_number() %% 2 == 0)

# Variable V7: true means a match within the same work. Not interested in this.
# read.csv converts a column holding only true/false into a logical vector, and
# TRUE != "true" is always TRUE (TRUE is coerced to "TRUE"), so compare on the
# lower-cased character form to make the filter work for either column type.
df <- subset(df, tolower(as.character(V7)) != "true")

# Full match: both compared values are identical. Stored as a logical; it is
# written to disk as TRUE/FALSE exactly like the former "TRUE"/"FALSE" strings.
df$total <- df$V5 == df$V6

# Some words.
# NOTE(review): this marks rows whose V6 value appears ANYWHERE in column V5,
# not a row-wise partial match between V5 and V6 -- confirm this is intended.
df$parcial <- df$V6 %in% df$V5

# df$total is almost the same as score = 1 (df$V4). E.g. "luego vamos" vs
# "vamos luego": score = 1, but df$total is FALSE.
# df$V7 <- NULL # not needed.

# Keep just the first 2 digits to reduce the comparison to works (not lines).
# IDs are left-padded with zeros to 7 characters first (7 is the maximum width
# according to the original author), so short IDs keep their leading work digits.
df$V1 <- substr(str_pad(df$V1, width = 7, side = "left", pad = "0"), 1, 2)
df$V2 <- substr(str_pad(df$V2, width = 7, side = "left", pad = "0"), 1, 2)

# Subsets of works. Attention! The first work gets removed from V1 after
# removing duplicates.
# Direct comparisons are used so rows with a missing ID are never selected.
obra_02_vs_all <- subset(df, V1 == "02")
obra_02_vs_obra_01 <- subset(df, V1 == "02" & V2 == "01")

# Save to disk
write.table(
  df,
  file = "tracer_scores_teatro.csv",
  sep = "\t", # tab-separated
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)