-
Notifications
You must be signed in to change notification settings - Fork 43
Code Matrix [cm] Family of Functions
Often we want to code transcripts according to some coding scheme and then use this information to generate descriptive statistics, create visualizations and run statistical analyses. The `cm` family of `qdap` functions can assist in converting coded transcripts into a code matrix.
The following tutorial will walk the user through the use of `cm_blank`, `cm_transform`, `cm_range.temp`, `cm_fill`, `cm2long` and `cm_combine`.
### The basic process for creating a code matrix (`cm`)
- use `cm_blank` with a list of codes to generate a blank code matrix
- either fill in a .csv coded file and read it in, or use `cm_fill` to feed a list of range codes
- use the code matrix for analysis; for analysis requiring long format use `cm2long`
### The following video demonstrates how to use the code matrix (`cm`) family of functions.
------------------------INSERT VIDEO UPON COMPLETION---------------
To download the .pdf of the truncated Romeo and Juliet transcript used in this analysis click here.
# Load qdap, which provides the cm_* functions used throughout this script.
library(qdap)

# Open the coded transcript (pdf) in the browser.
browseURL("https://dl.dropbox.com/u/61803503/SampsonGregory.pdf")

# Create a fake data set: the first five rows of rajSPLIT without column 8.
dat <- head(rajSPLIT, n = 5)[, -8]

# Our decided upon codes.
codes <- qcv(nm, sr, cr, spr, fc)

# Create a blank code matrix; the outer parentheses also print the result.
(dat1 <- cm_blank(dat, "dialogue", codes = codes))

# Transposed version (only columns 1 to 12 are shown).
cm_blank(dat, "dialogue", codes = codes, transpose = TRUE)[, 1:12]

# Write the standard layout to csv.
cm_blank(dat, "dialogue", codes = codes, csv = TRUE)

# Write the transposed layout to csv, using the file name "test".
cm_blank(
  dat, "dialogue",
  codes = codes, csv = TRUE, transpose = TRUE, file.name = "test"
)[, 1:12]
#===========================================================
# TWO METHODS OF CODING
#===========================================================

# Method 1: dummy code in a csv, in either long or wide format.

# Read in the coded csv that uses the standard layout.
dat2 <- read.csv(
  file = "http://dl.dropbox.com/u/61803503/test1.csv",
  strip.white = TRUE,
  as.is = FALSE
)

# Read in the coded csv that uses the transformed layout ...
dat3 <- read.csv(
  file = "http://dl.dropbox.com/u/61803503/test2.csv",
  strip.white = TRUE,
  as.is = FALSE
)

# ... and transform the transformed data frame.
cm_transform(dat3, "dialogue")

# Method 2: use the word.num column/row to range code.

# NOTE(review): presumably produces a template for the `coded` list below,
# one entry per code -- confirm against the qdap documentation.
cm_range.temp(codes)

# One entry per code; each value holds the numbers assigned to that code.
coded <- list(
  nm  = 1,
  sr  = c(3, 16, 36),
  cr  = c(5, 19, 23),
  spr = c(27, 30),
  fc  = c(8:15, 25:40)
)

# Now feed the range codes to cm_fill.
dat.fill <- cm_fill(dat1, coded)
#===========================================================
# Using cm_combine to combine codes into a parent node.
#
# `other` is specified by the column names "nm" and "fc"; `refs` is specified
# by the column positions 10:12.
#
# The call can be written on one line or spread over several lines. Run only
# ONE of the two equivalent forms: running both applies the combination
# twice, and on the second pass the positions 10:12 are read from a data
# frame that has already been modified by the first pass.
dat.fill <- cm_combine(
  dat.fill,
  combined.columns = list(
    other = c("nm", "fc"),
    refs = 10:12
  )
)
# Equivalent one-line form (use INSTEAD of the call above, not in addition):
# dat.fill <- cm_combine(dat.fill, combined.columns = list(other = c("nm", "fc"), refs = 10:12))
# Using cm2long to make a Gantt plot.

# gantt.plot is called with plot = FALSE; only its "start" and "end" columns
# are used further down.
dat.gantt <- with(
  dat.fill,
  gantt.plot(
    text,
    list(
      person, tot, act, sex, fam.aff, died, word.num,
      nm, sr, cr, spr, fc, no.code
    ),
    plot = FALSE
  )
)

# Long format built from the individual codes.
NEW <- cm2long(
  data.frame(dat.fill, dat.gantt[, c("start", "end")]),
  code.vars = qcv(terms = "nm sr cr spr fc"),
  no.code = "nc"
)

# Long format built from the combined codes.
NEW2 <- cm2long(
  data.frame(dat.fill, dat.gantt[, c("start", "end")]),
  code.vars = qcv(terms = "other refs"),
  no.code = "nc"
)
library(ggplot2)

# Gantt plot of the individual codes in NEW: one horizontal segment per row,
# running from `start` to `end` at the height of its `code`, coloured by
# `person`. Light vertical guides are drawn every 5 units and darker ones
# every 25 units along the x axis.
#
# opts() and theme_rect() have been removed from ggplot2; theme(),
# element_rect() and ggtitle() are their replacements. theme_bw() is a
# complete theme, so it is added BEFORE the individual theme() settings;
# added afterwards it would reset legend.position.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept here so the script also runs on older releases.
theplot <- ggplot(NEW, aes(colour = person)) +
  geom_vline(
    xintercept = seq(0, round(max(NEW$end) - 2), 5),
    colour = "gray92", size = .025
  ) +
  geom_vline(
    xintercept = seq(0, round(max(NEW$end) - 2), 25),
    colour = "gray50", size = .05
  ) +
  geom_segment(aes(x = start, xend = end, y = code, yend = code), size = 2) +
  xlab("duration (words)") +
  ggtitle("Speech Duration (words)") +
  scale_x_continuous(expand = c(0, 0)) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.background = element_rect(fill = NA, colour = "black")
  )
gghorX(theplot)
#====
# Same Gantt plot of NEW, split into one facet row per `person` with free
# scales.
#
# opts() and theme_rect() have been removed from ggplot2; theme(),
# element_rect() and ggtitle() are their replacements. theme_bw() is a
# complete theme, so it is added BEFORE the individual theme() settings,
# which lets legend.position be set once instead of twice.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept here so the script also runs on older releases.
theplot2 <- ggplot(NEW, aes(colour = person)) +
  geom_vline(
    xintercept = seq(0, round(max(NEW$end) - 2), 5),
    colour = "gray92", size = .025
  ) +
  geom_vline(
    xintercept = seq(0, round(max(NEW$end) - 2), 25),
    colour = "gray50", size = .05
  ) +
  geom_segment(aes(x = start, xend = end, y = code, yend = code), size = 2) +
  xlab("duration (words)") +
  ggtitle("Speech Duration (words)") +
  scale_x_continuous(expand = c(0, 0)) +
  facet_grid(person ~ ., scales = "free") +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.background = element_rect(fill = NA, colour = "black")
  )
gghorX(theplot2)
#====
# Gantt plot of the combined codes in NEW2: one horizontal segment per row,
# running from `start` to `end` at the height of its `code`, coloured by
# `person`.
#
# opts() and theme_rect() have been removed from ggplot2; theme(),
# element_rect() and ggtitle() are their replacements. theme_bw() is a
# complete theme, so it is added BEFORE the individual theme() settings;
# added afterwards it would reset legend.position.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# lines; `size` is kept here so the script also runs on older releases.
theplot3 <- ggplot(NEW2, aes(colour = person)) +
  geom_vline(
    xintercept = seq(0, round(max(NEW2$end) - 2), 5),
    colour = "gray92", size = .025
  ) +
  geom_vline(
    xintercept = seq(0, round(max(NEW2$end) - 2), 25),
    colour = "gray50", size = .05
  ) +
  geom_segment(aes(x = start, xend = end, y = code, yend = code), size = 2) +
  xlab("duration (words)") +
  ggtitle("Combined Codes") +
  scale_x_continuous(expand = c(0, 0)) +
  theme_bw() +
  theme(
    legend.position = "none",
    panel.background = element_rect(fill = NA, colour = "black")
  )
gghorX(theplot3)
### Gantt plots produced from the script above