-
Notifications
You must be signed in to change notification settings - Fork 43
Code Matrix [cm] Family of Functions
trinker edited this page Aug 15, 2012
·
22 revisions
Often we want to code transcripts according to some coding scheme and then use this information to generate descriptive statistics, create visualizations and perform statistical analyses. The `cm` family of `qdap` functions can assist in converting coded transcripts into a code matrix.
The following tutorial will walk the user through the use of `cm_blank`, `cm_transform`, `cm_fill` and `cm2long`.
### The basic process for creating a code matrix (`cm`)
- use `cm_blank` with a list of codes to generate a blank code matrix
- either fill in a .csv coded file and read it in, or use `cm_fill` to feed it a list of range codes
- use the code matrix for analysis; for analyses requiring long format, use `cm2long`
To download the .pdf of the fake transcript used in this analysis click here.
### The following video demonstrates how to use the code matrix (`cm`) family of functions.
------------------------INSERT VIDEO UPON COMPLETION---------------
# Load qdap (home of the cm family of functions)
library(qdap)

# Open the coded transcript (.pdf) in the default browser
browseURL("https://dl.dropbox.com/u/61803503/SampsonGregory.pdf")

# Create a fake data set: the head of rajSPLIT with column 8 dropped
dat <- head(rajSPLIT)[, -8]

# Our decided upon codes
codes <- qcv(nm, sr, cr, spr, fc)

# Create a blank code matrix, keep it as dat1 and print it
dat1 <- cm_blank(dat, "dialogue", codes = codes)
dat1

# Transposed version of the blank code matrix (first 12 columns shown)
cm_blank(dat, "dialogue", codes = codes, transpose = TRUE)[, 1:12]

# Write the standard layout to csv
cm_blank(dat, "dialogue", codes = codes, csv = TRUE)

# Write the transposed (transformed) layout to csv, using file name "test"
cm_blank(
  dat, "dialogue",
  codes = codes,
  csv = TRUE,
  transpose = TRUE,
  file.name = "test"
)[, 1:12]
#########################
# TWO METHODS TO CODING #
#########################
#===========================================================
# Method 1: dummy code in a csv, in either long or wide format

# Read in the coded standard csv
dat2 <- read.csv(
  file = "http://dl.dropbox.com/u/61803503/test1.csv",
  strip.white = TRUE,
  as.is = FALSE
)

# Read in the coded transformed csv
dat3 <- read.csv(
  file = "http://dl.dropbox.com/u/61803503/test2.csv",
  strip.white = TRUE,
  as.is = FALSE
)

# Transform the transformed data frame
cm_transform(dat3, "dialogue")
# Method 2: range code using the word.num column/row.
# Each list element is named after a code and holds the word numbers
# that receive that code.
coded <- list(
  nm  = 1,
  sr  = c(3, 16, 36),
  cr  = c(5, 19, 23),
  spr = c(27, 30),
  fc  = c(8:15, 25:40)
)

# Now feed the range codes, together with the blank matrix dat1, to cm_fill
dat.fill <- cm_fill(dat1, coded)
#===========================================================
# Using cm2long to make a Gantt plot

# Run gantt.plot on the filled code matrix without drawing (plot = FALSE);
# the "start" and "end" columns of its result are reused below.
dat.gantt <- with(
  dat.fill,
  gantt.plot(
    text,
    list(
      person, tot, act, sex, fam.aff, died, word.num,
      nm, sr, cr, spr, fc, no.code
    ),
    plot = FALSE
  )
)

# Bind start/end onto the filled matrix and reshape it to long format
NEW <- cm2long(
  data.frame(dat.fill, dat.gantt[, c("start", "end")]),
  code.vars = qcv(terms = "nm sr cr spr fc"),
  no.code = "nc"
)
library(ggplot2)

# Gantt plots of the long-format code matrix `NEW`: one horizontal segment per
# coded stretch, running from `start` to `end` on the x axis, placed on its
# `code` row and coloured by `person`.
#
# The original script used opts() and theme_rect(), which no longer exist in
# current ggplot2 releases; theme(), ggtitle() and element_rect() are their
# replacements.

# Right-hand limit for the vertical guide lines (shared by both sequences).
grid_max <- round(max(NEW$end) - 2)

# NOTE: `size` is kept for line widths so the code also runs on older ggplot2;
# ggplot2 >= 3.4.0 prefers `linewidth` and will emit a deprecation warning.
theplot <- ggplot(NEW, aes(colour = person)) +
  # faint guide line every 5 words
  geom_vline(
    xintercept = seq(0, grid_max, 5),
    colour = "gray92",
    size = .025
  ) +
  # darker guide line every 25 words
  geom_vline(
    xintercept = seq(0, grid_max, 25),
    colour = "gray50",
    size = .05
  ) +
  geom_segment(aes(x = start, xend = end, y = code, yend = code), size = 2) +
  xlab("duration (words)") +
  ggtitle("Speech Duration (words)") +
  scale_x_continuous(expand = c(0, 0)) +
  # theme_bw() is a complete theme and replaces every earlier theme setting,
  # so it must come BEFORE the individual tweaks (the original set
  # legend.position first, which theme_bw() then overwrote).
  theme_bw() +
  theme(
    legend.position = "none",
    panel.background = element_rect(fill = NA, colour = "black")
  )

# NOTE(review): gghorX() is not a ggplot2 function; presumably a qdap helper.
# Confirm it is available in the installed qdap version.
gghorX(theplot)

# Same plot with one facet row per person. Everything else, including the
# hidden legend, is inherited from `theplot` instead of being repeated.
theplot2 <- theplot +
  facet_grid(person ~ ., scales = "free")

gghorX(theplot2)
#Gantt plots produced from the script above