/
zoom on school.R
executable file
·112 lines (78 loc) · 4.7 KB
/
zoom on school.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
## job search aids
# Zoom in on one school: pull its rows out of `noexcludes`, then draw the
# figure produced by sumbytags() for that subset.

# make sure the data exists
if (!exists("noexcludes")) {
  source('/Users/benmiller314/Dropbox/coursework, etc/dissertation/data, code, and figures/Dissertation Research/start here.R')
}
if (!exists("sumbytags", mode = "function")) {
  source(file = "method collocation heatmap.R")
}

# narrow to a particular school
# TO DO: make this a parameter of a wraparound function... and loopable
s <- "University of Pittsburgh-Pittsburgh Campus"
s.short <- "University of Pittsburgh"
cuny <- "CUNY Graduate School and University Center"
cuny.short <- "CUNY Grad Center"

# get the data from just that school
s.data <- noexcludes[which(noexcludes$School == s), ]
cuny.data <- noexcludes[which(noexcludes$School == cuny), ]

# zoom in on tags to aid in plotting; rows are labelled by publication number
df <- data.frame(s.data[, tagnames], row.names = s.data$Pub.number)

# pdf() names its path argument `file` (it is png() that uses `filename`);
# pdf(filename = ...) stops with an "unused argument" error.
if (remake_figs) {
  pdf(file = paste0(imageloc, "figures for ", s, ".pdf"))
}
# sumbytags() takes the *name* of the dataset as a string.
sumbytags("s.data")
# paste() already inserts a space, so the literal carries no trailing one.
mtext(paste("Method correlations for", s.short))
if (remake_figs) {
  dev.off()
}
# Get method count data for various analyses
a <- sumbytags("s.data")

# Raw count of methods ("methodological output")
# Bars are drawn in this fixed tag order rather than in the order stored in
# a$total.counts.
method_order <- c("Phil", "Crit", "Rhet", "Hist", "Disc", "Poet", "Meta",
                  "Othr", "Modl", "Surv", "Intv", "Expt", "Prac", "Ethn",
                  "Clin")
if (remake_figs) {
  pdf(file = paste0(imageloc, "method barplot for ", s.short, ".pdf"))
}
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
barplot(a$total.counts[method_order],
        main = paste("Method tag counts for", s.short),
        horiz = FALSE,
        las = 2)
if (remake_figs) {
  dev.off()
}
# Percentages of methods ("methodological focus")
# build color scale
# library() stops immediately if the package is missing; require() would only
# return FALSE and let brewer.pal() fail later with a less helpful message.
library(RColorBrewer)
myCol <- brewer.pal(9, "Greys")

# and make a legend for it: one swatch per palette entry, labelled 0-100
if (remake_figs) {
  filename <- paste0(imageloc, "color legend for ", a$dataset, " methodological focus.pdf")
  pdf(filename)
}
# left edge of each swatch, evenly spaced over [0, 1]
xleft <- seq(0, 1, length.out = length(myCol))
# swatch width = spacing between consecutive left edges
xdiff <- xleft[2] - xleft[1]
# empty canvas; x-range is widened by one swatch so the last one fits
plot(0, 0, xlim = c(0, 1 + xdiff), ylim = c(0, 1), type = "n",
     xaxt = "n", yaxt = "n", xlab = "", ylab = "", bty = "n")
rect(xleft = xleft, xright = xleft + xdiff,
     ybottom = 1 - xdiff / 3, ytop = 1, col = myCol)
# labels sit just below the swatches; xleft is the same 0..1 sequence the
# labels are computed from
text(x = xleft, y = 1 - xdiff / 1.8, labels = 100 * round(xleft, 2))
if (remake_figs) {
  dev.off()
}
# prepare to calculate values and map them to that color scale
# colorme <- function (val, min.val=0, max.val=1, cols=myCol) {
# colIndex <- round(val*10); colIndex
# if(colIndex == 0) { colIndex <- 1 }
# return(cols[colIndex])
# }
# Pick one entry of the global palette `myCol` for a value.
#   val      value to map
#   min.val  multiplier applied to `val` before subtracting from the palette size
#   max.val  accepted but never used
# Returns myCol[k], where k = round(length(myCol) - min.val * val), raised to
# 1 if it would be smaller.
# NOTE(review): the name `min.val` suggests a range bound, but it acts as a
# scale factor here -- confirm the intended formula.
colorme <- function (val, min.val, max.val) {
  palette_size <- length(myCol)
  raw_index <- round(palette_size - min.val * val)
  # never index below the first palette entry
  myCol[max(1, raw_index)]
}
# Replace colorme with an interpolating ramp over the palette. colorRamp()
# returns a function that maps values in [0, 1] to a numeric matrix of RGB(A)
# channel values (one row per input value), NOT to color strings.
colorme <- colorRamp(myCol, interpolate = "linear", alpha = TRUE)

# Turn values in [0, 1] into hex color strings that graphics parameters such
# as `bg` and `col` accept. Passing the raw channel matrix to those parameters
# would make R treat the channel numbers as palette indices.
colorme_hex <- function(val) {
  channels <- colorme(val)
  rgb(channels[, 1], channels[, 2], channels[, 3], channels[, 4],
      maxColorValue = 255)
}

# then use it in a plot
# pdf() names its path argument `file`; `filename` is rejected as unused.
if (remake_figs) {
  pdf(file = paste0(imageloc, "method barplot for ", s.short, ", normed.pdf"))
}
method_order <- c("Phil", "Crit", "Rhet", "Hist", "Disc", "Poet", "Meta",
                  "Othr", "Modl", "Surv", "Intv", "Expt", "Prac", "Ethn",
                  "Clin")
# share of each listed tag in the total of all tag counts
b <- a$total.counts[method_order] / sum(a$total.counts)

# set up a blank canvas of the right size
# NOTE(review): ylim covers only 0.5..1.5, but the squares below are centred
# at y = 1 + length(b) - i, so all but the last lie largely outside the
# plotting region -- confirm the intended layout.
plot(0, 0, xlim = c(0.5, 0.5 + length(b)), ylim = c(0.5, 0.5 + 1), type = "n",
     xaxt = "n", yaxt = "n", xlab = "", ylab = "", bty = "n")
for (i in seq_along(b)) {
  symbols(i, 1 + length(b) - i, squares = 1, add = TRUE, inches = FALSE,
          bg = colorme_hex(b[i]))
  text(i, 1 + length(b) - i, round(b[i], 2), cex = 0.65)
}

# bar chart of the same shares, each bar shaded by its own value
barplot(b, main = paste("Method tag percentages for", s.short),
        col = colorme_hex(b), horiz = FALSE, las = 2)
if (remake_figs) {
  dev.off()
}
# Compare method rankings at target school and home school
if (!exists("compare_method_ranks", mode = "function")) {
  source(file = "compare method ranks.R")
}
# compare_method_ranks() takes dataset *names* as strings.
compare_method_ranks("cuny.data", "s.data", colorful = TRUE, pcts = TRUE,
                     betterlabels = c(cuny.short, s.short))

# Same comparison within the school, between two departments matched by
# substring of the Department column.
pitt.english <- s.data[grep("English", s.data$Department), ]
pitt.comm <- s.data[grep("Communication", s.data$Department), ]
compare_method_ranks("pitt.english", "pitt.comm", colorful = TRUE,
                     betterlabels = c("Pitt English", "Pitt Communication"),
                     pcts = FALSE)

# quick totals: dissertations per tag, and dissertations overall
colSums(s.data[, tagnames])
nrow(s.data)

s.data.simple <- noexcludes[which(noexcludes$School == s), c("Year", tagnames.simple)]
# s.data.simple keeps only Year and the simple tag columns, so it has no
# Pub.number column and s.data.simple$Pub.number is NULL. Take the publication
# numbers from s.data, which was selected from noexcludes with the same row
# filter and is therefore row-aligned.
df.s <- data.frame(s.data.simple[, tagnames.simple],
                   row.names = s.data$Pub.number)
## Find out the topic distribution at this school
# Load the doc-topic helper only if it is not already defined.
if (!exists("get.doctopic.grid", mode = "function")) {
  source(file = "get doctopic grid.R")
}

# The helper's result carries the grid in its `outputfile` element.
grid <- as.data.table(get.doctopic.grid()$outputfile)
head(grid)

# Publication numbers belonging to the school, then the matching grid rows.
s.index <- noexcludes.dt[School %in% s, Pub.number]
s.grid <- grid[grid$Pub.number %in% s.index]