## setwise data.R
## Get some basic output stats for a set of schools.
##
## Args:
##   dataset_name: name (a character string) of a data frame that is looked up
##                 with get() and must have a `School` column.
##
## Returns a list with:
##   set    - data frame with columns School (character) and Counts (number of
##            rows per school), sorted by Counts in decreasing order
##   total  - sum of Counts
##   mean   - mean of Counts
##   median - median of Counts
##
## Side effect: when the global `remake_figs` is TRUE, the `set` table is also
## written as a tab-delimited text file under the global `imageloc` prefix.
setwise.data <- function(dataset_name="consorts") {
    dataset <- get(dataset_name)

    # Count rows per school with base R's table(), the same way plot.outputs()
    # does, so no extra package is needed for this function. factor() drops
    # unused levels so schools with zero rows are not reported.
    counts <- table(factor(dataset$School))
    b <- data.frame(School = as.character(names(counts)),
                    Counts = as.vector(counts),
                    stringsAsFactors = FALSE)
    b <- b[order(b$Counts, decreasing=TRUE), ]

    set.mean <- mean(b$Counts)
    set.median <- median(b$Counts)
    set.total <- sum(b$Counts)

    # The rest of this file names the flag `remake_figs`; use the same name here.
    if (remake_figs) {
        filename <- paste0(imageloc, "Counts by school, ", dataset_name, ".txt")
        # write() goes through cat(), which cannot handle a data frame;
        # write.table() is the data-frame writer.
        write.table(b, file=filename, sep="\t", quote=FALSE, row.names=FALSE)
    }

    return(list(set = b,
                total = set.total,
                mean = set.mean,
                median = set.median))
}
# When the global `autorun` flag is set, compute the per-school stats for each
# dataset of interest. The final call is kept as the last expression of the
# block so its value remains the block's visible result.
if (autorun) {
    for (set_name in c("noexcludes", "consorts", "top.nonconsorts")) {
        setwise.data(set_name)
    }
    setwise.data("consorts.plus")
}
## Plot the number of dissertations per school for a dataset, schools ordered
## from most to fewest dissertations, optionally overlaying a second dataset.
##
## Args:
##   dataset_name:  name (character string) of a data frame, looked up with
##                  get(), that has a `School` column.
##   show.stats:    if TRUE, draw horizontal lines at the mean (blue) and median
##                  (forestgreen) count and print both values in a legend.
##   show.labels:   if TRUE, label the `label.top` highest-count schools.
##   label.top:     how many schools to label (capped at the number available).
##   subset_name:   optional name of a second data frame with a `School` column;
##                  its schools are overlaid at their position in the main plot.
##                  Schools absent from the main dataset are skipped.
##   subset.labels: if TRUE, label the `label.top` highest-count subset schools.
##   subset.color:  color for the overlaid points; NULL suppresses the overlay
##                  points and their legend.
##
## Side effect: when the global `remake_figs` is TRUE the plot is written to a
## pdf under the global `imageloc` prefix; otherwise it is drawn on the current
## graphics device. Returns NULL invisibly.
plot.outputs <- function(dataset_name="consorts.plus", show.stats=TRUE, show.labels=TRUE, label.top=6,
                         subset_name=NULL, subset.labels=FALSE, subset.color="red") {
    dataset <- get(dataset_name)
    cfreq <- sort(table(factor(dataset$School)), decreasing=TRUE)

    # Fail early, before any device is opened, when there is nothing to draw.
    if (length(cfreq) == 0) {
        stop("No schools found in dataset '", dataset_name, "'; nothing to plot.",
             call.=FALSE)
    }

    main <- paste(dataset_name, "by dissertation count")

    if (remake_figs) {
        filename <- paste0(imageloc, "dissertation counts, ", dataset_name)
        if (!is.null(subset_name)) { filename <- paste0(filename, " with ", subset_name) }
        if (show.labels) { filename <- paste0(filename, ", ", dataset_name, " labeled") }
        if (subset.labels) { filename <- paste0(filename, ", ", subset_name, " labeled") }
        filename <- paste0(filename, ".pdf")
        pdf(filename)
        # Close the pdf device even if a plotting call below fails.
        on.exit(dev.off(), add=TRUE)
    }

    plot(cfreq, type="o", pch=18, bty="n", xlab="Schools", ylab="Number of Dissertations 2001-2010")
    title(main)

    if (show.stats) {
        legend(x=0, y=mean(cfreq)+7,
               legend=c(paste("Mean =", round(mean(cfreq), 2)),
                        paste("Median =", round(median(cfreq), 2))),
               bty="n"
        )
        abline(h = median(cfreq), col="forestgreen")
        abline(h = mean(cfreq), col="blue")
    }

    if (show.labels) {
        # seq_len() with a cap avoids NA entries when label.top exceeds the
        # number of schools, and avoids c(1, 0) when label.top is 0.
        top <- seq_len(min(label.top, length(cfreq)))
        if (length(top) > 0) {
            text(x=top,
                 y=as.vector(cfreq[top]),
                 labels=paste0(names(cfreq)[top], " (", as.vector(cfreq[top]), ")"),
                 pos=4,
                 offset=1)
        }
    }

    if (!is.null(subset_name)) {
        subset_data <- get(subset_name)
        subsetfreq <- sort(table(factor(subset_data$School)), decreasing=TRUE)

        # x position of each subset school on the main plot, kept in the same
        # order as subsetfreq so that every x is paired with its own count.
        index <- match(names(subsetfreq), names(cfreq))
        shown <- !is.na(index)      # schools missing from the main set have no x
        index <- index[shown]
        subsetfreq <- subsetfreq[shown]

        if (!is.null(subset.color)) {
            points(x=index,
                   y=as.vector(subsetfreq),
                   pch=18,
                   col=subset.color
            )
            legend("topright",
                   c("Consortium", "Non-Consortium"),
                   fill=c("black", subset.color),
                   border=c("black", subset.color),
                   bty="n"
            )
        }

        if (subset.labels) {
            top <- seq_len(min(label.top, length(subsetfreq)))
            if (length(top) > 0) {
                # Anchor each label at the school's own point, not at x = 1..k.
                text(x=index[top],
                     y=as.vector(subsetfreq[top]),
                     labels=paste0(names(subsetfreq)[top], " (", as.vector(subsetfreq[top]), ")"),
                     pos=4,
                     offset=8
                )
            }
        }
    }

    invisible(NULL)
}
# When the global `autorun` flag is set, draw the standard set of figures.
# Logical arguments are spelled TRUE/FALSE because T and F are ordinary
# variables that can be reassigned. The bare `remake_figs` expression that
# used to open this block had no effect and has been removed.
if (autorun) {
    plot.outputs("consorts.plus", show.stats=FALSE)
    plot.outputs("consorts.plus", subset_name="top.nonconsorts", show.stats=TRUE,
                 show.labels=FALSE, subset.labels=FALSE, subset.color="red")
    plot.outputs("noexcludes", show.labels=FALSE)
    plot.outputs("realconsorts", show.stats=FALSE, show.labels=TRUE, label.top=4)
}