-
Notifications
You must be signed in to change notification settings - Fork 2
/
helpers.R
90 lines (78 loc) · 2.88 KB
/
helpers.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Abstracted Helper Functions
# Jess, Anna and Seth Project
# 11/29/19
source('packages.R')
# Read every per-season team CSV and stack them into one data frame.
#
# Files are named "teamsYYZZ.csv". Each file's rows get a numeric `year`
# column equal to 2000 + YY, taken from characters 6-7 of the file name
# (e.g. "teams0304.csv" -> 2003).
#
# Args:
#   data_dir: Directory holding the CSV files. Defaults to "data", the
#     location that was previously hard-coded.
# Returns:
#   A data frame with the rows of all files in season order plus `year`.
read_data <- function(data_dir = "data") {
  df_names <- c("teams0304.csv", "teams0405.csv", "teams0506.csv",
                "teams0607.csv", "teams0708.csv", "teams0809.csv",
                "teams0910.csv", "teams1011.csv", "teams1112.csv",
                "teams1213.csv", "teams1314.csv", "teams1415.csv",
                "teams1516.csv", "teams1617.csv", "teams1718.csv",
                "teams1819.csv")
  seasons <- lapply(df_names, function(name) {
    teams <- read.csv(file.path(data_dir, name))
    teams$year <- as.numeric(paste0("20", substr(name, 6, 7)))
    teams
  })
  # Bind once at the end instead of growing the data frame inside a loop.
  do.call(rbind, seasons)
}
# Check that the data frame has exactly one row per School/year pair.
#
# Compares (# unique `School` values) * (# unique `year` values) with the
# row count. Prints a success message when they agree; otherwise prints a
# failure message followed by the three counts.
dim_checker <- function(df) {
  n_schools <- length(unique(df$School))
  n_years <- length(unique(df$year))
  n_rows <- nrow(df)
  if (n_schools * n_years != n_rows) {
    print("Dim Check Failed")
    print("# of unique schools:")
    print(n_schools)
    print("# of unique years:")
    print(n_years)
    print("# of rows:")
    print(n_rows)
  } else {
    print("Dim Check Successful")
  }
}
# Read a CSV and add a `time` column measured from a baseline year.
#
# A column named `X` is dropped if present (read.csv() gives that name to
# an unnamed first column, such as row names written by write.csv()).
# Files without an `X` column are accepted as-is.
#
# Args:
#   df_str: File name of the CSV, relative to `data_dir`.
#   data_dir: Directory holding the file. Defaults to "data".
#   base_year: Year that maps to time 0. Defaults to 2003.
# Returns:
#   The data frame read from disk, without `X`, plus
#   `time = year - base_year`.
add_time <- function(df_str, data_dir = "data", base_year = 2003) {
  df <- read.csv(file.path(data_dir, df_str))
  if (!("year" %in% names(df))) {
    stop("'", df_str, "' has no 'year' column", call. = FALSE)
  }
  # Base-R column drop: no dplyr needed and no error when `X` is absent.
  df <- df[, names(df) != "X", drop = FALSE]
  df$time <- df$year - base_year
  df
}
# Convert counting-stat columns to per-game values.
#
# Columns named in `passthrough` (year, time, the rate/percentage columns,
# and `same.coach` when the data frame has it) are copied unchanged. Of the
# remaining columns, the first two are discarded (presumably identifier
# columns such as School -- verify against the input files) and every other
# one is divided by games played, `G`. If `G` is itself among the divided
# columns it becomes G / G.
#
# Args:
#   df: Data frame containing `G`, every passthrough column, and at least
#     three columns that are not passthrough columns.
# Returns:
#   A data frame of the per-game columns, followed by the passthrough
#   columns, followed by `id` (the row index).
get_prop_df <- function(df) {
  passthrough <- c("year", "time", "W.L.", "SRS", "SOS", "FTr", "X3PAr",
                   "TS.", "TRB.", "AST.", "BLK.", "eFG.", "TOV.", "FT.FGA",
                   "FG.", "X3P.", "FT.")
  if ("same.coach" %in% names(df)) {
    passthrough <- c(passthrough, "same.coach")
  }

  unchanged <- df[passthrough]
  counts <- df[, !(names(df) %in% passthrough), drop = FALSE]
  if (!("G" %in% names(counts))) {
    stop("`df` must contain a games-played column named 'G'", call. = FALSE)
  }
  if (ncol(counts) < 3) {
    stop("`df` must have at least three non-passthrough columns",
         call. = FALSE)
  }

  # seq_len() is safe for a 0-row input, where 1:nrow() would give c(1, 0).
  unchanged$id <- seq_len(nrow(unchanged))
  # drop = FALSE keeps a data frame (and the column name) even when only one
  # column is left after discarding the first two.
  per_game <- counts[, -(1:2), drop = FALSE] / counts$G
  cbind(per_game, unchanged)
}
# Add a logical `same.coach` column flagging a fixed list of schools.
#
# `same.coach` is TRUE when `School` exactly equals one of the names below
# and FALSE otherwise (presumably these are the schools that kept the same
# head coach over the study period -- confirm with the analysis).
#
# Matching is done with %in% rather than a regular expression: names such as
# "Albany (NY)" contain parentheses, which a regex treats as a group, so
# they would not match literally. Exact matching also works when a listed
# school is absent from `df`.
#
# Args:
#   df: Data frame with a `School` column.
# Returns:
#   `df` with the additional `same.coach` column.
add_coach_change <- function(df) {
  schools <- c("Syracuse", "Duke", "Oakland", "Davidson", "Lafayette",
               "Michigan", "Gonzaga", "Northwestern", "Yale", "Notre Dame",
               "Albany (NY)", "Saint Mary's (CA)", "Villanova",
               "Florida State", "Baylor", "Kansas", "North Carolina",
               "Western Michigan")
  df$same.coach <- df$School %in% schools
  df
}
# Two-sided t-test of a linear contrast of model coefficients.
#
# The estimate is t(coefs) %*% C and its standard error is
# sqrt(t(C) %*% vcov(model) %*% C).
#
# Args:
#   C: Contrast vector, conformable with `coefs` and `vcov(model)`.
#   coefs: Coefficient vector of the model.
#   model: Fitted model supplying vcov() and df.residual(). Defaults to the
#     `lmer9a` object, which is looked up only when this argument is omitted.
# Returns:
#   A list with `tstat` (1 x 1 matrix) and `pval` (two-sided p-value).
contrast_test_lmer9a <- function(C, coefs, model = lmer9a) {
  estimate <- t(coefs) %*% C
  std_err <- sqrt(t(C) %*% vcov(model) %*% C)
  contrast_t <- estimate / std_err
  # NOTE(review): the reference distribution uses residual df minus 2; the
  # reason for the extra 2 is not visible here -- confirm it is intended.
  t_df <- df.residual(model) - 2
  # The upper tail is computed directly so very small p-values do not
  # round to 0 through 1 - pt(...).
  p_value <- 2 * pt(abs(contrast_t[1]), df = t_df, lower.tail = FALSE)
  list(tstat = contrast_t, pval = p_value)
}