/
.Rhistory
35 lines (35 loc) · 1.48 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Session setup: fix the working directory and attach the packages used below.
# NOTE(review): the working directory is a hard-coded, machine-specific path;
# the relative 'rawData/' and 'data/' paths later in this file resolve against it.
setwd('/Users/ssharma/code/nss-ds/bechdel_test')
# jsonlite supplies read_json(), used to load the Bechdel scores.
library("jsonlite")
# tidyverse attaches readr, dplyr, tidyr, ggplot2 and friends in one call.
library("tidyverse")
# dplyr and ggplot2 (below) are core tidyverse packages; attaching them again is harmless.
library("dplyr")
# magrittr provides the %>% pipe used later in this file.
library("magrittr")
library("ggplot2")
# readxl and ggvis are attached here but not called in the visible part of this file.
library("readxl")
library("ggvis")
# Load the raw inputs: the Bechdel scores (JSON) and four IMDb tables (TSV).
# All paths are relative to the current working directory.

# simplifyVector = TRUE asks jsonlite to simplify the parsed JSON (e.g. an
# array of records into a data frame) rather than returning nested lists.
bechdel_scores <- read_json('rawData/bechdel_scores', simplifyVector = TRUE)

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively (e.g. Rscript)
# so that a missing display cannot abort the run.
if (interactive()) {
  View(bechdel_scores)
}

# NOTE(review): IMDb dumps are documented to mark missing values with "\N";
# read_tsv() is called with its default `na` setting here, so such fields
# would be kept as literal strings -- confirm whether that is intended.
ratings <- read_tsv('rawData/title.ratings.tsv')
name <- read_tsv('rawData/name.basics.tsv')
crew <- read_tsv('rawData/title.crew.tsv')
title <- read_tsv('rawData/title.basics.tsv')
# Build the join key `tconst` by prefixing each Bechdel `imdbid` with 'tt'.
bechdel_scores[['tconst']] <- paste0('tt', bechdel_scores[['imdbid']])

# Inner-join the Bechdel scores with the crew, ratings and title tables,
# one after another, always on `tconst`.
bechdel_merge <- Reduce(
  function(joined, tbl) merge(joined, tbl, by = 'tconst'),
  list(crew, ratings, title),
  bechdel_scores
)

# Copy `directors` into `nconst` so it can serve as the key for the join
# against the `name` table.
# NOTE(review): if `directors` can hold several ids in one field, those rows
# will not match any single `nconst` and are dropped by the merge -- confirm.
bechdel_merge[['nconst']] <- bechdel_merge[['directors']]
bechdel_merge2 <- merge(bechdel_merge, name, by = 'nconst')

# Print a compact overview of the joined table.
glimpse(bechdel_merge2)

# Coerce birthYear to numeric; values that cannot be parsed become NA.
bechdel_merge2[['birthYear']] <- as.numeric(bechdel_merge2[['birthYear']])
# Attach the gender-prediction package and its companion data package.
library("gender")
library("genderdata")

# List the datasets shipped with genderdata.
data(package = "genderdata")

# Quick check of the predictor on two sample names, using the "ssa" method
# for the year 2000.
gender(names = c("Madison", "Hillary"), years = 2000, method = "ssa")
# Split `primaryName` at the first space into `Firstname` and `Lastname`.
#   extra = "merge": split at most once, so any further words stay in
#                    `Lastname` (e.g. "Guillermo del Toro" -> "Guillermo",
#                    "del Toro") instead of being discarded with a warning.
#   fill = "right":  a single-word name yields NA for `Lastname` without
#                    raising a warning.
# `Firstname` is the same as with the default settings, so the later
# name-based gender lookup is unaffected.
bechdel_merge2 <- bechdel_merge2 %>%
  separate(
    primaryName,
    into = c("Firstname", "Lastname"),
    sep = " ",
    extra = "merge",
    fill = "right"
  )
# Predict a gender for every first name with the "ssa" method over the
# 1880-1992 year range.
gender_prediction <- gender(
  bechdel_merge2$Firstname,
  years = c(1880, 1992),
  method = "ssa"
)

# Keep one row per distinct prediction and expose the name under `Firstname`
# so it can be used as the join key below.
gender_pred <- gender_prediction %>%
  distinct(
    name,
    gender,
    year_min,
    year_max,
    proportion_male,
    proportion_female
  ) %>%
  mutate(Firstname = name)

# Attach the predictions to the merged table; rows whose first name has no
# prediction are dropped because merge() performs an inner join by default.
bechdel_merge3 <- merge(bechdel_merge2, gender_pred, by = 'Firstname')
# Inspect the final table (interactive sessions only) and write it to disk.

# View() needs an interactive session; skip it otherwise so a non-interactive
# run is not aborted before the file is written.
if (interactive()) {
  View(bechdel_merge3)
}

# write_tsv() does not create missing directories, so make sure the output
# folder exists first; this is a no-op when it is already there.
dir.create('data', showWarnings = FALSE, recursive = TRUE)
write_tsv(bechdel_merge3, 'data/bechdel_merge.tsv')