/
scrap.R
36 lines (30 loc) · 909 Bytes
/
scrap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
library(rvest)
library(dplyr)
# Scrape head-to-head totals from soccerbase.com for every fixture listed in
# odds.csv and save them to ScrapHistoric.csv.
#
# Inputs (read from data_dir):
#   countryCodes.csv - provides the `Country` and `Code` columns used below.
#   odds.csv         - provides the `HomeTeam` and `AwayTeam` columns.
# Output (written to data_dir):
#   ScrapHistoric.csv - columns Home, Away, p1, p2, p3 (plus the row-name
#                       column that write.csv emits by default).

# Build paths explicitly instead of calling setwd(), so running the script
# does not change the session's working directory.
data_dir <- "~/Desktop/refutbol"

codes <- read.csv(
  file.path(data_dir, "countryCodes.csv"),
  stringsAsFactors = FALSE
)

# Joining `codes` twice attaches the home code as Code.x and the away code as
# Code.y; both are then embedded in the head-to-head URL.
matchs <- read.csv(
  file.path(data_dir, "odds.csv"),
  stringsAsFactors = FALSE
) %>%
  select(HomeTeam, AwayTeam) %>%
  left_join(codes, by = c("HomeTeam" = "Country")) %>%
  left_join(codes, by = c("AwayTeam" = "Country")) %>%
  mutate(
    link = paste0(
      "http://www.soccerbase.com/teams/head_to_head.sd?team_id=", Code.x,
      "&team2_id=", Code.y
    )
  )

# Default to 0 for fixtures whose page cannot be fetched or has no ".total"
# nodes.
matchs$p1 <- 0
matchs$p2 <- 0
matchs$p3 <- 0

# seq_len() yields an empty sequence for a zero-row table, whereas 1:nrow()
# would iterate over c(1, 0).
for (i in seq_len(nrow(matchs))) {
  link <- matchs$link[i]
  print(link)

  # A single failed request must not abort the run and lose the rows that
  # were already scraped: warn, keep the defaults, and move on.
  page <- tryCatch(
    read_html(link),
    error = function(e) {
      warning(
        "Could not fetch ", link, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(page)) {
    next
  }

  totals_text <- page %>%
    html_nodes(".total") %>%
    html_text() %>%
    gsub("^\\s+|\\s+$", "", .)

  if (length(totals_text) > 0) {
    # header = TRUE consumes the first ".total" entry as the column name, so
    # rows 1-3 of `totals` are the second to fourth entries on the page.
    totals <- read.table(text = totals_text, sep = "\n", header = TRUE)
    matchs$p1[i] <- totals[1, 1]
    matchs$p2[i] <- totals[2, 1]
    matchs$p3[i] <- totals[3, 1]
  }
}

matchs <- matchs %>%
  select(HomeTeam, AwayTeam, p1, p2, p3)
colnames(matchs)[1:2] <- c("Home", "Away")

write.csv(matchs, file.path(data_dir, "ScrapHistoric.csv"))