-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
74 lines (74 loc) · 2.47 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Workspace setup: pick the working directory, fetch the archive, unpack it
# and make sure the results folder exists.
getwd()
# change the working directory
# The project path below only exists on the original author's machine, so
# switch to it only when it is present; otherwise stay in the current
# directory instead of aborting the whole script.
project_dir <- "/Users/laura/Desktop/classes/Coursera/Data-Scientific-Toolbox/Getting-and-Cleaning-Data"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
#
# download data
library("httr")
url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
file <- "getdata-projectfiles-UCI HAR Dataset.zip"
# Skip the download when the archive is already present.
if (!file.exists(file)) {
  print("... working downloading")
  download.file(url, file, method = "curl")
}
#
# unzip the file, and create folders
dFolder <- "UCI HAR Dataset"
rFolder <- "results"
# The archive is only unpacked when the dataset folder is missing.
if (!file.exists(dFolder)) {
  print("... unzip file")
  unzip(file, list = FALSE, overwrite = TRUE)
}
# Output folder used by saveresults().
if (!file.exists(rFolder)) {
  print("this is the results folder")
  dir.create(rFolder)
}
#
# Read a whitespace-delimited text file from the dataset folder into a
# data.frame.
#
# Args:
#   filename: path of the file relative to the global `dFolder`.
#   cols:     optional character vector passed to read.table(col.names = ).
#             When NULL, read.table's default names (V1, V2, ...) are used.
#             read.table's default check.names = TRUE still applies, so the
#             supplied names are made syntactically valid.
#
# Returns:
#   The data.frame produced by read.table(), with strings kept as character.
gettables <- function(filename, cols = NULL) {
  print(paste("Getting Tables: ", filename))
  f <- file.path(dFolder, filename)
  # FALSE is spelled out: the shorthand `F` is an ordinary variable that can
  # be reassigned, which would silently turn strings into factors.
  if (is.null(cols)) {
    read.table(f, sep = "", stringsAsFactors = FALSE)
  } else {
    read.table(f, sep = "", stringsAsFactors = FALSE, col.names = cols)
  }
}
#
# Load the feature table (features.txt) through gettables(); getdata() reads
# its V2 column to name the measurement columns.
features <- gettables(filename = "features.txt")
#
# Assemble one partition of the dataset into a single data.frame.
#
# type:     partition name; used both as the sub-folder and as the suffix of
#           the subject_/y_/X_ file names.
# features: table whose V2 column supplies the column names for the X_ file.
#
# Result: the subject table (column "id"), the y_ table (column "activity")
# and the X_ table, bound side by side in that order.
getdata <- function(type, features) {
  print(paste("Getting Data", type))
  # Relative path of one of the three per-partition files.
  part_file <- function(prefix) {
    paste0(type, "/", prefix, "_", type, ".txt")
  }
  subjects <- gettables(part_file("subject"), "id")
  activities <- gettables(part_file("y"), "activity")
  measurements <- gettables(part_file("X"), features$V2)
  cbind(subjects, activities, measurements)
}
# Build both partitions with the shared feature table ("test" first, then
# "train").
test <- getdata(type = "test", features = features)
train <- getdata(type = "train", features = features)
#
# Write `data` as "<name>.csv" inside the results folder (global `rFolder`).
#
# data: object handed to write.csv().
# name: file name without the ".csv" extension.
saveresults <- function(data, name) {
  print(paste("saving results", name))
  target <- paste0(rFolder, "/", name, ".csv")
  write.csv(data, target)
}
# Merge the partitions, keep the mean/std measurements and build the
# per-subject, per-activity averages.
library(plyr)
data <- rbind(train, test)
# arrange the data by "id"
data <- arrange(data, id)
#
# Replace the numeric activity codes with their labels BEFORE any subset is
# taken, so that both saved files carry the descriptive activity names.
activity_labels <- gettables("activity_labels.txt")
data$activity <- factor(data$activity, levels = activity_labels$V1,
                        labels = activity_labels$V2)
# Extract only the measurements of the mean and standard deviation.
# Columns 1 and 2 are "id" and "activity"; grep() matches case-sensitively
# anywhere in the name, so every column containing "std" or "mean" is kept.
col_names <- colnames(data)
mean_and_std <- data[, c(1, 2, grep("std", col_names),
                         grep("mean", col_names))]
saveresults(mean_and_std, "mean_and_std")
#
# Average every measurement for each (id, activity) pair.
# drop = FALSE keeps a data.frame even if only one measurement column exists.
tidy_dataset <- ddply(mean_and_std, .(id, activity), .fun = function(x) {
  colMeans(x[, -c(1:2), drop = FALSE])
})
# Mark the averaged columns with a "_mean" suffix.
colnames(tidy_dataset)[-c(1:2)] <- paste0(colnames(tidy_dataset)[-c(1:2)],
                                          "_mean")
saveresults(tidy_dataset, "tidy_dataset")