/
linear_regression_sept2020.py
114 lines (87 loc) · 2.97 KB
/
linear_regression_sept2020.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 22 14:14:44 2020
@author: lehrs
"""
import numpy as np
# %matplotlib inline
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Training observations for a group of people. Each value sits in its own
# one-element row, so both lists are two-dimensional.
heights = [[1.6], [1.65], [1.7], [1.73], [1.8]]  # metres
weights = [[60], [65], [72.3], [75], [80]]  # kilograms

# Scatter the raw observations as black dots on a fixed, labelled canvas.
plt.plot(heights, weights, 'k.')
plt.axis([1.5, 1.85, 50, 90])  # [x_min, x_max, y_min, y_max]
plt.grid(True)
plt.title('Weights plotted against heights')
plt.xlabel('Heights in metres')
plt.ylabel('Weights in kilograms')

# Build the estimator and fit it in one step (fit() returns the estimator).
model = LinearRegression().fit(heights, weights)

plt.show()
# Make a single prediction: the weight of a person who is 1.75 m tall.
# predict() is given a nested list (one row per sample, one column per
# feature), hence [[1.75]] rather than 1.75.
a1 = model.predict([[1.75]])

# The answer comes back two-dimensional as well; a1[0, 0] and a1[0][0] both
# address its first row, first column. It is printed explicitly because a
# bare expression is only echoed in an interactive console and is silently
# discarded when the file runs as a script.
print(a1[0, 0])
# Value recorded from the original interactive session: 76.0387
# Evaluate the fitted model at two far-apart heights (0 m and 1.8 m) and
# mark those two predictions with blue stars.
extreme_heights = [[0], [1.8]]
extreme_weights = model.predict(extreme_heights)
plt.plot(extreme_heights, extreme_weights, 'b*')

# Report the fitted parameters. Both attributes are indexed below, so they
# are shown raw, as scalars, and as scalars rounded to two decimals.
intercept = model.intercept_[0]
coefficients = model.coef_
slope = coefficients[0][0]
for value in (
    intercept,
    np.round(intercept, 2),
    coefficients,
    coefficients[0],
    slope,
    np.round(slope, 2),
):
    print(value)

# Predicted weights for the training heights, drawn over the observations:
# blue stars = observed, black dots + red line = model output.
pw = model.predict(heights)
plt.plot(heights, weights, 'b*')
for line_format in ('k.', 'r'):
    plt.plot(heights, pw, line_format)
plt.show()
# Residual Sum of Squares (RSS) -- bottom of page 104.
# Vectorised form: element-wise residuals, their squares, and the total.
# These used to be bare expressions (only echoed in an interactive console);
# they are bound to names so the total can be reused in the final print.
residuals = np.asarray(weights) - pw
squared_residuals = residuals ** 2
rss = np.sum(squared_residuals)

mu = np.mean(weights)
print('Mean weight %.3f' % mu)

# Verify the same figures the "old school" way, one observation at a time:
# dw_sum accumulates the squared residuals (RSS) and tss the squared
# deviations from the mean weight (total sum of squares).
dw_sum = 0
tss = 0
# Both columns are flattened so every quantity in the loop is a scalar.
# Subtracting mu from a whole one-element row instead produces a 1-element
# array, and formatting that with '%.3f' relies on an array-to-scalar
# conversion that NumPy has deprecated since 1.25.
for actual, predicted in zip(np.ravel(weights), np.ravel(pw)):
    dw = actual - predicted
    dw_squared = dw ** 2
    dw_sum = dw_sum + dw_squared
    var = actual - mu
    var_squared = var ** 2
    tss = tss + var_squared
    print('%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
          % (actual, predicted, dw, dw_squared, var, var_squared))

print('residual sum is %.3f' % dw_sum)
print('total sum is %.3f' % tss)
print('R Squared %.4f' % (1 - dw_sum / tss))

# RSS should be as small as possible.
print('Residual sum of squares: %.2f' % rss)
# Test data, in the same one-value-per-row layout as the training lists.
heights_test = [[1.58], [1.62], [1.69], [1.76], [1.82]]
weights_test = [[58], [63], [72], [73], [85]]

# Total Sum of Squares (TSS): squared deviation of each test weight from
# the mean test weight, summed over all test observations.
observed = np.ravel(weights_test)
weights_test_mean = np.mean(observed)
deviations = observed - weights_test_mean
TSS = np.sum(deviations ** 2)
print("TSS: %.2f" % TSS)
# Residual Sum of Squares (RSS) on the test data: squared gap between each
# observed test weight and the model's prediction for it, summed.
predicted_test = np.ravel(model.predict(heights_test))
residuals_test = np.ravel(weights_test) - predicted_test
RSS = np.sum(residuals_test ** 2)
print("RSS: %.2f" % RSS)

# R-squared worked out by hand from the two sums above.
R_squared = 1 - RSS / TSS
print("R-squared: %.2f" % R_squared)

# R-squared as calculated by scikit-learn itself, for comparison.
sklearn_r_squared = model.score(heights_test, weights_test)
print('R-squared: %.4f' % sklearn_r_squared)