In multivariate linear regression, gradient descent is used to compute the parameter values. Here I implement a gradient descent version in Python.
The multivariate linear equation is y = A0 + A1*x1 + ... + An*xn.
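For reference, the code below performs standard batch gradient descent on the squared-error cost. With m training examples and the convention x_0^{(i)} = 1 for the intercept, the cost function and update rule are:

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2, \qquad h_\theta(x) = \theta_0 + \theta_1 x_1 + \cdots + \theta_n x_n

\theta_j \leftarrow \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}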
Input data format: each line is one training example, with the target y first, followed by the feature values x1..xn, all separated by tabs:

y \t x1 \t x2 \t ... \t xn
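For illustration, a tiny input file with two features might look like this (hypothetical values generated from y = 1 + 2*x1 + 3*x2; note that load() below parses fields with int(), so values must be integers):

6	1	1
8	2	1
9	1	2
11	2	2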
The code is as follows:
import sys

theta = []          # model parameters, theta[0] is the intercept
training_data = []  # each row: [y, x1, x2, ..., xn]
h_value = []        # predicted value for each training row
alpha = 0.0000009   # learning rate

def load(path):
    # read tab-separated training rows: y \t x1 \t ... \t xn
    f = open(path, 'r')
    for x in f:
        x = x.strip('\r\n')
        field = x.split('\t')
        v_list = []
        for v in field:
            v_list.append(int(v))
        training_data.append(v_list)
    f.close()
    for x in training_data:
        h_value.append(0.0)

def init(path, theta_num):
    # theta_num = number of features + 1 (for the intercept)
    for x in range(theta_num):
        theta.append(1.0)
    load(path)

def gradient():
    i = 0
    loss = 100.0
    theta_num = len(theta)
    data_num = len(training_data)
    while i < 3000 and loss > 0.0001:
        # compute h value (prediction) for every training row
        for index in range(data_num):
            hv = theta[0]
            for k in range(1, theta_num):
                hv += theta[k] * training_data[index][k]
            h_value[index] = hv
        # update theta: one batch gradient step over all rows
        for index in range(theta_num):
            s = 0.0
            for k in range(data_num):
                if index == 0:
                    s += (h_value[k] - training_data[k][0]) * 1
                else:
                    s += (h_value[k] - training_data[k][0]) * training_data[k][index]
            theta[index] = theta[index] - alpha * s / data_num
        # compute loss: squared error over the training set
        loss = 0.0
        for index in range(data_num):
            hv = theta[0]
            for k in range(1, theta_num):
                hv += theta[k] * training_data[index][k]
            loss += pow(hv - training_data[index][0], 2) / (2 * data_num)
        print(loss)
        i += 1
    for x in theta:
        print(x, end=' ')
    print()

if __name__ == '__main__':
    path = sys.argv[1]
    init(path, int(sys.argv[2]))
    gradient()
    sys.exit(0)
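Assuming the script is saved as gradient_descent.py (a hypothetical name) and the sample file above as data.txt, a run looks like:

python gradient_descent.py data.txt 3

where 3 is theta_num: one intercept plus two features. The script prints the loss after each iteration and the final parameter values when it stops. Note that the hard-coded learning rate alpha = 0.0000009 was presumably tuned for the original author's data; for small inputs like the toy file above, a larger rate (say 0.01) would likely be needed to converge within the 3000-iteration cap.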
Original post: http://www.cnblogs.com/clyskyblue/p/3594991.html