-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.lua
More file actions
165 lines (151 loc) · 6.24 KB
/
main.lua
File metadata and controls
165 lines (151 loc) · 6.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
--Author: Chun-Hao Liu
-- Date: 06/26/2016
-- Goal: End-to-end flow for Deep learning to predict online housing price
-- Comment: read data -> data normalization -> data separation -> train model -> prediction -> write results
-- Command: th main.lua
-- Result: Generate deep learning model with trained parameters and prediction error
require 'torch'
require 'math'
--Data parameters:
number = 131 --Total number of training + testing data
feature = 9 -- First with object value and rest with feature number
train_data_ratio = 0.8 --Percentage of data that is used for training
validation_type = 'k-fold' --Support two modes validation: 'normal' and 'k-fold' validation
k_fold = 10 --Set k-fold number
--Model parameters:
local opt = {
nonlinearity_type = 'sigmoid',
training_iterations = 150, -- note: the code uses *batches*, not *minibatches*, now.
print_every = 25, -- how many iterations to skip between printing the loss
model_type = 'convnet', -- Currently only support 'mlp', 'convnet', and 'rnn'
L1_Reg = 0, -- L1 regularization parameter; set to 0 if no use
L2_Reg = 0 -- L2 regularization parameter; set to 0 if no use
}
--Load txt file data:
local file = 'Data_Preprocess.txt'
local f = io.open(file,'rb')
X = torch.Tensor(number,feature):zero()
local i = 1
for line in io.lines(file) do
local j = 1
for word in line:gmatch("%w+.%w+") do
X[i][j] = word
j = j + 1
end
i = i + 1
end
--Data normalization with mean and standard variation:
m = torch.mean(X,1)
s = torch.std(X,1)
Xs = torch.Tensor(number,feature):zero()
Xs = torch.cdiv(X - m:repeatTensor(number,1),s:repeatTensor(number,1))
if validation_type == 'normal' then
--Data randomization to separate it into training and testing data set
torch.manualSeed(1) -- fix random seed so program runs the same every time
num_train = torch.floor(number * train_data_ratio)
num_test = number - num_train
index = torch.randperm(number)
X_train = torch.Tensor(num_train,feature):zero()
X_test = torch.Tensor(num_test,feature):zero()
for i = 1,number do
if i <= num_train then
X_train[i] = Xs[index[i]]
else
X_test[i-num_train] = Xs[index[i]]
end
end
--Create deep learning model
local train = require 'train'
-- Train deep learning model
print('Start training ' .. opt.model_type .. ' model......')
model, losses, losses_test = train(opt, X_train, X_test)
--Test performance
local Test_Prediction = torch.Tensor(num_test):zero()
local Test_Prediction_Loss = torch.Tensor(num_test):zero()
local X_test_sized = torch.Tensor(8,1):zero()
for i=1, num_test do
if opt.model_type == 'mlp' or opt.model_type == 'rnn' then
Test_Prediction[i] = model:forward(X_test[i][{{2,9}}])
elseif opt.model_type == 'convnet' then
X_test_sized:copy(X_test[i][{{2,9}}])
Test_Prediction[i] = model:forward(X_test_sized)
end
Test_Prediction_Loss[i] = torch.pow(Test_Prediction[i] - X_test[i][1],2)
end
print('Prediction MSE is: ')
print(torch.cumsum(Test_Prediction_Loss)[num_test]/num_test)
--Predict actual value for the all data set and write to file
--Write to txt file data:
print('Writing all predicted data to file......')
local file = 'Data_Prediction.txt'
local fo = io.open(file,'wb')
local Xs_sized = torch.Tensor(8,1):zero()
local Prediction_All = {}
for i=1, number do
if opt.model_type == 'mlp' or opt.model_type == 'rnn' then
Prediction_All = model:forward(Xs[i][{{2,9}}])
elseif opt.model_type == 'convnet' then
Xs_sized:copy(Xs[i][{{2,9}}])
Prediction_All = model:forward(Xs_sized)
end
Prediction_All = Prediction_All * s[1][1] + m[1][1]
fo:write(tostring(i),". ",tostring(Prediction_All[1]),"\n")
end
-- Plot loss functions:
gnuplot.figure()
gnuplot.xlabel('Iterations')
gnuplot.ylabel('MSE')
gnuplot.plot({'Training',
torch.range(1, #losses), -- x-coordinates
torch.Tensor(losses), -- y-coordinates
'-'},
{'Testing',
torch.range(1, #losses_test), -- x-coordinates
torch.Tensor(losses_test), -- y-coordinates
'o'}
)
elseif validation_type == 'k-fold' then
--Separate all data into k-folds
num_per_fold = torch.ceil(number/k_fold)
num_per_fold_last = number - num_per_fold * (k_fold-1)
--Create deep learning model
local train = require 'train'
local loss_k_fold = 0
for i=1, k_fold do
print('Train the ' .. tostring(i) .. '-th fold cross-validation')
--subsampling the training set and test:
if i~= 1 and i~= k_fold then
local X_train_sub = torch.Tensor(number - num_per_fold,feature):zero()
local X_test_sub = torch.Tensor(num_per_fold,feature):zero()
X_test_sub:copy(Xs:sub(1+num_per_fold*(i-1),num_per_fold*i,1,feature))
X_train_sub:sub(1,num_per_fold*(i-1),1,feature):copy(Xs:sub(1,num_per_fold*(i-1),1,feature))
X_train_sub:sub(1+num_per_fold*(i-1),number-num_per_fold,1,feature):copy(Xs:sub(1+num_per_fold*i,number,1,feature))
--Train deep learning model
print('Start training ' .. opt.model_type .. ' model......')
model, losses, losses_test = train(opt, X_train_sub,X_test_sub)
loss_k_fold = loss_k_fold + losses_test[#losses_test]
elseif i==1 then
local X_train_sub = torch.Tensor(number - num_per_fold,feature):zero()
local X_test_sub = torch.Tensor(num_per_fold,feature):zero()
X_test_sub:copy(Xs:sub(1+num_per_fold*(i-1),num_per_fold*i,1,feature))
X_train_sub:copy(Xs:sub(1+num_per_fold*i,number,1,feature))
--Train deep learning model
print('Start training ' .. opt.model_type .. ' model......')
model, losses, losses_test = train(opt, X_train_sub,X_test_sub)
loss_k_fold = loss_k_fold + losses_test[#losses_test]
elseif i==k_fold then
local X_train_sub = torch.Tensor(number-num_per_fold_last,feature):zero()
local X_test_sub = torch.Tensor(num_per_fold_last,feature):zero()
X_test_sub:copy(Xs:sub(number-num_per_fold_last+1,number,1,feature))
X_train_sub:copy(Xs:sub(1,number-num_per_fold_last,1,feature))
--Train deep learning model
print('Start training ' .. opt.model_type .. ' model......')
model, losses, losses_test = train(opt, X_train_sub,X_test_sub)
loss_k_fold = loss_k_fold + losses_test[#losses_test]
end
end
print('K-fold cross validation loss is:')
print(loss_k_fold/k_fold)
else
print('No such validation type supported!')
end