Random Forest vs. Neural Network
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from time import time
np.random.seed(1337)
df = pd.read_csv('/Users/train.csv')
df['Sex'] = df['Sex'].map({'female':0, 'male':1})
df['Embarked'] = df['Embarked'].map({'C':1, 'S':2, 'Q':3})
df_train = df.iloc[:712,:]
df_train = df_train.drop(['Name', 'Ticket', 'Cabin'], axis=1)
df_train = df_train.dropna()
scaler = StandardScaler()
features = ['Pclass','Sex','Age','Fare']
X_train = scaler.fit_transform(df_train[features].values)
y_train = df_train['Survived'].values
y_train_onehot = pd.get_dummies(df_train['Survived']).values
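# get_dummies one-hot encodes the label: Survived 0 -> [1, 0], 1 -> [0, 1]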
df_test = df.iloc[712:,:]
df_test = df_test.drop(['Name', 'Ticket', 'Cabin'], axis=1)
df_test = df_test.dropna()
X_test = scaler.transform(df_test[features].values)
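# note: the scaler was fit on the training rows only; the test rows reuse
# those statistics, which avoids leaking test information into training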
y_test = df_test['Survived'].values
# To create a basis for comparison, we train a Random Forest model and record its accuracy on the test set.
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=0, verbose=3)
model = model.fit(X_train, y_train)
y_prediction = model.predict(X_test)
print "\naccuracy", np.sum(y_prediction == y_test) / float(len(y_test))
# 1-layer Neural Network
print(X_train[0])
W = np.random.rand(2,4)*0.01    # 2 output classes, 4 input features
b = np.random.rand(2,) * 0.01
print(b)
result = np.dot(W, X_train[0]) + b
def softmax(x):
    return np.exp(x) / np.exp(x).sum()
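# For large logits np.exp can overflow; a common safeguard (an illustrative
# variant, not used below) is to subtract the max before exponentiating:
def softmax_stable(x):
    shifted = x - np.max(x)
    return np.exp(shifted) / np.exp(shifted).sum()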
result = softmax(result)
print(result)
print(y_train_onehot[0])
label_index = np.argmax(y_train_onehot[0])
print(label_index)
print("probability assigned to the true label", result[label_index])
# cross-entropy for one example: -log of the probability given to the
# true class; 0 for a perfect prediction, large when confidently wrong
loss = -np.log(result[label_index])
print("loss for first passenger", loss)
min_loss = 1000
best_weights = ()
start = time()
# random search: try 1000 random weight settings and keep the pair with
# the lowest average cross-entropy loss on the training set
for i in range(1000):
    W = np.random.rand(2, 4) / 10
    b = np.random.rand(2,) / 10
    scores = []
    loss = 0
    for j in range(X_train.shape[0]):
        result = np.dot(W, X_train[j]) + b
        result = softmax(result)
        scores.append(list(result))
        label_index = np.argmax(y_train_onehot[j])
        loss += -np.log(result[label_index])
    loss = loss / float(X_train.shape[0])
    y_prediction = np.argmax(np.array(scores), axis=1)
    accuracy = np.sum(y_prediction == y_train) / float(len(y_train))
    if loss < min_loss:
        min_loss = loss
        best_weights = (W, b)
        print("loss %s accuracy %s loop %s" % (round(loss, 3), round(accuracy, 3), i))
print("\ntime taken %s seconds" % str(time() - start))
W, b = best_weights
scores = []
for j in range(X_test.shape[0]):
    result = np.dot(W, X_test[j]) + b
    result = softmax(result)
    scores.append(list(result))
y_prediction = np.argmax(np.array(scores), axis=1)
print("accuracy", np.sum(y_prediction == y_test) / float(len(y_test)))
# 2-layer Neural Network
W_1 = np.random.rand(100, 4) * 0.01   # 4 features -> 100 hidden units
b_1 = np.random.rand(100,) * 0.01
W_2 = np.random.rand(2, 100) * 0.01   # 100 hidden units -> 2 classes
b_2 = np.random.rand(2,) * 0.01
result = np.dot(W_1, X_train[0]) + b_1
result = np.dot(W_2, result) + b_2
min_loss = 1000
best_weights = ()
start = time()
for i in range(1000):
    W_1 = np.random.rand(100, 4) * 0.01
    b_1 = np.random.rand(100,) * 0.01
    W_2 = np.random.rand(2, 100) * 0.01
    b_2 = np.random.rand(2,) * 0.01
    scores = []
    loss = 0
    for j in range(X_train.shape[0]):
        result = np.dot(W_1, X_train[j]) + b_1
        result = np.dot(W_2, result) + b_2
        result = softmax(result)
        scores.append(list(result))
        label_index = np.argmax(y_train_onehot[j])
        loss += -np.log(result[label_index])
    loss = loss / float(X_train.shape[0])
    y_prediction = np.argmax(np.array(scores), axis=1)
    accuracy = np.sum(y_prediction == y_train) / float(len(y_train))
    if loss < min_loss:
        min_loss = loss
        best_weights = (W_1, b_1, W_2, b_2)
        print("loss %s accuracy %s loop %s" % (round(loss, 3), round(accuracy, 3), i))
print("\ntime taken %s seconds" % str(time() - start))
W_1, b_1, W_2, b_2 = best_weights
scores = []
for j in range(X_test.shape[0]):
    result = np.dot(W_1, X_test[j]) + b_1
    result = np.dot(W_2, result) + b_2
    result = softmax(result)
    scores.append(list(result))
y_prediction = np.argmax(np.array(scores), axis=1)
print("accuracy", np.sum(y_prediction == y_test) / float(len(y_test)))
Source: https://github.com/savarin/neural-networks
What is deep learning???
After some searching, I felt it shares logic with the Monte Carlo Simulation and Optimization posts I wrote earlier.
The code above applies a 1-layer neural network to the Titanic dataset:
it makes predictions by finding the Function1 needed for input -> Function1 -> Output.
Function1 is built by running a loop and taking the parameters that minimize the error.
(A 2-layer neural network follows the same logic: input -> Function1 -> Function2 -> Output.)
In Optimization and the Monte Carlo method, I recall finding parameters that converge to within some error tolerance.
Deep learning and Monte Carlo simulation feel inductive; Optimization feels deductive.
I'm starting to understand why deep learning is taking off.
With data growing and computing power improving, it could hardly be otherwise.
Come to think of it, root-finding methods for equations work with similar logic.
I remember making a loop to approximate a root between a and b where f(a) < 0 and f(b) > 0; that bracketing procedure is actually the bisection method rather than Newton's method.
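A minimal sketch of that bracketing loop (illustrative; the sample f is my own choice):

def bisect(f, a, b, tol=1e-8):
    # assumes f(a) and f(b) have opposite signs, so a root lies in [a, b]
    while b - a > tol:
        mid = (a + b) / 2.0
        if f(a) * f(mid) <= 0:
            b = mid    # root is in [a, mid]
        else:
            a = mid    # root is in [mid, b]
    return (a + b) / 2.0

print(bisect(lambda x: x**2 - 2, 0, 2))  # ~1.41421, i.e. sqrt(2)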
Since the equation is hard to even write down, let alone solve, you just keep guessing and keep the function with the smallest error.
I increasingly think deep learning is not really a new concept.
It feels like an idea that has existed for a long time and can only now be implemented on computers..