6-1. 신경망(데이터분석 코딩패턴)

1. imports

import torch
import torchvision
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (4.5, 3.0)

# 복습
# --- 
# 시벤코정리 - 다 맞출수있어 (train) 
# 오버피팅 - 그게 의미가 없을텐데 (test 에서 잘하는게 의미가 있다) 
# 드랍아웃 - 대충대충 학습하면 오히려 좋을지도 --> 이게 성공함 // 랜덤포레스트?
#----#
# GPU 메모리 아깝다.. (비싸거든) 
# 그래서 확률적경사하강법 
# 꼭 돈이 없어서 이 알고리즘을 만든것 같지만 그런건 아님 
# 확률적경사하강법은 알고리즘 자체에 장점이 있음 
# -- 장점1: 데이터를 조금씩쓰면서 update // 대충대충하는 느낌 ---> 오버핏을 눌러주는 효과 // 배깅?
# -- 장점2: global min 이 있고, local min 있을때, local min을 잘 탈출하는 효과가 있음

- 오늘 할 것: train/test가 존재하는 데이터 셋팅에서 Dropout 레이어, 미니배치, GPU를 함께 쓰기

A. 일반적인 train/test 셋팅

- Step1: 데이터 정리

def _stack_zeros_and_ones(dataset, to_tensor):
    """Return ``(zeros, ones)``: stacked tensors of the samples labelled 0 and 1.

    The dataset is iterated once instead of once per digit. Samples with any
    other label are skipped, and the original sample order is kept within
    each digit.
    """
    zeros, ones = [], []
    for img, lbl in dataset:
        if lbl == 0:
            zeros.append(to_tensor(img))
        elif lbl == 1:
            ones.append(to_tensor(img))
    return torch.stack(zeros), torch.stack(ones)


train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True)
to_tensor = torchvision.transforms.ToTensor()

# Training split: all zeros first, then all ones; every image is reshaped
# into a row of 784 values and the labels become a float column (0.0 / 1.0).
X0, X1 = _stack_zeros_and_ones(train_dataset, to_tensor)
X = torch.cat([X0, X1], dim=0).reshape(-1, 784)
y = torch.cat([torch.zeros(len(X0)), torch.ones(len(X1))]).reshape(-1, 1)

# Test split, built exactly the same way.
XX0, XX1 = _stack_zeros_and_ones(test_dataset, to_tensor)
XX = torch.cat([XX0, XX1], dim=0).reshape(-1, 784)
yy = torch.cat([torch.zeros(len(XX0)), torch.ones(len(XX1))]).reshape(-1, 1)

X.shape, y.shape

(torch.Size([12665, 784]), torch.Size([12665, 1]))

XX.shape, yy.shape

(torch.Size([2115, 784]), torch.Size([2115, 1]))

- Step2: 학습가능한 오브젝트들의 설정 (모델링과정 포함)

torch.manual_seed(43052)
net = torch.nn.Sequential(
    torch.nn.Linear(784,32),
    torch.nn.ReLU(),
    torch.nn.Linear(32,1),
    torch.nn.Sigmoid()
)
loss_fn = torch.nn.MSELoss()
optimizr = torch.optim.SGD(net.parameters())

- Step3: 학습 (=적합)

# Full-batch training: every epoch performs one update computed from all of X.
for epoc in range(1, 501):
    #---epoch start---#
    # 1. forward pass
    yhat = net(X)
    # 2. loss
    loss = loss_fn(yhat, y)
    # 3. backward pass
    loss.backward()
    # 4. update the parameters, then clear the gradients for the next epoch
    optimizr.step()
    optimizr.zero_grad()
    #---epoch end---#
    # Progress report every 50 epochs.
    if epoc % 50 == 0:
        # Accuracy is only monitored, so run the forward pass without autograd
        # bookkeeping instead of stripping it afterwards with the legacy `.data`.
        with torch.no_grad():
            acc = ((net(X) > 0.5) == y).float().mean().item()
        print(f"# of epochs={epoc}   \t train_acc = {acc:.4f}")

# of epochs=50       train_acc = 0.4677
# of epochs=100      train_acc = 0.4677
# of epochs=150      train_acc = 0.4757
# of epochs=200      train_acc = 0.5295
# of epochs=250      train_acc = 0.6632
# of epochs=300      train_acc = 0.7929
# of epochs=350      train_acc = 0.8731
# of epochs=400      train_acc = 0.9206
# of epochs=450      train_acc = 0.9465
# of epochs=500      train_acc = 0.9634

- Step4: 예측 & 결과분석

- train acc

# Train accuracy: share of samples whose thresholded output (> 0.5) equals the label.
# `.detach()` replaces the legacy `.data` attribute; the value is unchanged.
((net(X).detach() > 0.5) == y).float().mean()

tensor(0.9634)

- test acc

# Test accuracy: share of held-out samples whose thresholded output (> 0.5) equals the label.
# `.detach()` replaces the legacy `.data` attribute; the value is unchanged.
((net(XX).detach() > 0.5) == yy).float().mean()

tensor(0.9749)

B. Dropout 사용

- Step1: 데이터정리

pass

- Step2: 학습가능한 오브젝트들의 설정 (모델링과정 포함)

torch.manual_seed(43052)
net = torch.nn.Sequential(
    torch.nn.Linear(784,32),
    torch.nn.Dropout(0.9),
    torch.nn.ReLU(),
    torch.nn.Linear(32,1),
    torch.nn.Sigmoid()
)
loss_fn = torch.nn.MSELoss()
optimizr = torch.optim.SGD(net.parameters())

- Step3: 학습 (=적합)

# Full-batch training with Dropout: the network is switched to train mode for
# the update and back to eval mode afterwards, so it is left in eval mode
# once the loop finishes.
for epoc in range(1, 501):
    net.train()
    #---epoch start---#
    # 1. forward pass
    yhat = net(X)
    # 2. loss
    loss = loss_fn(yhat, y)
    # 3. backward pass
    loss.backward()
    # 4. update the parameters, then clear the gradients for the next epoch
    optimizr.step()
    optimizr.zero_grad()
    #---epoch end---#
    net.eval()
    # Progress report every 50 epochs.
    if epoc % 50 == 0:
        # Accuracy is only monitored, so run the forward pass without autograd
        # bookkeeping instead of stripping it afterwards with the legacy `.data`.
        with torch.no_grad():
            acc = ((net(X) > 0.5) == y).float().mean().item()
        print(f"# of epochs={epoc}   \t train_acc = {acc:.4f}")

# of epochs=50       train_acc = 0.4677
# of epochs=100      train_acc = 0.4677
# of epochs=150      train_acc = 0.4744
# of epochs=200      train_acc = 0.5215
# of epochs=250      train_acc = 0.6435
# of epochs=300      train_acc = 0.7675
# of epochs=350      train_acc = 0.8468
# of epochs=400      train_acc = 0.8978
# of epochs=450      train_acc = 0.9301
# of epochs=500      train_acc = 0.9492

- Step4: 예측 & 결과분석

- train acc

# Train accuracy: share of samples whose thresholded output (> 0.5) equals the label.
# `.detach()` replaces the legacy `.data` attribute; the value is unchanged.
((net(X).detach() > 0.5) == y).float().mean()

tensor(0.9492)

- test acc

# Test accuracy: share of held-out samples whose thresholded output (> 0.5) equals the label.
# `.detach()` replaces the legacy `.data` attribute; the value is unchanged.
((net(XX).detach() > 0.5) == yy).float().mean()

tensor(0.9626)

C. GPU도 사용

- Step1: 데이터 정리

pass

- Step2: 학습가능한 오브젝트들의 설정 (모델링과정 포함)

# Fix the seed first so the Linear layers below are initialised reproducibly.
torch.manual_seed(43052)

# Linear -> Dropout(p=0.9) -> ReLU -> Linear -> Sigmoid, assembled on the CPU.
layers = [
    torch.nn.Linear(784, 32),
    torch.nn.Dropout(0.9),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 1),
    torch.nn.Sigmoid(),
]
net = torch.nn.Sequential(*layers)
# Move the parameters to the first CUDA device.
net = net.to("cuda:0")

# Mean squared error between the sigmoid output and the 0/1 label.
loss_fn = torch.nn.MSELoss()
# Plain SGD with the library's default hyper-parameters.
optimizr = torch.optim.SGD(params=net.parameters())

- Step3: 학습 (=적합)

# The training tensors never change, so move them to the GPU once up front
# instead of repeating the transfer call at the start of every epoch.
X = X.to("cuda:0")
y = y.to("cuda:0")

# Full-batch training on the GPU; the network is left in eval mode when the
# loop finishes.
for epoc in range(1, 501):
    net.train()
    #---epoch start---#
    # 1. forward pass
    yhat = net(X)
    # 2. loss
    loss = loss_fn(yhat, y)
    # 3. backward pass
    loss.backward()
    # 4. update the parameters, then clear the gradients for the next epoch
    optimizr.step()
    optimizr.zero_grad()
    #---epoch end---#
    net.eval()
    # Progress report every 50 epochs.
    if epoc % 50 == 0:
        # Accuracy is only monitored, so run the forward pass without autograd
        # bookkeeping instead of stripping it afterwards with the legacy `.data`.
        with torch.no_grad():
            acc = ((net(X) > 0.5) == y).float().mean().item()
        print(f"# of epochs={epoc}   \t train_acc = {acc:.4f}")

# of epochs=50       train_acc = 0.4677
# of epochs=100      train_acc = 0.4677
# of epochs=150      train_acc = 0.4745
# of epochs=200      train_acc = 0.5223
# of epochs=250      train_acc = 0.6441
# of epochs=300      train_acc = 0.7686
# of epochs=350      train_acc = 0.8469
# of epochs=400      train_acc = 0.8979
# of epochs=450      train_acc = 0.9302
# of epochs=500      train_acc = 0.9492

- Step4: 예측 & 결과분석

- train acc

# Train accuracy: share of samples whose thresholded output (> 0.5) equals the label.
# `.detach()` replaces the legacy `.data` attribute; the value is unchanged.
((net(X).detach() > 0.5) == y).float().mean()

tensor(0.9492, device='cuda:0')

- test acc

- net 이 cuda에 있기 때문에 데이터도 cuda로!

# Move the held-out tensors to cuda:0 before feeding them to the network.
XX = XX.to("cuda:0")
yy = yy.to("cuda:0")

# Test accuracy: share of held-out samples whose thresholded output (> 0.5) equals the label.
# `.detach()` replaces the legacy `.data` attribute; the value is unchanged.
((net(XX).detach() > 0.5) == yy).float().mean()

tensor(0.9626, device='cuda:0')

D. 미니배치도 사용

- Step1: 데이터정리

# Bring all four tensors back to the CPU; individual mini-batches are moved
# to the GPU later, inside the training loop.
X, y, XX, yy = (tensor.to("cpu") for tensor in (X, y, XX, yy))

# Pair each feature row with its label and serve them in shuffled batches of 16.
ds = torch.utils.data.TensorDataset(X, y)
dl = torch.utils.data.DataLoader(dataset=ds, batch_size=16, shuffle=True)

- Step2: 학습가능한 오브젝트들의 설정 (모델링과정 포함)

# Fix the seed first so the Linear layers below are initialised reproducibly.
torch.manual_seed(43052)

# The two trainable layers are created in network order (hidden, then output)
# so they consume the seeded random stream in the same sequence.
hidden = torch.nn.Linear(784, 32)
output = torch.nn.Linear(32, 1)
net = torch.nn.Sequential(
    hidden,
    torch.nn.Dropout(0.9),
    torch.nn.ReLU(),
    output,
    torch.nn.Sigmoid(),
).to("cuda:0")

# Mean squared error between the sigmoid output and the 0/1 label.
loss_fn = torch.nn.MSELoss()
# Plain SGD with the library's default hyper-parameters.
optimizr = torch.optim.SGD(params=net.parameters())

- Step3: 학습 (=적합)

# Mini-batch training: the data stays on the CPU and only the current batch is
# moved to the GPU, so one epoch performs one parameter update per batch.
for epoc in range(1, 3):
    net.train()
    #---epoch start---#
    for Xm, ym in dl:
        Xm = Xm.to("cuda:0")
        ym = ym.to("cuda:0")
        # 1. forward pass
        ym_hat = net(Xm)
        # 2. loss
        loss = loss_fn(ym_hat, ym)
        # 3. backward pass
        loss.backward()
        # 4. update the parameters, then clear the gradients for the next batch
        optimizr.step()
        optimizr.zero_grad()
    #---epoch end---#
    net.eval()
    # Per-epoch report: count correct predictions batch by batch.
    s = 0
    # Evaluation only, so skip autograd bookkeeping for these forward passes.
    with torch.no_grad():
        for Xm, ym in dl:
            Xm = Xm.to("cuda:0")
            ym = ym.to("cuda:0")
            s = s + ((net(Xm) > 0.5) == ym).float().sum()
    # Divide by the actual number of samples rather than a hard-coded 12665,
    # so the figure stays correct if the dataset behind `dl` changes.
    acc = s / len(dl.dataset)
    print(f"# of epochs={epoc}   \t train_acc = {acc:.4f}")

# of epochs=1        train_acc = 0.9860
# of epochs=2        train_acc = 0.9931

- Step4: 예측 & 결과분석

- 이번에는 net을 cpu로

net.to("cpu")

Sequential(
  (0): Linear(in_features=784, out_features=32, bias=True)
  (1): Dropout(p=0.9, inplace=False)
  (2): ReLU()
  (3): Linear(in_features=32, out_features=1, bias=True)
  (4): Sigmoid()
)

- train acc

# Train accuracy: threshold the outputs at 0.5, compare with the labels, average the hits.
(net(X) > 0.5).eq(y).float().mean()

tensor(0.9931)

- test acc

# Test accuracy: threshold the outputs at 0.5, compare with the labels, average the hits.
(net(XX) > 0.5).eq(yy).float().mean()

tensor(0.9967)

- 점점 비본질적인 코드가 늘어남 \(\to\) 코드가 더러워짐 \(\to\) Trainer의 개념 등장