1. Imports

import torch
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (4.5, 3.0)
2. The parameter learning process

torch.manual_seed(43052)
x,_ = torch.randn(100).sort()
eps = torch.randn(100)*0.5
X = torch.stack([torch.ones(100),x],axis=1)
W = torch.tensor([[2.5],[4.0]])
y = X@W + eps.reshape(100,1)
x = X[:,[1]]
A. Printing the learning process

What = torch.tensor([[-5.0],[10.0]], requires_grad=True)
alpha = 0.001
print(f"initial value = {What.data.reshape(-1)}")
for epoc in range(30):
    yhat = X @ What
    loss = torch.sum((y-yhat)**2)
    loss.backward()
    What.data = What.data - alpha * What.grad
    print(f'loss = {loss:.2f} \nupdate step = {-alpha * What.grad.reshape(-1)} \nupdated value: {What.data.reshape(-1)}')
    What.grad = None
initial value = tensor([-5., 10.])
loss = 8587.69
update step = tensor([ 1.3423, -1.1889])
updated value: tensor([-3.6577, 8.8111])
loss = 5675.21
update step = tensor([ 1.1029, -0.9499])
updated value: tensor([-2.5548, 7.8612])
loss = 3755.64
update step = tensor([ 0.9056, -0.7596])
updated value: tensor([-1.6492, 7.1016])
loss = 2489.58
update step = tensor([ 0.7431, -0.6081])
updated value: tensor([-0.9061, 6.4935])
loss = 1654.04
update step = tensor([ 0.6094, -0.4872])
updated value: tensor([-0.2967, 6.0063])
loss = 1102.32
update step = tensor([ 0.4995, -0.3907])
updated value: tensor([0.2028, 5.6156])
loss = 737.84
update step = tensor([ 0.4091, -0.3136])
updated value: tensor([0.6119, 5.3020])
loss = 496.97
update step = tensor([ 0.3350, -0.2519])
updated value: tensor([0.9469, 5.0501])
loss = 337.71
update step = tensor([ 0.2742, -0.2025])
updated value: tensor([1.2211, 4.8477])
loss = 232.40
update step = tensor([ 0.2243, -0.1629])
updated value: tensor([1.4454, 4.6848])
loss = 162.73
update step = tensor([ 0.1834, -0.1311])
updated value: tensor([1.6288, 4.5537])
loss = 116.63
update step = tensor([ 0.1500, -0.1056])
updated value: tensor([1.7787, 4.4480])
loss = 86.13
update step = tensor([ 0.1226, -0.0851])
updated value: tensor([1.9013, 4.3629])
loss = 65.93
update step = tensor([ 0.1001, -0.0687])
updated value: tensor([2.0014, 4.2942])
loss = 52.57
update step = tensor([ 0.0818, -0.0554])
updated value: tensor([2.0832, 4.2388])
loss = 43.72
update step = tensor([ 0.0668, -0.0447])
updated value: tensor([2.1500, 4.1941])
loss = 37.86
update step = tensor([ 0.0545, -0.0361])
updated value: tensor([2.2045, 4.1579])
loss = 33.97
update step = tensor([ 0.0445, -0.0292])
updated value: tensor([2.2490, 4.1287])
loss = 31.40
update step = tensor([ 0.0363, -0.0236])
updated value: tensor([2.2853, 4.1051])
loss = 29.70
update step = tensor([ 0.0296, -0.0191])
updated value: tensor([2.3150, 4.0860])
loss = 28.57
update step = tensor([ 0.0242, -0.0155])
updated value: tensor([2.3392, 4.0705])
loss = 27.83
update step = tensor([ 0.0197, -0.0125])
updated value: tensor([2.3589, 4.0580])
loss = 27.33
update step = tensor([ 0.0161, -0.0101])
updated value: tensor([2.3750, 4.0479])
loss = 27.00
update step = tensor([ 0.0131, -0.0082])
updated value: tensor([2.3881, 4.0396])
loss = 26.79
update step = tensor([ 0.0107, -0.0067])
updated value: tensor([2.3988, 4.0330])
loss = 26.64
update step = tensor([ 0.0087, -0.0054])
updated value: tensor([2.4075, 4.0276])
loss = 26.55
update step = tensor([ 0.0071, -0.0044])
updated value: tensor([2.4146, 4.0232])
loss = 26.48
update step = tensor([ 0.0058, -0.0035])
updated value: tensor([2.4204, 4.0197])
loss = 26.44
update step = tensor([ 0.0047, -0.0029])
updated value: tensor([2.4251, 4.0168])
loss = 26.41
update step = tensor([ 0.0038, -0.0023])
updated value: tensor([2.4290, 4.0144])
B. Visualization from the perspective of yhat

What = torch.tensor([[-5.0],[10.0]],requires_grad=True)
alpha = 0.001
plt.plot(x,y,'o',label = "observed")
fig = plt.gcf()
ax = fig.gca()
ax.plot(x,X@What.data,'--',color="C1")
for epoc in range(30):
    yhat = X @ What
    loss = torch.sum((y-yhat)**2)
    loss.backward()
    What.data = What.data - alpha * What.grad
    ax.plot(x,X@What.data,'--',color="C1",alpha=0.1)
    What.grad = None
C. Visualization from the perspective of loss

def plot_loss():
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    w0 = np.arange(-6, 11, 0.5)
    w1 = np.arange(-6, 11, 0.5)
    W1,W0 = np.meshgrid(w1,w0)
    LOSS = W0*0
    for i in range(len(w0)):
        for j in range(len(w1)):
            LOSS[i,j] = torch.sum((y-w0[i]-w1[j]*x)**2)
    ax.plot_surface(W0, W1, LOSS, rstride=1, cstride=1, color='b',alpha=0.1)
    ax.azim = 30 ## adjust the 3d plot view
    ax.dist = 8 ## adjust the 3d plot view
    ax.elev = 5 ## adjust the 3d plot view
    ax.set_xlabel(r'$w_0$') # set the x-axis label
    ax.set_ylabel(r'$w_1$') # set the y-axis label
    ax.set_xticks([-5,0,5,10]) # set the x-axis ticks
    ax.set_yticks([-5,0,5,10]) # set the y-axis ticks
    plt.close(fig) # prevent automatic display
    return fig
def l(w0hat,w1hat):
    yhat = w0hat + w1hat*x
    return torch.sum((y-yhat)**2)

fig = plot_loss()
ax = fig.gca()
ax.scatter(2.5, 4, l(2.5,4), s=200, marker='*', color='red', label=r"${\bf W}=[2.5, 4]'$")
ax.scatter(-5, 10, l(-5,10), s=200, marker='*', color='blue', label=r"initial $\hat{\bf W}=[-5, 10]'$")
ax.legend()
fig
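The double Python loop above fills the LOSS grid one cell at a time. The same surface can be computed in one shot with broadcasting; the following is only a sketch (the names w0g, w1g, LOSS_vec are illustrative and not in the original code):

w0g = torch.arange(-6, 11, 0.5)   # same grid as w0 above
w1g = torch.arange(-6, 11, 0.5)   # same grid as w1 above
resid = y.reshape(1,1,-1) - w0g.reshape(-1,1,1) - w1g.reshape(1,-1,1)*x.reshape(1,1,-1)
LOSS_vec = (resid**2).sum(dim=-1) # LOSS_vec[i,j] equals torch.sum((y - w0g[i] - w1g[j]*x)**2)
# LOSS_vec.numpy() could be passed to plot_surface in place of LOSS.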
D. Animation

from matplotlib import animation
plt.rcParams['figure.figsize'] = (7.5,2.5)
plt.rcParams["animation.html"] = "jshtml"
def show_animation(alpha=0.001):
    ## 1. initialize lists to record the history
    loss_history = []
    yhat_history = []
    What_history = []

    ## 2. training + recording the learning process
    What = torch.tensor([[-5.0],[10.0]],requires_grad=True)
    What_history.append(What.data.tolist())
    for epoc in range(30):
        yhat = X@What ; yhat_history.append(yhat.data.tolist())
        loss = torch.sum((y-yhat)**2); loss_history.append(loss.item())
        loss.backward()
        What.data = What.data - alpha * What.grad; What_history.append(What.data.tolist())
        What.grad = None

    ## 3. visualization
    fig = plt.figure()
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2, projection='3d')

    #### ax1: from the perspective of yhat..
    ax1.plot(x,y,'o',label=r"$(x_i,y_i)$")
    line, = ax1.plot(x,yhat_history[0],label=r"$(x_i,\hat{y}_i)$")
    ax1.legend()

    #### ax2: from the perspective of loss..
    w0 = np.arange(-6, 11, 0.5)
    w1 = np.arange(-6, 11, 0.5)
    W1,W0 = np.meshgrid(w1,w0)
    LOSS = W0*0
    for i in range(len(w0)):
        for j in range(len(w1)):
            LOSS[i,j] = torch.sum((y-w0[i]-w1[j]*x)**2)
    ax2.plot_surface(W0, W1, LOSS, rstride=1, cstride=1, color='b',alpha=0.1)
    ax2.azim = 30 ## adjust the 3d plot view
    ax2.dist = 8 ## adjust the 3d plot view
    ax2.elev = 5 ## adjust the 3d plot view
    ax2.set_xlabel(r'$w_0$') # set the x-axis label
    ax2.set_ylabel(r'$w_1$') # set the y-axis label
    ax2.set_xticks([-5,0,5,10]) # set the x-axis ticks
    ax2.set_yticks([-5,0,5,10]) # set the y-axis ticks
    ax2.scatter(2.5, 4, l(2.5,4), s=200, marker='*', color='red', label=r"${\bf W}=[2.5, 4]'$")
    ax2.scatter(-5, 10, l(-5,10), s=200, marker='*', color='blue')
    ax2.legend()

    def animate(epoc):
        line.set_ydata(yhat_history[epoc])
        ax2.scatter(np.array(What_history)[epoc,0],np.array(What_history)[epoc,1],loss_history[epoc],color='grey')
        fig.suptitle(f"alpha = {alpha} / epoch = {epoc}")
        return line

    ani = animation.FuncAnimation(fig, animate, frames=30)
    plt.close()
    return ani
ani = show_animation(alpha=0.001)
ani
E. Varying the learning rate (\(\alpha\))

- \(\alpha\) is too small, so learning is inefficient

show_animation(alpha=0.0001)
- A larger \(\alpha\) is not automatically better

show_animation(alpha=0.0083)
- It may fail to converge (a numeric check of these learning rates follows after this list)

show_animation(alpha=0.0085)
- \(\alpha\) set far too large

show_animation(alpha=0.01)
plt.rcdefaults()
plt.rcParams['figure.figsize'] = 4.5,3.0
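To put numbers behind the animations, a small sketch (same SSE loss, same starting point, 30 epochs; not part of the original code) prints the final loss reached for each of the learning rates tried above; the smallest rate barely moves, while the largest ones are expected to make the loss grow rather than shrink:

for alpha in [0.0001, 0.001, 0.0083, 0.0085, 0.01]:
    What = torch.tensor([[-5.0],[10.0]], requires_grad=True)
    for epoc in range(30):
        yhat = X @ What
        loss = torch.sum((y - yhat)**2)
        loss.backward()
        What.data = What.data - alpha * What.grad
        What.grad = None
    print(f"alpha = {alpha}: final loss = {loss.item():.2f}")   # loss from the last epoch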
3. SSE \(\to\) MSE
- Choosing the learning rate is important.
- With SSE as the loss, choosing the learning rate is awkward (the size of the gradient grows with the number of samples).
- \(\to\) MSE !!!

Code when the loss function is SSE
What = torch.tensor([[-5.0],[10.0]],requires_grad = True)
for epoc in range(30):
    # step1: yhat
    yhat = X@What
    # step2: loss
    loss = torch.sum((y-yhat)**2)
    # step3: differentiate
    loss.backward()
    # step4: update
    What.data = What.data - 0.001 * What.grad
    What.grad = None
What.data
tensor([[2.4290],
[4.0144]])
Code when the loss function is MSE
What = torch.tensor([[-5.0],[10.0]],requires_grad = True)
for epoc in range(30):
    # step1: yhat
    yhat = X@What
    # step2: loss
    loss = torch.sum((y-yhat)**2)/100 # torch.mean((y-yhat)**2)
    # step3: differentiate
    loss.backward()
    # step4: update
    What.data = What.data - 0.1 * What.grad
    What.grad = None
What.data
tensor([[2.4290],
[4.0144]])
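Both runs end with exactly the same weights. The reason: here MSE = SSE/100, so the MSE gradient is the SSE gradient divided by 100, and scaling the learning rate from 0.001 up to 0.1 compensates exactly. A small check (a sketch, not part of the original code):

What = torch.tensor([[-5.0],[10.0]], requires_grad=True)
loss_sse = torch.sum((y - X@What)**2)   # SSE
loss_sse.backward()
grad_sse = What.grad.clone(); What.grad = None
loss_mse = torch.mean((y - X@What)**2)  # MSE = SSE/100
loss_mse.backward()
grad_mse = What.grad.clone(); What.grad = None
print(torch.allclose(grad_sse, 100*grad_mse))        # True: gradients differ only by the factor n=100
print(torch.allclose(0.001*grad_sse, 0.1*grad_mse))  # True: the two update steps are identical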
4. PyTorch-style coding pattern 1

torch.manual_seed(43052)
x,_ = torch.randn(100).sort()
eps = torch.randn(100)*0.5
X = torch.stack([torch.ones(100),x],axis=1)
W = torch.tensor([[2.5],[4.0]])
y = X@W + eps.reshape(100,1)
x = X[:,[1]]
A. Basic pattern (★)

What = torch.tensor([[-5.0],[10.0]],requires_grad = True)
for epoc in range(30):
    # step1: yhat
    yhat = X@What
    # step2: loss
    loss = torch.sum((y-yhat)**2)/100
    # step3: differentiate
    loss.backward()
    # step4: update
    What.data = What.data - 0.1 * What.grad
    What.grad = None

plt.plot(x,y,'o')
plt.plot(x,X@What.data,'--')
plt.title(f'What={What.data.reshape(-1)}');
B. Step2 loss computation \(\to\) use loss_fn
What = torch.tensor([[-5.0],[10.0]],requires_grad = True)
loss_fn = torch.nn.MSELoss()
for epoc in range(30):
    # step1: yhat
    yhat = X@What
    # step2: loss
    #loss = torch.sum((y-yhat)**2)/100
    loss = loss_fn(yhat,y) # the order barely matters here, but make it a habit to pass yhat first!!
    # step3: differentiate
    loss.backward()
    # step4: update
    What.data = What.data - 0.1 * What.grad
    What.grad = None

plt.plot(x,y,'o')
plt.plot(x,X@What.data,'--')
plt.title(f'What={What.data.reshape(-1)}');
C. Step1 yhat computation \(\to\) use net

- The original way
What = torch.tensor([[-5.0],[10.0]],requires_grad = True)
yhat = X@What
yhat[:5]
tensor([[-29.8211],
[-28.6215],
[-24.9730],
[-21.2394],
[-19.7919]], grad_fn=<SliceBackward0>)
# yhat = net(X)
net = torch.nn.Linear(
    in_features=2, # X:(n,2) --> 2
    out_features=1, # yhat:(n,1) --> 1
    bias=False
)
- .T (the weight must be transposed when it is assigned)

net.weight.data = torch.tensor([[-5.0], [10.0]]).T
net.weight
Parameter containing:
tensor([[-5., 10.]], requires_grad=True)
- The values below are all the same (the reason is explained right after these checks)

net(X)[:5]
tensor([[-29.8211],
[-28.6215],
[-24.9730],
[-21.2394],
[-19.7919]], grad_fn=<SliceBackward0>)
(X@What)[:5]
tensor([[-29.8211],
[-28.6215],
[-24.9730],
[-21.2394],
[-19.7919]], grad_fn=<SliceBackward0>)
(X@net.weight.T)[:5]
tensor([[-29.8211],
[-28.6215],
[-24.9730],
[-21.2394],
[-19.7919]], grad_fn=<SliceBackward0>)
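The reason the transpose was needed: torch.nn.Linear stores its weight with shape (out_features, in_features), here (1, 2), and computes X @ weight.T (plus the bias, which is disabled here). A quick check, as a sketch using the objects defined above:

print(net.weight.shape)                          # torch.Size([1, 2]) = (out_features, in_features)
print(torch.allclose(net(X), X @ net.weight.T))  # True: nn.Linear multiplies by the transposed weight
print(torch.allclose(net(X), X @ What))          # True: net.weight.data was set to What.T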
- Code using loss_fn and net
# preparation for step1
net = torch.nn.Linear(
    in_features=2,
    out_features=1,
    bias=False
)
net.weight.data = torch.tensor([[-5.0, 10.0]])
# preparation for step2
loss_fn = torch.nn.MSELoss()
for epoc in range(30):
    # step1: yhat
    # yhat = X@What
    yhat = net(X)
    # step2: loss
    loss = loss_fn(yhat,y)
    # step3: differentiate
    loss.backward()
    # step4: update
    net.weight.data = net.weight.data - 0.1 * net.weight.grad
    net.weight.grad = None
plt.plot(x,y,'o')
plt.plot(x,net(X).data,'--')
plt.title(f'net.weight={net.weight.data.reshape(-1)}');
D. Step4 update \(\to\) use an optimizer

- The previous way
## -- preparation -- ##
# preparation for step1
net = torch.nn.Linear(
    in_features=2,
    out_features=1,
    bias=False
)
net.weight.data = torch.tensor([[-5.0, 10.0]])
# preparation for step2
loss_fn = torch.nn.MSELoss()
## -- run epoch 1 -- ##
# step1: yhat
yhat = net(X)
# step2: loss
loss = loss_fn(yhat,y)
# step3: differentiate
loss.backward()
# step4: update
print(net.weight.data)
net.weight.data = net.weight.data - 0.1 * net.weight.grad
print(net.weight.data)
net.weight.grad = None
tensor([[-5., 10.]])
tensor([[-3.6577, 8.8111]])
## -- run epoch 2 -- ##
# step1: yhat
yhat = net(X)
# step2: loss
loss = loss_fn(yhat,y)
# step3: differentiate
loss.backward()
# step4: update
print(net.weight.data)
net.weight.data = net.weight.data - 0.1 * net.weight.grad
print(net.weight.data)
net.weight.grad = None
tensor([[-3.6577, 8.8111]])
tensor([[-2.5548, 7.8612]])
- Code using an optimizer

## -- preparation -- ##
# preparation for step1
net = torch.nn.Linear(
    in_features=2,
    out_features=1,
    bias=False
)
net.weight.data = torch.tensor([[-5.0, 10.0]])
# preparation for step2
loss_fn = torch.nn.MSELoss()
# preparation for step4
optimizr = torch.optim.SGD(net.parameters(),lr=0.1) # this line is new
## -- run epoch 1 -- ##
yhat = net(X)
# step2: loss
loss = loss_fn(yhat,y)
# step3: differentiate
loss.backward()
# step4: update
print(net.weight.data)
#net.weight.data = net.weight.data - 0.1 * net.weight.grad
optimizr.step()
print(net.weight.data)
#net.weight.grad = None
optimizr.zero_grad()
tensor([[-5., 10.]])
tensor([[-3.6577, 8.8111]])
## -- run epoch 2 -- ##
yhat = net(X)
# step2: loss
loss = loss_fn(yhat,y)
# step3: differentiate
loss.backward()
# step4: update
print(net.weight.data)
#net.weight.data = net.weight.data - 0.1 * net.weight.grad
optimizr.step()
print(net.weight.data)
#net.weight.grad = None
optimizr.zero_grad()
tensor([[-3.6577, 8.8111]])
tensor([[-2.5548, 7.8612]])
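With plain SGD (no momentum, no weight decay), optimizr.step() performs exactly the manual update that is commented out above, i.e. parameter minus lr times gradient for every parameter, and optimizr.zero_grad() plays the role of resetting .grad. A minimal sketch of this equivalence (net2 and opt2 are throwaway names, not part of the lecture code):

net2 = torch.nn.Linear(in_features=2, out_features=1, bias=False)
net2.weight.data = torch.tensor([[-5.0, 10.0]])
opt2 = torch.optim.SGD(net2.parameters(), lr=0.1)
loss = torch.nn.MSELoss()(net2(X), y)
loss.backward()
manual = net2.weight.data - 0.1 * net2.weight.grad   # what the manual rule would produce
opt2.step()                                          # plain SGD update
print(torch.allclose(net2.weight.data, manual))      # True
opt2.zero_grad()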
- Final code using loss_fn, net, and optimizer
# preparation for step1
net = torch.nn.Linear(
    in_features=2,
    out_features=1,
    bias=False
)
net.weight.data = torch.tensor([[-5.0, 10.0]])
# preparation for step2
loss_fn = torch.nn.MSELoss()
# preparation for step4
optimizr = torch.optim.SGD(net.parameters(),lr=0.1)
for epoc in range(30):
    # step1: yhat
    yhat = net(X)
    # step2: loss
    loss = loss_fn(yhat,y)
    # step3: differentiate
    loss.backward()
    # step4: update
    optimizr.step()
    optimizr.zero_grad()
plt.plot(x,y,'o')
plt.plot(x,yhat.data,'--')
plt.title(f'net.weight={net.weight.data.reshape(-1)}');
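The same four steps (forward pass, loss, backward, step + zero_grad) are the skeleton of essentially every PyTorch training loop, so a natural next move is to wrap them in a function. A sketch; the function name train and its signature are illustrative, not part of the lecture code:

def train(net, loss_fn, optimizr, X, y, epochs=30):
    for epoc in range(epochs):
        yhat = net(X)             # step1: yhat
        loss = loss_fn(yhat, y)   # step2: loss
        loss.backward()           # step3: differentiate
        optimizr.step()           # step4: update
        optimizr.zero_grad()
    return net

net = torch.nn.Linear(in_features=2, out_features=1, bias=False)
net.weight.data = torch.tensor([[-5.0, 10.0]])
net = train(net, torch.nn.MSELoss(), torch.optim.SGD(net.parameters(), lr=0.1), X, y)
print(net.weight.data)            # close to the true W = [2.5, 4]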