1. imports

```python
import torch
import matplotlib.pyplot as plt
import pandas as pd
plt.rcParams['figure.figsize'] = (4.5, 3.0)
```
2. How to make a bent line

- To overcome the limitation of the logistic model, we need a graph that bends *before* the sigmoid is applied.
- Assume a vector \(x\) as below.
```python
x = torch.linspace(-1,1,1001).reshape(-1,1)
x
```

```
tensor([[-1.0000],
        [-0.9980],
        [-0.9960],
        ...,
        [ 0.9960],
        [ 0.9980],
        [ 1.0000]])
```
- Goal: let's construct the following vector \({\bf y}\).
\[{\bf y} = [y_1,y_2,\dots,y_{n}]^\top, \quad y_i = \begin{cases} 9x_i +4.5& x_i <0 \\ -4.5x_i + 4.5& x_i >0 \end{cases}\]
- Method 1: implement the formula as written.

```python
plt.plot(x, 9*x+4.5, color="blue", alpha=0.1)
plt.plot(x[x<0], (9*x+4.5)[x<0], color="blue")
plt.plot(x, -4.5*x+4.5, color="orange", alpha=0.1)
plt.plot(x[x>0], (-4.5*x+4.5)[x>0], color="orange")
```
```python
y = x*0
y[x<0] = (9*x+4.5)[x<0]
y[x>0] = (-4.5*x+4.5)[x>0]
plt.plot(x,y)
```
- Method 2: using ReLU.

```python
relu = torch.nn.ReLU()
# plt.plot(x, -4.5*relu(x), color="red")
# plt.plot(x, -9*relu(-x), color="blue")
y = -4.5*relu(x) - 9*relu(-x) + 4.5
plt.plot(x,y)
```
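As a quick sanity check (an addition to the notes), we can confirm that the ReLU form agrees with the piecewise formula everywhere except at \(x_i=0\), which the case definition leaves out:

```python
# sanity check: Method 1 and Method 2 agree wherever the cases apply
y1 = x*0
y1[x<0] = (9*x+4.5)[x<0]
y1[x>0] = (-4.5*x+4.5)[x>0]
y2 = -4.5*relu(x) - 9*relu(-x) + 4.5
print(torch.allclose(y1[x!=0], y2[x!=0]))  # True
```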
- The intermediate steps of the ReLU construction:

```python
fig = plt.figure(figsize=(6, 4))
spec = fig.add_gridspec(4, 3)
ax1 = fig.add_subplot(spec[:2,0]); ax1.set_title(r'$x$'); ax1.set_ylim(-1,1)
ax2 = fig.add_subplot(spec[2:,0]); ax2.set_title(r'$-x$'); ax2.set_ylim(-1,1)
ax3 = fig.add_subplot(spec[:2,1]); ax3.set_title(r'$relu(x)$'); ax3.set_ylim(-1,1)
ax4 = fig.add_subplot(spec[2:,1]); ax4.set_title(r'$relu(-x)$'); ax4.set_ylim(-1,1)
ax5 = fig.add_subplot(spec[1:3,2]); ax5.set_title(r'$-4.5 relu(x)-9 relu(-x)+4.5$')
#---#
ax1.plot(x,'--',color='C0')
ax2.plot(-x,'--',color='C1')
ax3.plot(relu(x),'--',color='C0')
ax4.plot(relu(-x),'--',color='C1')
ax5.plot(-4.5*relu(x)-9*relu(-x)+4.5,'--',color='C2')
fig.tight_layout()
```
- Method 3: exploiting broadcasting with ReLU.
- We can also compute y with the following idea:
  - prepare x and relu
  - u = [x -x]
  - v = relu(u) = [relu(x), relu(-x)] = [v1 v2]
  - y = -4.5*v1 - 9*v2 + 4.5
```python
u = torch.concat([x,-x],axis=1)
v = relu(u)
v1 = v[:,[0]]
v2 = v[:,[1]]
y = -4.5*v1 - 9*v2 + 4.5
plt.plot(x,y)
```
- Method 4: y = linr(v).

```python
u = torch.concat([x,-x],axis=1)
v = relu(u)
y = v @ torch.tensor([[-4.5],[-9]]) + 4.5
plt.plot(x,y)
```
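For reference, the matrix product here applies, row by row, exactly the weighted sum from Method 3:

\[\begin{bmatrix} v_{i1} & v_{i2} \end{bmatrix}\begin{bmatrix} -4.5 \\ -9 \end{bmatrix} + 4.5 = -4.5\,v_{i1} - 9\,v_{i2} + 4.5\]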
- Method 5: u = linr(x).

```python
u = x @ torch.tensor([[1.0, -1.0]])
v = relu(u)
y = v @ torch.tensor([[-4.5],[-9]]) + 4.5
plt.plot(x,y)
```
- Method 6: using torch.nn.Linear().

```python
# u = l1(x) # l1 is the linear map x -> u, i.e. a (n,1) -> (n,2) linear transformation
l1 = torch.nn.Linear(1,2,bias=False)
l1.weight.data = torch.tensor([[1.0, -1.0]]).T
a1 = relu
l2 = torch.nn.Linear(2,1,bias=True)
l2.weight.data = torch.tensor([[-4.5],[-9]]).T
l2.bias.data = torch.tensor([4.5])
#---#
u = l1(x)
v = a1(u)
y = l2(v)
plt.plot(x,y.data)
```
```python
pwlinr = torch.nn.Sequential(l1,a1,l2)
plt.plot(x,pwlinr(x).data)
```
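A quick check (an addition, assuming `x` and `relu` from above) that the assembled network reproduces the hand-built bent line:

```python
# the Sequential model should match the explicit ReLU formula
y_manual = -4.5*relu(x) - 9*relu(-x) + 4.5
print(torch.allclose(pwlinr(x).data, y_manual))  # True
```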
- Mathematical expression

Note: formula notation
(1)
\({\bf X}=\begin{bmatrix} x_1 \\ \dots \\ x_n \end{bmatrix}\)
(2)
\(l_1({\bf X})={\bf X}{\bf W}^{(1)}\overset{bc}{+} {\boldsymbol b}^{(1)}=\begin{bmatrix} x_1 & -x_1 \\ x_2 & -x_2 \\ \dots & \dots \\ x_n & -x_n\end{bmatrix}\)
- \({\bf W}^{(1)}=\begin{bmatrix} 1 & -1 \end{bmatrix}\)
- \({\boldsymbol b}^{(1)}=\begin{bmatrix} 0 & 0 \end{bmatrix}\)
(3)
\((a_1\circ l_1)({\bf X})=\text{relu}\big({\bf X}{\bf W}^{(1)}\overset{bc}{+}{\boldsymbol b}^{(1)}\big)=\begin{bmatrix} \text{relu}(x_1) & \text{relu}(-x_1) \\ \text{relu}(x_2) & \text{relu}(-x_2) \\ \dots & \dots \\ \text{relu}(x_n) & \text{relu}(-x_n)\end{bmatrix}\)
(4)
\((l_2 \circ a_1\circ l_1)({\bf X})=\text{relu}\big({\bf X}{\bf W}^{(1)}\overset{bc}{+}{\boldsymbol b}^{(1)}\big){\bf W}^{(2)}\overset{bc}{+}b^{(2)}\)\(\quad=\begin{bmatrix} -4.5\times\text{relu}(x_1) -9.0 \times \text{relu}(-x_1) +4.5 \\ -4.5\times\text{relu}(x_2) -9.0 \times\text{relu}(-x_2) + 4.5 \\ \dots \\ -4.5\times \text{relu}(x_n) -9.0 \times\text{relu}(-x_n)+4.5 \end{bmatrix}\)
- \({\bf W}^{(2)}=\begin{bmatrix} -4.5 \\ -9 \end{bmatrix}\)
- \(b^{(2)}=4.5\)
(5)
\(\textup{pwlinr}({\bf X})=(l_2 \circ a_1\circ l_1)({\bf X})=\text{relu}\big({\bf X}{\bf W}^{(1)}\overset{bc}{+}{\boldsymbol b}^{(1)}\big){\bf W}^{(2)}\overset{bc}{+}b^{(2)}\)\(\quad =\begin{bmatrix} -4.5\times\text{relu}(x_1) -9.0 \times \text{relu}(-x_1) +4.5 \\ -4.5\times\text{relu}(x_2) -9.0 \times\text{relu}(-x_2) + 4.5 \\ \dots \\ -4.5\times \text{relu}(x_n) -9.0 \times\text{relu}(-x_n)+4.5 \end{bmatrix}\)
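The formulas above can be verified numerically. A minimal sketch (the names `W1`, `b1`, `W2`, `b2` are just local stand-ins for \({\bf W}^{(1)},{\boldsymbol b}^{(1)},{\bf W}^{(2)},b^{(2)}\)):

```python
# numerically check (2)-(5): relu(X @ W1 + b1) @ W2 + b2 reproduces y
W1 = torch.tensor([[1.0, -1.0]])
b1 = torch.tensor([[0.0, 0.0]])
W2 = torch.tensor([[-4.5],[-9.0]])
b2 = 4.5
X = torch.linspace(-1,1,1001).reshape(-1,1)
out = torch.relu(X @ W1 + b1) @ W2 + b2
expected = -4.5*torch.relu(X) - 9*torch.relu(-X) + 4.5
print(torch.allclose(out, expected))  # True
```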
3. Fitting the "irony of specs" data

```python
df = pd.read_csv("https://raw.githubusercontent.com/guebin/DL2025/main/posts/ironyofspec.csv")
```
```python
x = torch.tensor(df.x).float().reshape(-1,1)
y = torch.tensor(df.y).float().reshape(-1,1)
prob = torch.tensor(df.prob).float().reshape(-1,1)
```

```python
plt.plot(x,y,'.',alpha=0.03)
plt.plot(x,prob,'--')
```
- Thinking about Step 1: how should we build the network? = how should we design the architecture? = modeling (a quick shape trace follows the layer list below)

\[\underset{(n,1)}{\bf X} \overset{l_1}{\to} \underset{(n,2)}{\boldsymbol u^{(1)}} \overset{a_1}{\to} \underset{(n,2)}{\boldsymbol v^{(1)}} \overset{l_2}{\to} \underset{(n,1)}{\boldsymbol u^{(2)}} \overset{a_2}{\to} \underset{(n,1)}{\boldsymbol v^{(2)}}=\underset{(n,1)}{\hat{\boldsymbol y}}\]
- \(l_1\): `torch.nn.Linear(1,2,bias=False)`
- \(a_1\): `torch.nn.ReLU()`
- \(l_2\): `torch.nn.Linear(2,1,bias=True)`
- \(a_2\): `torch.nn.Sigmoid()`
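A minimal sketch (added here) that traces a dummy batch through the four layers to confirm the shape flow in the diagram:

```python
# fresh layers just for the shape check; trace (n,1)->(n,2)->(n,2)->(n,1)->(n,1) with n=5
l1 = torch.nn.Linear(1,2,bias=False)
a1 = torch.nn.ReLU()
l2 = torch.nn.Linear(2,1,bias=True)
a2 = torch.nn.Sigmoid()
X = torch.randn(5,1)
u1 = l1(X); v1 = a1(u1); u2 = l2(v1); v2 = a2(u2)
print(u1.shape, v1.shape, u2.shape, v2.shape)
# torch.Size([5, 2]) torch.Size([5, 2]) torch.Size([5, 1]) torch.Size([5, 1])
```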
- Steps 1-4

```python
torch.manual_seed(1)
net = torch.nn.Sequential(
    torch.nn.Linear(1,2,bias=False),
    torch.nn.ReLU(),
    torch.nn.Linear(2,1,bias=True),
    torch.nn.Sigmoid()
)
loss_fn = torch.nn.BCELoss()
optimizr = torch.optim.Adam(net.parameters())
```

The initial weights:

```python
net[0].weight.data
```

```
tensor([[ 0.5153],
        [-0.4414]])
```

```python
net[2].weight.data
```

```
tensor([[-0.1371,  0.3319]])
```
```python
for epoc in range(5000):
    ## step1
    yhat = net(x)
    ## step2
    loss = loss_fn(yhat,y)
    ## step3
    loss.backward()
    ## step4
    optimizr.step()
    optimizr.zero_grad()
```
```python
plt.plot(x,y,'.',alpha=0.03)
plt.plot(x,prob,'--')
plt.plot(x,yhat.data,'--')
```

The weights after training:

```python
net[0].weight.data
```

```
tensor([[ 1.7773],
        [-3.0447]])
```

```python
net[2].weight.data
```

```
tensor([[-0.9945, -2.7176]])
```
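An optional peek (an addition to the notes): dropping the final Sigmoid shows the "bent line" the network has learned, on the logit scale:

```python
# pre-sigmoid output = the learned piecewise-linear logit
logit = net[2](net[1](net[0](x)))
plt.plot(x, logit.data, '--')
```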
- Repeat for another 5000 iterations:

```python
for epoc in range(5000):
    ## step1
    yhat = net(x)
    ## step2
    loss = loss_fn(yhat,y)
    ## step3
    loss.backward()
    ## step4
    optimizr.step()
    optimizr.zero_grad()
```
```python
plt.plot(x,y,'.',alpha=0.03)
plt.plot(x,prob,'--')
plt.plot(x,yhat.data,'--')
```
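As a rough closing check (an addition; `prob` is the true probability column from the csv), we can measure how far the fitted curve is from the true curve:

```python
# mean squared gap between the fitted yhat and the true prob curve
print(torch.mean((net(x).data - prob)**2))
```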