1. R distributions

1. Normal Distribution(정규분포)

- (Random Number Generation)

- rnorm(n, mean, sd) -> 평균 mean, 표준편차 sd 에서 n개 랜덤 추출

# Draw ten values from a normal distribution with mean 0 and sd 1, then print
# them beneath a header line.
normal_random <- rnorm(10, mean = 0, sd = 1)
print("Normal Random Numbers:")
print(normal_random)

[1] "Normal Random Numbers:"
 [1] -0.72730915 -0.09955575 -2.01791946 -1.17899047  0.08952181  0.84630774
 [7]  0.34269490  0.17711561  1.02601457  0.60893424

- (Probability Density Function - PDF)

확률밀도함수

# Evaluate the N(0, 1) density on an evenly spaced grid over [-10, 10] and draw
# it as a line. The grid is built once so the x and y vectors cannot drift
# apart, and the axes get explicit labels instead of deparsed R expressions.
normal_grid <- seq(-10, 10, by = 0.05)
normal_pdf <- dnorm(x = normal_grid, mean = 0, sd = 1)
plot(normal_grid, normal_pdf, type = "l", main = "Normal PDF",
     xlab = "x", ylab = "value")

- (Cumulative Distribution Function - CDF)

누적분포함수

# Evaluate the N(0, 1) cumulative distribution function on an evenly spaced
# grid over [-10, 10] and draw it as a line. The grid is built once and reused
# for both axes; the title typo ("Norma") is fixed and the axes are labelled.
normal_grid <- seq(-10, 10, by = 0.05)
normal_cdf <- pnorm(q = normal_grid, mean = 0, sd = 1)
plot(normal_grid, normal_cdf, main = "Normal CDF", type = "l",
     xlab = "x", ylab = "value")

- (Quantile Function)

분위함수

# 95% quantile of N(0, 1): the value q with P(X <= q) = 0.95.
normal_quantile <- qnorm(0.95, mean = 0, sd = 1)
print("Normal Quantile at p=0.95:")
print(normal_quantile)

[1] "Normal Quantile at p=0.95:"
[1] 1.644854

2. Bernoulli Distribution(베르누이 분포)

- rbinom

rbinom(n, size, prob):
성공 확률이 prob인 베르누이 시행을 size번 수행했을 때의 성공 횟수를
n개 생성해서 반환 (size = 1이면 베르누이 분포)

# Ten Bernoulli(0.6) draws: a binomial with a single trial (size = 1), so each
# value is 0 or 1.
bernoulli_random <- rbinom(10, size = 1, prob = 0.6)
print("Bernoulli Random Numbers:")
print(bernoulli_random)

[1] "Bernoulli Random Numbers:"
 [1] 1 1 1 0 0 1 1 0 1 1

- (Probability Mass Function - PMF)

확률 질량 함수

# Probability mass of Bernoulli(0.6) at its two outcomes, 0 and 1.
# The bars are labelled with the outcome they represent, matching the way the
# Poisson PMF bar plot in this file labels its bars.
bernoulli_outcomes <- c(0, 1)
bernoulli_pmf <- dbinom(x = bernoulli_outcomes, size = 1, prob = 0.6)
barplot(bernoulli_pmf, names.arg = bernoulli_outcomes, main = "Bernoulli pmf")

- pbinom(q, size, prob)

누적 분포 함수

# CDF of Bernoulli(0.6) at its two support points, drawn as a step function.
bernoulli_support <- c(0, 1)
bernoulli_cdf <- pbinom(q = bernoulli_support, size = 1, prob = 0.6)
plot(bernoulli_support, bernoulli_cdf, xlim = c(-0.2, 1.2),
     main = "Bernoulli cdf", type = "s", xlab = "x", ylab = "value")

# With size = 3 the same function gives the CDF of Binomial(3, 0.6), i.e. the
# number of successes in three Bernoulli(0.6) trials. It is kept in its own
# variable and titled accordingly so it does not overwrite or get confused
# with the Bernoulli CDF above.
binomial3_support <- 0:3
binomial3_cdf <- pbinom(q = binomial3_support, size = 3, prob = 0.6)
plot(binomial3_support, binomial3_cdf, xlim = c(-0.2, 3.2),
     main = "Binomial(3, 0.6) cdf", type = "s", xlab = "x", ylab = "value")

- (Quantile Function)

분위함수

# Smallest outcome k of Bernoulli(0.6) with P(X <= k) >= 0.8.
bernoulli_quantile <- qbinom(0.8, size = 1, prob = 0.6)
print("Bernoulli Quantile at p=0.8:")
print(bernoulli_quantile)

[1] "Bernoulli Quantile at p=0.8:"
[1] 1

3. Binomial Distribution(이항분포)

- 베르누이분포와 같은 함수지만 size를 1이 아니라 B(n,p)에서의 n으로 바꿈

# Ten draws from Binomial(size = 5, prob = 0.4); each value is a success count
# between 0 and 5.
binomial_random <- rbinom(10, size = 5, prob = 0.4)
print("Binomial Random Numbers:")
print(binomial_random)

[1] "Binomial Random Numbers:"
 [1] 2 3 3 2 3 1 1 3 3 1

- 확률 질량 함수

dbinom(x, size, prob)

# Probability of observing 0, 1, ..., 5 successes under Binomial(5, 0.4).
# Stored as binomial_pmf (not bernoulli_pmf) so the Bernoulli PMF computed
# earlier is not overwritten; bars are labelled with their success count.
binomial_outcomes <- 0:5
binomial_pmf <- dbinom(x = binomial_outcomes, size = 5, prob = 0.4)
barplot(binomial_pmf, names.arg = binomial_outcomes, main = "Binomial pmf")

- 이항분포에서는 분산이 평균보다 클 수 없음

E(X) = np
Var(X) = npq = E(X) * q
q = 1 - p <= 1 이므로 Var(X) <= E(X)

# Draw 100 values from Binomial(5, 0.4) and show the sample mean and the
# sample variance of that same sample.
binomial_random <- rbinom(100, size = 5, prob = 0.4)
mean(binomial_random)
var(binomial_random)

2.21

1.42010101010101

- 누적 분포 함수

pbinom(q, size, prob)

# CDF of Binomial(5, 0.4) at each possible success count, drawn as a step
# function. Axes are labelled explicitly, like the exponential and Poisson CDF
# plots in this file, instead of showing the deparsed R expressions.
binomial_support <- 0:5
binomial_cdf <- pbinom(q = binomial_support, size = 5, prob = 0.4)
plot(binomial_support, binomial_cdf, main = "Binomial cdf", xlim = c(0, 5),
     type = "s", xlab = "x", ylab = "value")

- (Quantile Function)

분위함수
qbinom(p, size, prob)

# Smallest success count k of Binomial(5, 0.4) with P(X <= k) >= 0.7.
binomial_quantile <- qbinom(0.7, size = 5, prob = 0.4)
print("Binomial Quantile at p=0.7:")
print(binomial_quantile)

[1] "Binomial Quantile at p=0.7:"
[1] 3

4. Exponential Distribution(지수분포)

- rexp(n, rate)

# Ten draws from an exponential distribution with rate 2.
exponential_random <- rexp(10, rate = 2)
print("Exponential Random Numbers:")
print(exponential_random)

[1] "Exponential Random Numbers:"
 [1] 0.1571845 0.1074697 0.5828041 0.3155801 1.0756425 0.2600387 0.1384434
 [8] 1.6451927 0.1802277 0.3058570

# The sample mean of Exponential(rate) draws estimates 1 / rate.
# With only 10 draws at rate = 2 the estimate of 1 / 2 = 0.5 is noisy.
mean(rexp(n = 10, rate = 2))
# Two independent large samples at rate = 10: both means land close to
# 1 / 10 = 0.1 (not close to 1).
mean(rexp(n = 100000, rate = 10))
mean(rexp(n = 100000, rate = 10))

0.82651321556108

0.0997617675944707

0.100401256810418

- 확률 밀도 함수

dexp(x, rate)

# Density of Exponential(rate = 2) on 1000 evenly spaced points in [0, 10],
# drawn as a line.
exp_pdf_grid <- seq(0, 10, length.out = 1000)
exponential_pdf <- dexp(exp_pdf_grid, rate = 2)
plot(exponential_pdf ~ exp_pdf_grid,
     type = "l", main = "Exponential PDF", xlab = "x", ylab = "value")

- 누적 분포 함수

pexp(q, rate)

# CDF of Exponential(rate = 2) on 1000 evenly spaced points in [0, 10],
# drawn with step-style lines.
exp_cdf_grid <- seq(0, 10, length.out = 1000)
exponential_cdf <- pexp(exp_cdf_grid, rate = 2)
plot(exponential_cdf ~ exp_cdf_grid,
     type = "s", main = "Exponential CDF", xlab = "x", ylab = "value")
# Debug aid, disabled: print(exponential_cdf)

- (Quantile Function)

분위함수
qexp(p, rate)

# 60% quantile of Exponential(rate = 2): the value q with P(X <= q) = 0.6.
exponential_quantile <- qexp(0.6, rate = 2)
print("Exponential Quantile at p=0.6:")
print(exponential_quantile)

[1] "Exponential Quantile at p=0.6:"
[1] 0.4581454

5. Poisson Distribution(포아송 분포)

- rpois

rpois(n, lambda): 평균이 lambda = 3인 포아송 분포에서
난수를 n = 10개 추출해서 반환

# Ten draws from a Poisson distribution with lambda = 3.
poisson_random <- rpois(10, lambda = 3)
print("Poisson Random Numbers:")
print(poisson_random)

[1] "Poisson Random Numbers:"
 [1] 1 4 2 5 4 3 1 4 2 5

포아송 분포를 따르는 변수의 평균과 분산이 같음

# Check that a Poisson(3) variable has (approximately) equal mean and variance.
# Both statistics are computed on the same sample so they describe one set of
# draws rather than two unrelated ones.
poisson_sample <- rpois(n = 100000, lambda = 3)
mean(poisson_sample)
var(poisson_sample)

2.99803

2.97500698366984

- 확률 질량 함수

dpois(x,lambda)

# Poisson(3) probabilities for counts 0..12, named by count so that each bar
# in the bar plot is labelled with the count it represents.
poisson_counts <- seq(0, 12, 1)
poisson_pmf <- setNames(dpois(poisson_counts, lambda = 3), poisson_counts)
barplot(poisson_pmf, main = "Poisson PMF")

- 누적 분포 함수

ppois(q, lambda)

# CDF of Poisson(3) at counts 0..12, drawn as a step function.
poisson_cdf_counts <- seq(0, 12, 1)
poisson_cdf <- ppois(q = poisson_cdf_counts, lambda = 3)
# Name the CDF values by count. The original assigned these names to
# poisson_pmf (copy-paste slip), leaving poisson_cdf unnamed.
names(poisson_cdf) <- poisson_cdf_counts
plot(poisson_cdf ~ poisson_cdf_counts, main = "Poisson CDF", xlab = "x",
     ylab = "value", type = "s")

- (Quantile Function)

분위함수
qpois(p, lambda)

# Smallest count k of Poisson(3) with P(X <= k) >= 0.9.
poisson_quantile <- qpois(0.9, lambda = 3)
print("Poisson Quantile at p=0.9:")
print(poisson_quantile)

[1] "Poisson Quantile at p=0.9:"
[1] 5

- 추가

- 감마 분포

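평균 : \(\alpha \beta\)
분산 : \(\alpha \beta^2\)
(\(\alpha\) = shape, \(\beta\) = scale = 1/rate. rgamma(n, shape, rate)의 세 번째 인자는 rate이므로, 아래 코드는 shape = 1, rate = 3, 즉 \(\alpha = 1\), \(\beta = 1/3\) 이고 평균 1/3, 분산 1/9 에 해당)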

# Empirical mean and variance of a gamma sample. rgamma()'s third positional
# argument is `rate` (= 1 / scale), so the arguments are named to make clear
# that this is shape = 1, rate = 3: mean = shape / rate = 1/3 and
# variance = shape / rate^2 = 1/9. Both statistics use the same sample.
gamma_sample <- rgamma(n = 100000, shape = 1, rate = 3)
mean(gamma_sample)
var(gamma_sample)

0.333018431858545

0.111819570741366

- ggplot

library(ggplot2)
library(tidyr)

# Build the three summary rows (mean, median, variance) for one sample.
#   name: label stored in the Distribution column.
#   x:    numeric sample to summarise.
# Returns a 3-row data frame with columns Distribution, Value and Type.
summarise_distribution <- function(name, x) {
  data.frame(
    Distribution = name,
    Value = c(mean(x), median(x), var(x)),
    Type = c("Mean", "Median", "Variance"),
    stringsAsFactors = FALSE
  )
}

# Empty accumulator; each section below appends its three summary rows.
results <- data.frame(
  Distribution = character(),
  Value = numeric(),
  Type = character(),  # distinguishes the Mean / Median / Variance rows
  stringsAsFactors = FALSE
)

# 1. Normal Distribution ----
normal_data <- rnorm(n = 1000, mean = 0, sd = 1)

# Histogram of the sample (bins = number of bars).
ggplot(data.frame(Value = normal_data), aes(x = Value)) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black") +
  labs(
    title = "Normal Distribution Histogram",
    x = "Value",
    y = "Frequency"
  ) +
  theme_bw()

results <- rbind(results, summarise_distribution("Normal", normal_data))

# 2. Bernoulli Distribution ----
bernoulli_data <- rbinom(n = 1000, size = 1, prob = 0.7)

# Bar plot of outcome counts; the 0/1 outcomes are treated as categories.
ggplot(data.frame(Outcome = factor(bernoulli_data)), aes(x = Outcome)) +
  geom_bar(fill = "coral", color = "black") +
  labs(
    title = "Bernoulli Distribution Bar Plot",
    x = "Outcome (0: Failure, 1: Success)",
    y = "Frequency"
  ) +
  theme_bw()

results <- rbind(results, summarise_distribution("Bernoulli", bernoulli_data))

# 3. Binomial Distribution ----
# The number of trials is named once so the x-axis breaks always cover
# 0..size instead of relying on a second hard-coded 10.
binomial_size <- 10
binomial_data <- rbinom(n = 1000, size = binomial_size, prob = 0.3)

# Histogram with one bar per success count (binwidth = bar width).
ggplot(data.frame(Successes = binomial_data), aes(x = Successes)) +
  geom_histogram(binwidth = 1, fill = "lightgreen", color = "black") +
  labs(
    title = "Binomial Distribution Histogram",
    x = "Number of Successes",
    y = "Frequency"
  ) +
  scale_x_continuous(breaks = seq(0, binomial_size, by = 1)) +
  theme_bw()

results <- rbind(results, summarise_distribution("Binomial", binomial_data))

# 4. Exponential Distribution ----
exponential_data <- rexp(n = 1000, rate = 2)

ggplot(data.frame(Time = exponential_data), aes(x = Time)) +
  geom_histogram(bins = 30, fill = "gold", color = "black") +
  labs(
    title = "Exponential Distribution Histogram",
    x = "Time",
    y = "Frequency"
  ) +
  theme_bw()

results <- rbind(
  results,
  summarise_distribution("Exponential", exponential_data)
)

# 5. Poisson Distribution ----
poisson_data <- rpois(n = 1000, lambda = 5)

# Histogram with one bar per event count; breaks run up to the largest count
# actually observed in the sample.
ggplot(data.frame(Events = poisson_data), aes(x = Events)) +
  geom_histogram(binwidth = 1, fill = "violet", color = "black") +
  labs(
    title = "Poisson Distribution Histogram",
    x = "Number of Events",
    y = "Frequency"
  ) +
  scale_x_continuous(breaks = seq(0, max(poisson_data), by = 1)) +
  theme_bw()

results <- rbind(results, summarise_distribution("Poisson", poisson_data))

# Show the combined table: 5 distributions x 3 statistics = 15 rows.
results

A data.frame: 15 × 3
Distribution	Value	Type
<chr>	<dbl>	<chr>
Normal	0.01671411	Mean
Normal	0.01257944	Median
Normal	0.92430136	Variance
Bernoulli	0.70800000	Mean
Bernoulli	1.00000000	Median
Bernoulli	0.20694294	Variance
Binomial	2.94600000	Mean
Binomial	3.00000000	Median
Binomial	2.10919319	Variance
Exponential	0.51712705	Mean
Exponential	0.34010939	Median
Exponential	0.28038007	Variance
Poisson	4.93600000	Mean
Poisson	5.00000000	Median
Poisson	4.74464865	Variance