1. R distributions

Author

이상민

Published

March 25, 2025

1. Normal Distribution(정규분포)

- (Random Number Generation)

- rnorm(n, mean, sd) -> 평균 mean, 표준편차 sd 에서 n개 랜덤 추출

# Draw ten values from N(mean = 0, sd = 1) and print them under a label.
normal_random <- rnorm(10, mean = 0, sd = 1)
print("Normal Random Numbers:")
print(normal_random)
[1] "Normal Random Numbers:"
 [1] -0.72730915 -0.09955575 -2.01791946 -1.17899047  0.08952181  0.84630774
 [7]  0.34269490  0.17711561  1.02601457  0.60893424

- (Probability Density Function - PDF)

  • 확률밀도함수
# Standard normal density on a grid over [-10, 10] (step 0.05), drawn as a
# line. The grid expression is kept inline so the default x-axis label is
# the same as before.
normal_pdf <- dnorm(seq(-10, 10, by = 0.05), mean = 0, sd = 1)
plot(
  seq(-10, 10, by = 0.05), normal_pdf,
  main = "Normal PDF", type = "l"
)

- (Cumulative Distribution Function - CDF)

  • 누적분포함수
# Standard normal CDF evaluated on a grid over [-10, 10] (step 0.05) and
# drawn as a line.
normal_cdf <- pnorm(q = seq(-10, 10, by = 0.05), mean = 0, sd = 1)
plot(seq(-10, 10, by = 0.05), normal_cdf, main = "Normal CDF", type = "l")

- (Quantile Function)

  • 분위함수
# 95th percentile of the standard normal distribution.
normal_quantile <- qnorm(0.95, mean = 0, sd = 1)
print("Normal Quantile at p=0.95:")
print(normal_quantile)
[1] "Normal Quantile at p=0.95:"
[1] 1.644854

2. Bernoulli Distribution(베르누이 분포)

- rbinom

  • p=prob 에서
  • size번 베르누이 시행해서 성공한 횟수
  • n번 반복해서 return
# Ten Bernoulli(0.6) draws: a binomial with a single trial (size = 1).
bernoulli_random <- rbinom(10, size = 1, prob = 0.6)
print("Bernoulli Random Numbers:")
print(bernoulli_random)
[1] "Bernoulli Random Numbers:"
 [1] 1 1 1 0 0 1 1 0 1 1

- (Probability Mass Function - PMF)

  • 확률 질량 함수
# P(X = 0) and P(X = 1) for Bernoulli(0.6).
bernoulli_pmf <- dbinom(x = c(0, 1), size = 1, prob = 0.6)
# Label each bar with its outcome; without names.arg the bars are unlabeled.
barplot(bernoulli_pmf, names.arg = c(0, 1), main = "Bernoulli pmf")

- pbinom(q, size, prob)

  • 누적 분포 함수
# Bernoulli(0.6) CDF at 0 and 1, drawn as a step function. The x-range is
# padded slightly so both jump points are visible.
bernoulli_cdf <- pbinom(c(0, 1), size = 1, prob = 0.6)
plot(
  bernoulli_cdf ~ c(0, 1),
  type = "s", main = "Bernoulli cdf", xlim = c(-0.2, 1.2)
)

# CDF of the number of successes in 3 Bernoulli(0.6) trials. With size = 3
# this is Binomial(3, 0.6), so the plot title names that distribution.
# The variable name is kept unchanged.
bernoulli_cdf <- pbinom(q = c(0, 1, 2, 3), size = 3, prob = 0.6)
plot(bernoulli_cdf ~ c(0, 1, 2, 3), xlim = c(-0.2, 3.2),
     main = "Binomial(3, 0.6) cdf", type = "s")

- (Quantile Function)

  • 분위함수
# Smallest outcome k with P(X <= k) >= 0.8 for Bernoulli(0.6).
bernoulli_quantile <- qbinom(0.8, size = 1, prob = 0.6)
print("Bernoulli Quantile at p=0.8:")
print(bernoulli_quantile)
[1] "Bernoulli Quantile at p=0.8:"
[1] 1

3. Binomial Distribution(이항분포)

- 베르누이분포와 같은 함수지만 size를 1이 아니라 B(n,p)에서의 n으로 바꿈

# Ten draws from Binomial(size = 5, prob = 0.4).
binomial_random <- rbinom(10, size = 5, prob = 0.4)
print("Binomial Random Numbers:")
print(binomial_random)
[1] "Binomial Random Numbers:"
 [1] 2 3 3 2 3 1 1 3 3 1

- 확률 질량 함수

  • dbinom(x, size, prob)
# Probabilities of observing 0, 1, ..., 5 successes under Binomial(5, 0.4).
# Stored as binomial_pmf so the Bernoulli pmf computed earlier is not
# overwritten by binomial values.
binomial_pmf <- dbinom(x = 0:5, size = 5, prob = 0.4)
# Label each bar with its success count.
barplot(binomial_pmf, names.arg = 0:5, main = "Binomial pmf")

- 이항분포의 특성상 분산이 평균보다 작거나 같음

  • E(X) = np
  • Var(X) = npq
  • q = 1-p ≤ 1 이므로 Var(X) = npq ≤ np = E(X)
# 100 draws from Binomial(5, 0.4), then the sample mean and sample variance
# of those draws.
binomial_random <- rbinom(100, size = 5, prob = 0.4)
mean(binomial_random)
var(binomial_random)
2.21
1.42010101010101

- 누적 분포 함수

  • pbinom(q, size, prob)
# Binomial(5, 0.4) CDF at 0..5, drawn as a step function.
binomial_cdf <- pbinom(c(0:5), size = 5, prob = 0.4)
plot(
  binomial_cdf ~ c(0:5),
  type = "s", xlim = c(0, 5), main = "Binomial cdf"
)

- (Quantile Function)

  • 분위함수
  • qbinom(p, size, prob)
# Smallest k with P(X <= k) >= 0.7 for Binomial(5, 0.4).
binomial_quantile <- qbinom(0.7, size = 5, prob = 0.4)
print("Binomial Quantile at p=0.7:")
print(binomial_quantile)
[1] "Binomial Quantile at p=0.7:"
[1] 3

4. Exponential Distribution(지수분포)

- rexp(n, rate)

# Ten draws from the exponential distribution with rate 2.
exponential_random <- rexp(10, rate = 2)
print("Exponential Random Numbers:")
print(exponential_random)
[1] "Exponential Random Numbers:"
 [1] 0.1571845 0.1074697 0.5828041 0.3155801 1.0756425 0.2600387 0.1384434
 [8] 1.6451927 0.1802277 0.3058570
# Sample means of exponential draws: a small sample (n = 10, rate = 2) and
# two independent large samples (n = 100000, rate = 10).
mean(rexp(n = 10, rate = 2))
mean(rexp(n = 100000, rate = 10))
mean(rexp(n = 100000, rate = 10))
# The sample mean approaches 1/rate (0.5 for rate = 2, 0.1 for rate = 10),
# not 1; the n = 10 estimate is noisy because the sample is small.
0.82651321556108
0.0997617675944707
0.100401256810418

- 확률 밀도 함수

  • dexp(x, rate)
# Density of Exp(rate = 2) on 1000 evenly spaced points over [0, 10].
# `length.out` is written in full instead of relying on partial matching
# of `length`.
exponential_pdf <- dexp(x = seq(0, 10, length.out = 1000), rate = 2)
plot(
  exponential_pdf ~ seq(0, 10, length.out = 1000),
  main = "Exponential PDF", type = "l", xlab = "x", ylab = "value"
)

- 누적 분포 함수

  • pexp(q, rate)
# CDF of Exp(rate = 2) on 1000 evenly spaced points over [0, 10].
# `length.out` is written in full instead of relying on partial matching
# of `length`.
exponential_cdf <- pexp(q = seq(0, 10, length.out = 1000), rate = 2)
# The exponential CDF is continuous, so it is drawn as a line rather than
# as a step function.
plot(
  exponential_cdf ~ seq(0, 10, length.out = 1000),
  main = "Exponential CDF", type = "l", xlab = "x", ylab = "value"
)
# print(exponential_cdf)  # uncomment to inspect the raw values

- (Quantile Function)

  • 분위함수
  • qexp(p, rate)
# 60th percentile of the exponential distribution with rate 2.
exponential_quantile <- qexp(0.6, rate = 2)
print("Exponential Quantile at p=0.6:")
print(exponential_quantile)
[1] "Exponential Quantile at p=0.6:"
[1] 0.4581454

5. Poisson Distribution(포아송 분포)

- rpois

  • lambda=3의 포아송 \(\to\) 결과
  • n=10번 반복
# Ten draws from Poisson(lambda = 3).
poisson_random <- rpois(10, lambda = 3)
print("Poisson Random Numbers:")
print(poisson_random)
[1] "Poisson Random Numbers:"
 [1] 1 4 2 5 4 3 1 4 2 5

포아송 분포를 따르는 변수의 평균과 분산이 같음

# Sample mean and sample variance of Poisson(3) draws. Each line draws its
# own independent sample of 100000 values.
mean(rpois(n = 100000, lambda = 3))
var(rpois(n = 100000, lambda = 3))
2.99803
2.97500698366984

- 확률 질량 함수

  • dpois(x,lambda)
# Poisson(3) probabilities for counts 0..12. Each entry is named by its
# count so the bars are labeled.
poisson_pmf <- setNames(dpois(x = seq(0, 12, 1), lambda = 3), seq(0, 12, 1))
barplot(poisson_pmf, main = "Poisson PMF")

- 누적 분포 함수

  • ppois(q, lambda)
# Poisson(3) CDF at counts 0..12, drawn as a step function.
poisson_cdf <- ppois(q = seq(0, 12, 1), lambda = 3)
# Name the CDF values by their count. The names belong on poisson_cdf,
# not on the pmf vector from the previous section.
names(poisson_cdf) <- seq(0, 12, 1)
plot(poisson_cdf ~ c(0:12), main = "Poisson CDF", xlab = "x",
     ylab = "value", type = "s")

- (Quantile Function)

  • 분위함수
  • qpois(p, lambda)
# Smallest k with P(X <= k) >= 0.9 for Poisson(lambda = 3).
poisson_quantile <- qpois(0.9, lambda = 3)
print("Poisson Quantile at p=0.9:")
print(poisson_quantile)
[1] "Poisson Quantile at p=0.9:"
[1] 5

- 추가

- 감마 분포

  • 평균 : \(\alpha \beta\) (\(\beta\)는 scale 모수; rgamma(n, shape, rate)의 세 번째 인자는 rate = \(1/\beta\))
  • 분산 : \(\alpha \beta^2\)
# Sample mean and variance of gamma draws. The second and third positional
# arguments of rgamma() are shape and rate, named here for clarity. Each
# line draws its own independent sample.
mean(rgamma(n = 100000, shape = 1, rate = 3))
var(rgamma(n = 100000, shape = 1, rate = 3))
0.333018431858545
0.111819570741366

- ggplot

library(ggplot2)
library(tidyr) 
# Build the three summary rows (mean, median, variance) for one sample.
#   distribution: label stored in the Distribution column.
#   x: numeric vector of simulated draws.
# Returns a 3-row data frame with columns Distribution, Value, Type.
summarise_sample <- function(distribution, x) {
  data.frame(
    Distribution = distribution,
    Value = c(mean(x), median(x), var(x)),
    Type = c("Mean", "Median", "Variance"),
    stringsAsFactors = FALSE
  )
}

# Empty accumulator; the Type column tells Mean / Median / Variance rows
# apart.
results <- data.frame(
  Distribution = character(),
  Value = numeric(),
  Type = character(),
  stringsAsFactors = FALSE
)

# 1. Normal Distribution ----
normal_data <- rnorm(n = 1000, mean = 0, sd = 1)
# Histogram; `bins` is the number of bars.
ggplot(data.frame(Value = normal_data), aes(x = Value)) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black") +
  labs(
    title = "Normal Distribution Histogram",
    x = "Value", y = "Frequency"
  ) +
  theme_bw()

results <- rbind(results, summarise_sample("Normal", normal_data))

# 2. Bernoulli Distribution ----
bernoulli_data <- rbinom(n = 1000, size = 1, prob = 0.7)
# Bar plot of the two outcomes; factor() makes the x-axis discrete.
ggplot(data.frame(Outcome = factor(bernoulli_data)), aes(x = Outcome)) +
  geom_bar(fill = "coral", color = "black") +
  labs(
    title = "Bernoulli Distribution Bar Plot",
    x = "Outcome (0: Failure, 1: Success)", y = "Frequency"
  ) +
  theme_bw()

results <- rbind(results, summarise_sample("Bernoulli", bernoulli_data))

# 3. Binomial Distribution ----
binomial_data <- rbinom(n = 1000, size = 10, prob = 0.3)
# Histogram; `binwidth` is the bar width, one bar per success count.
ggplot(data.frame(Successes = binomial_data), aes(x = Successes)) +
  geom_histogram(binwidth = 1, fill = "lightgreen", color = "black") +
  labs(
    title = "Binomial Distribution Histogram",
    x = "Number of Successes", y = "Frequency"
  ) +
  scale_x_continuous(breaks = seq(0, 10, by = 1)) + # x-axis tick marks
  theme_bw()

results <- rbind(results, summarise_sample("Binomial", binomial_data))

# 4. Exponential Distribution ----
exponential_data <- rexp(n = 1000, rate = 2)
ggplot(data.frame(Time = exponential_data), aes(x = Time)) +
  geom_histogram(bins = 30, fill = "gold", color = "black") +
  labs(
    title = "Exponential Distribution Histogram",
    x = "Time", y = "Frequency"
  ) +
  theme_bw()

results <- rbind(results, summarise_sample("Exponential", exponential_data))

# 5. Poisson Distribution ----
poisson_data <- rpois(n = 1000, lambda = 5)
ggplot(data.frame(Events = poisson_data), aes(x = Events)) +
  geom_histogram(binwidth = 1, fill = "violet", color = "black") +
  labs(
    title = "Poisson Distribution Histogram",
    x = "Number of Events", y = "Frequency"
  ) +
  scale_x_continuous(breaks = seq(0, max(poisson_data), by = 1)) +
  theme_bw()

results <- rbind(results, summarise_sample("Poisson", poisson_data))

# Show the combined summary table (15 rows: 5 distributions x 3 statistics).
results
A data.frame: 15 × 3
Distribution Value Type
<chr> <dbl> <chr>
Normal 0.01671411 Mean
Normal 0.01257944 Median
Normal 0.92430136 Variance
Bernoulli 0.70800000 Mean
Bernoulli 1.00000000 Median
Bernoulli 0.20694294 Variance
Binomial 2.94600000 Mean
Binomial 3.00000000 Median
Binomial 2.10919319 Variance
Exponential 0.51712705 Mean
Exponential 0.34010939 Median
Exponential 0.28038007 Variance
Poisson 4.93600000 Mean
Poisson 5.00000000 Median
Poisson 4.74464865 Variance