from pygam import PoissonGAM, s, te
from pygam.datasets import chicago
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
1. imports
2. data
= chicago(return_X_y=True)
X, y = PoissonGAM(s(0, n_splines=200) + te(3, 1) + s(2)).fit(X, y)
gam print(X)
[[-2.55650000e+03 3.15000000e+01 -7.43354430e+00 -1.95923379e+01]
[-2.55450000e+03 3.30000000e+01 -8.26530612e-01 -2.02173379e+01]
[-2.55350000e+03 2.90000000e+01 5.56645570e+00 -1.96756712e+01]
...
[ 2.55450000e+03 2.10000000e+01 -3.12394366e+00 -6.03620096e+00]
[ 2.55550000e+03 2.70000000e+01 -2.61239437e+01 -5.98700964e-01]
[ 2.55650000e+03 1.60000000e+01 -2.51239437e+01 -4.89036763e+00]]
plt.ion()'figure.figsize'] = (12, 8)
plt.rcParams[= gam.generate_X_grid(term=1, meshgrid=True)
XX = gam.partial_dependence(term=1, X=XX, meshgrid=True)
Z
= plt.axes(projection='3d')
ax 0], XX[1], Z, cmap='viridis') ax.plot_surface(XX[
from pygam import LinearGAM, s
from pygam.datasets import toy_interaction
= toy_interaction(return_X_y=True)
X, y = LinearGAM(s(0, by=1)).fit(X, y)
gam gam.summary()
LinearGAM
=============================================== ==========================================================
Distribution: NormalDist Effective DoF: 20.8507
Link Function: IdentityLink Log Likelihood: -2306456.1139
Number of Samples: 50000 AIC: 4612955.9292
AICc: 4612955.9492
GCV: 0.01
Scale: 0.01
Pseudo R-Squared: 0.9976
==========================================================================================================
Feature Function Lambda Rank EDoF P > x Sig. Code
================================= ==================== ============ ============ ============ ============
s(0) [0.6] 20 19.9 1.11e-16 ***
intercept 1 1.0 4.84e-02 *
==========================================================================================================
Significance codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
WARNING: Fitting splines and a linear function to a feature introduces a model identifiability problem
which can cause p-values to appear significant when they are not.
WARNING: p-values calculated in this manner behave correctly for un-penalized models or models with
known smoothing parameters, but when smoothing parameters have been estimated, the p-values
are typically lower than they should be, meaning that the tests reject the null too readily.
/tmp/ipykernel_164760/1955319438.py:6: UserWarning: KNOWN BUG: p-values computed in this summary are likely much smaller than they should be.
Please do not make inferences based on these values!
Collaborate on a solution, and stay up to date at:
github.com/dswah/pyGAM/issues/163
gam.summary()
from pygam import LinearGAM, s, f
from pygam.datasets import wage
= wage(return_X_y=True)
X, y
## model
= LinearGAM(s(0) + s(1) + f(2))
gam
gam.gridsearch(X, y)
## plotting
;
plt.figure()= plt.subplots(1,3);
fig, axs
= ['year', 'age', 'education']
titles for i, ax in enumerate(axs):
= gam.generate_X_grid(term=i)
XX =i, X=XX))
ax.plot(XX[:, i], gam.partial_dependence(term=i, X=XX, width=.95)[1], c='r', ls='--')
ax.plot(XX[:, i], gam.partial_dependence(termif i == 0:
-30,30)
ax.set_ylim(;
ax.set_title(titles[i])
= gam.predict(X)
prd print(prd)
100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time: 0:00:000:00
[ 53.1205114 99.77681713 111.72416178 ... 70.60725438 92.57124761
104.77865877]
<Figure size 1200x800 with 0 Axes>
import numpy as np
import statsmodels.api as sm
from statsmodels.gam.api import GLMGam, BSplines
from statsmodels.gam.tests.test_penalized import df_autos
= df_autos[['weight', 'hp']]
x_spline = BSplines(x_spline, df=[12, 12], degree=[3, 3])
bs
= np.array([21833888.8, 6460.38479])
alpha = GLMGam.from_formula('city_mpg ~ fuel + drive', data=df_autos,
gam_bs =bs, alpha=alpha)
smoother= gam_bs.fit()
res_bs print(res_bs.summary())
= gam_bs.select_penweight()[0]
aa
= GLMGam.from_formula('city_mpg ~ fuel + drive', data=df_autos,
gam_bs =bs, alpha=aa)
smoother= gam_bs.fit()
res_bs print(res_bs.summary())
Generalized Linear Model Regression Results
==============================================================================
Dep. Variable: city_mpg No. Observations: 203
Model: GLMGam Df Residuals: 188.93
Model Family: Gaussian Df Model: 13.07
Link Function: Identity Scale: 4.8440
Method: PIRLS Log-Likelihood: -440.89
Date: Tue, 20 May 2025 Deviance: 915.18
Time: 00:21:53 Pearson chi2: 915.
No. Iterations: 3 Pseudo R-squ. (CS): 0.9996
Covariance Type: nonrobust
================================================================================
coef std err z P>|z| [0.025 0.975]
--------------------------------------------------------------------------------
Intercept 51.9973 1.989 26.147 0.000 48.100 55.895
fuel[T.gas] -5.8509 0.725 -8.068 0.000 -7.272 -4.430
drive[T.fwd] 1.4220 0.816 1.742 0.081 -0.177 3.021
drive[T.rwd] 1.0827 0.840 1.290 0.197 -0.563 2.728
weight_s0 -3.5615 0.955 -3.728 0.000 -5.434 -1.689
weight_s1 -9.1011 1.743 -5.221 0.000 -12.518 -5.685
weight_s2 -13.0504 1.820 -7.172 0.000 -16.617 -9.484
weight_s3 -14.2879 1.846 -7.739 0.000 -17.907 -10.669
weight_s4 -15.2068 1.885 -8.069 0.000 -18.900 -11.513
weight_s5 -15.9885 1.955 -8.176 0.000 -19.821 -12.156
weight_s6 -16.6497 2.030 -8.203 0.000 -20.628 -12.671
weight_s7 -17.0029 2.037 -8.348 0.000 -20.995 -13.011
weight_s8 -19.3784 2.357 -8.222 0.000 -23.998 -14.759
weight_s9 -20.7959 2.445 -8.505 0.000 -25.588 -16.003
weight_s10 -20.8523 2.434 -8.568 0.000 -25.623 -16.082
hp_s0 -1.4402 0.556 -2.589 0.010 -2.531 -0.350
hp_s1 -3.1671 0.953 -3.323 0.001 -5.035 -1.299
hp_s2 -5.3441 1.185 -4.509 0.000 -7.667 -3.021
hp_s3 -6.4143 1.283 -5.000 0.000 -8.929 -3.900
hp_s4 -7.6873 1.335 -5.757 0.000 -10.305 -5.070
hp_s5 -8.9868 1.363 -6.592 0.000 -11.659 -6.315
hp_s6 -9.3044 1.404 -6.625 0.000 -12.057 -6.552
hp_s7 -10.8922 1.582 -6.885 0.000 -13.993 -7.791
hp_s8 -12.5287 2.319 -5.402 0.000 -17.075 -7.983
hp_s9 -14.5981 3.220 -4.534 0.000 -20.909 -8.287
hp_s10 -11.8905 2.549 -4.666 0.000 -16.886 -6.895
================================================================================
Generalized Linear Model Regression Results
==============================================================================
Dep. Variable: city_mpg No. Observations: 203
Model: GLMGam Df Residuals: 180.48
Model Family: Gaussian Df Model: 21.52
Link Function: Identity Scale: 4.0946
Method: PIRLS Log-Likelihood: -419.19
Date: Tue, 20 May 2025 Deviance: 738.98
Time: 00:21:58 Pearson chi2: 739.
No. Iterations: 3 Pseudo R-squ. (CS): 0.9999
Covariance Type: nonrobust
================================================================================
coef std err z P>|z| [0.025 0.975]
--------------------------------------------------------------------------------
Intercept 52.4614 2.268 23.131 0.000 48.016 56.907
fuel[T.gas] -6.6980 0.748 -8.955 0.000 -8.164 -5.232
drive[T.fwd] 1.3410 0.780 1.719 0.086 -0.188 2.870
drive[T.rwd] 1.0947 0.791 1.384 0.166 -0.456 2.645
weight_s0 -6.6859 2.378 -2.811 0.005 -11.347 -2.024
weight_s1 -17.8591 3.411 -5.235 0.000 -24.545 -11.173
weight_s2 -21.0695 3.038 -6.935 0.000 -27.024 -15.115
weight_s3 -22.5195 3.042 -7.404 0.000 -28.481 -16.558
weight_s4 -23.8743 3.095 -7.714 0.000 -29.940 -17.809
weight_s5 -25.4000 3.190 -7.961 0.000 -31.653 -19.147
weight_s6 -26.2096 3.247 -8.071 0.000 -32.574 -19.845
weight_s7 -25.6648 3.187 -8.053 0.000 -31.911 -19.419
weight_s8 -29.4997 3.669 -8.040 0.000 -36.692 -22.308
weight_s9 -31.0384 3.775 -8.223 0.000 -38.437 -23.640
weight_s10 -29.7902 3.467 -8.593 0.000 -36.585 -22.995
hp_s0 14.0467 5.372 2.615 0.009 3.519 24.575
hp_s1 3.7013 3.077 1.203 0.229 -2.330 9.733
hp_s2 3.5817 3.500 1.023 0.306 -3.279 10.442
hp_s3 0.1493 3.217 0.046 0.963 -6.157 6.455
hp_s4 5.1547 3.404 1.514 0.130 -1.518 11.827
hp_s5 -4.4357 3.208 -1.383 0.167 -10.722 1.851
hp_s6 2.5534 3.292 0.776 0.438 -3.899 9.005
hp_s7 -4.0536 3.350 -1.210 0.226 -10.620 2.513
hp_s8 2.5193 4.267 0.590 0.555 -5.844 10.882
hp_s9 -11.4462 5.134 -2.230 0.026 -21.509 -1.384
hp_s10 -0.6669 3.913 -0.170 0.865 -8.336 7.003
================================================================================