General liner model

# https://stackoverflow.com/questions/14069629/plotting-confidence-intervals
# http://cse.naro.affrc.go.jp/takezawa/r-tips/r/63.html
n <- 250
x <- rnorm(n)
df <- data.frame(x = x,y = x + rnorm(n = n))
attach(df)
fit <- lm(y~x)
summary(fit)
shapiro.test(fit$residuals)
shapiro.test(rnorm(n = n))
empirical <- ecdf(fit$residuals)
summary.stepfun(empirical)
# plot scatter
plot(y~x)
abline(fit,col='red',lwd=2)
panel.first = grid(nx = NULL,ny = NULL,lty = 2)
newdata <- seq(min(x),max(y),length.out = 100)
predict_data <- predict(object = fit,newdata = data.frame(x = newdata),interval = 'confidence')
lines(x = newdata,y = predict_data[,3], lty = 'dashed', col = 'red')
lines(x = newdata,y = predict_data[,2], lty = 'dashed', col = 'red')
polygon(x = c(rev(newdata),newdata),y = c(rev(predict_data[,3]),predict_data[,2]),col = rgb(0.5,0.5,0.5,0.3),border = NA)

detach(df)
# cdf
plot(empirical,do.point=F,verticals=T)
x <- seq(-5,5,length.out = 100)
lines(x,pnorm(x,mean=mean(fit$residuals),sd=sqrt(var(fit$residuals))),col='red',lwd=2)
panel.first = grid(nx = NULL,ny = NULL,lty = 2)

# qqplot
qqnorm(fit$residuals)
qqline(fit$residuals,col='red',lwd=2)
panel.first = grid(nx = NULL,ny = NULL,lty = 2)

remove(df)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1868 -0.6080 -0.0388  0.6151  2.5167 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) -0.09076    0.06291  -1.443                0.15    
## x            1.04017    0.06009  17.311 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9943 on 248 degrees of freedom
## Multiple R-squared:  0.5472, Adjusted R-squared:  0.5453 
## F-statistic: 299.7 on 1 and 248 DF,  p-value: < 0.00000000000000022
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  fit$residuals
## W = 0.99463, p-value = 0.5248
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  rnorm(n = n)
## W = 0.9969, p-value = 0.9099
## 
## Step function with continuity 'f'= 0 ,  250 knots with summary
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -3.18682 -0.60802 -0.03878  0.00000  0.61508  2.51675 
## 
## and  251 plateau levels (y) with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.25    0.50    0.50    0.75    1.00

GLM:poisson

# https://www.jstage.jst.go.jp/article/weed/55/4/55_4_287/_pdf
# http://hosho.ees.hokudai.ac.jp/~kubo/stat/2012/kobe/k2/kubostat2012k2.pdf
df <- data.frame(table(rpois(n = n,lambda = 3)))
colnames(df) <- c('x','y')
df
df$x <- as.numeric(df$x)
fit <- glm(y ~ x,family = poisson(link = log),data = df)
summ_result <- summary(fit)
summ_result
cat(paste0('Residual/degrees of freedom=',round(summ_result$deviance/summ_result$df.residual)))
par(family='Meiryo')
xlim <- c(0,max(df$x))
ylim <- c(0,max(df$y))
plot(x=df$x,y=df$y,xlim =xlim,ylim=ylim,cex=1,lwd=2)
par(new = T)
curve(exp(fit$coefficients[1] + fit$coefficients[2] * x),xlim = xlim,ylim = ylim,xaxt='n', yaxt='n', xlab = "", ylab = "",col='red',lwd=2)
panel.first = grid(nx = NULL,ny = NULL,lty = 2)

par(mfrow=c(2,2),family='Meiryo') 
plot(fit)

##     x  y
## 1   0  9
## 2   1 30
## 3   2 62
## 4   3 60
## 5   4 35
## 6   5 29
## 7   6 15
## 8   7  4
## 9   8  4
## 10  9  1
## 11 10  1
## 
## Call:
## glm(formula = y ~ x, family = poisson(link = log), data = df)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -7.247  -2.830  -1.902   2.262   5.247  
## 
## Coefficients:
##             Estimate Std. Error z value            Pr(>|z|)    
## (Intercept)   4.1280     0.1125  36.704 <0.0000000000000002 ***
## x            -0.1992     0.0224  -8.895 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 232.52  on 10  degrees of freedom
## Residual deviance: 143.94  on  9  degrees of freedom
## AIC: 194.84
## 
## Number of Fisher Scoring iterations: 5
## 
## Residual/degrees of freedom=16

GLM:negative binomial distribution

Next time

Source code: https://codepen.io/takaneichinose/pen/oomjqm