R.version.string
package
sapply(package, packageVersion)
[1] "R version 3.6.1 (2019-07-05)"
[1] "lmtest"
$lmtest
[1] 0 9 37
list(function (formula, order.by = NULL, alternative = c("greater",
"two.sided", "less"), iterations = 15, exact = NULL, tol = 0.0000000001,
data = list())
{
dname <- paste(deparse(substitute(formula)))
alternative <- match.arg(alternative)
if (!inherits(formula, "formula")) {
if (!is.null(w <- weights(formula))) {
if (!isTRUE(all.equal(as.vector(w), rep(1L, length(w)))))
stop("weighted regressions are not supported")
}
X <- if (is.matrix(formula$x))
formula$x
else model.matrix(terms(formula), model.frame(formula))
y <- if (is.vector(formula$y))
formula$y
else model.response(model.frame(formula))
}
else {
mf <- model.frame(formula, data = data)
y <- model.response(mf)
X <- model.matrix(formula, data = data)
}
if (!is.null(order.by)) {
if (inherits(order.by, "formula")) {
z <- model.matrix(order.by, data = data)
z <- as.vector(z[, ncol(z)])
}
else {
z <- order.by
}
X <- as.matrix(X[order(z), ])
y <- y[order(z)]
}
n <- nrow(X)
if (is.null(exact))
exact <- (n < 100)
k <- ncol(X)
res <- lm.fit(X, y)$residuals
dw <- sum(diff(res)^2)/sum(res^2)
Q1 <- chol2inv(qr.R(qr(X)))
if (n < 3) {
warning("not enough observations for computing a p value, set to 1")
pval <- 1
}
else {
if (exact) {
A <- diag(c(1, rep(2, n - 2), 1))
A[abs(row(A) - col(A)) == 1] <- -1
MA <- diag(rep(1, n)) - X %*% Q1 %*% t(X)
MA <- MA %*% A
ev <- eigen(MA)$values[1:(n - k)]
if (any(Im(ev) > tol))
warning("imaginary parts of eigenvalues discarded")
ev <- Re(ev)
ev <- ev[ev > tol]
pdw <- function(dw) .Fortran("pan", as.double(c(dw,
ev)), as.integer(length(ev)), as.double(0), as.integer(iterations),
x = double(1), PACKAGE = "lmtest")$x
pval <- switch(alternative, two.sided = (2 * min(pdw(dw),
1 - pdw(dw))), less = (1 - pdw(dw)), greater = pdw(dw))
if (is.na(pval) || ((pval > 1) | (pval < 0))) {
warning("exact p value cannot be computed (not in [0,1]), approximate p value will be used")
exact <- FALSE
}
}
if (!exact) {
if (n < max(5, k)) {
warning("not enough observations for computing an approximate p value, set to 1")
pval <- 1
}
else {
AX <- matrix(as.vector(filter(X, c(-1, 2, -1))),
ncol = k)
AX[1, ] <- X[1, ] - X[2, ]
AX[n, ] <- X[n, ] - X[(n - 1), ]
XAXQ <- t(X) %*% AX %*% Q1
P <- 2 * (n - 1) - sum(diag(XAXQ))
Q <- 2 * (3 * n - 4) - 2 * sum(diag(crossprod(AX) %*%
Q1)) + sum(diag(XAXQ %*% XAXQ))
dmean <- P/(n - k)
dvar <- 2/((n - k) * (n - k + 2)) * (Q - P *
dmean)
pval <- switch(alternative, two.sided = (2 *
pnorm(abs(dw - dmean), sd = sqrt(dvar), lower.tail = FALSE)),
less = pnorm(dw, mean = dmean, sd = sqrt(dvar),
lower.tail = FALSE), greater = pnorm(dw,
mean = dmean, sd = sqrt(dvar)))
}
}
}
alternative <- switch(alternative, two.sided = "true autocorrelation is not 0",
less = "true autocorrelation is less than 0", greater = "true autocorrelation is greater than 0")
names(dw) <- "DW"
RVAL <- list(statistic = dw, method = "Durbin-Watson test",
alternative = alternative, p.value = pval, data.name = dname)
class(RVAL) <- "htest"
return(RVAL)
})
dwtest | R Documentation |
Performs the Durbin-Watson test for autocorrelation of disturbances.
dwtest(formula, order.by = NULL, alternative = c("greater", "two.sided", "less"), iterations = 15, exact = NULL, tol = 1e-10, data = list())
formula
|
a symbolic description for the model to be tested (or a fitted |
order.by
|
Either a vector |
alternative
|
a character string specifying the alternative hypothesis. |
iterations
|
an integer specifying the number of iterations when calculating the p-value with the “pan” algorithm. |
exact
|
logical. If set to |
tol
|
tolerance. Eigenvalues computed have to be greater than |
data
|
an optional data frame containing the variables in the model. By default the variables are taken from the environment which |
The Durbin-Watson test has the null hypothesis that the autocorrelation of the disturbances is 0. It is possible to test against the alternative that it is greater than, not equal to, or less than 0, respectively. This can be specified by the alternative
argument.
Under the assumption of normally distributed disturbances, the null distribution of the Durbin-Watson statistic is the distribution of a linear combination of chi-squared variables. The p-value is computed using the Fortran version of Applied Statistics Algorithm AS 153 by Farebrother (1980, 1984). This algorithm is called “pan” or “gradsol”. For large sample sizes the algorithm might fail to compute the p value; in that case a warning is printed and an approximate p value will be given; this p value is computed using a normal approximation with mean and variance of the Durbin-Watson test statistic.
Examples can not only be found on this page, but also on the help pages of the data sets bondyield
, currencysubstitution
, growthofmoney
, moneydemand
, unemployment
, wages
.
An object of class “htest”
containing:
statistic
|
the test statistic. |
method
|
a character string with the method used. |
alternative
|
a character string describing the alternative hypothesis. |
p.value
|
the corresponding p-value. |
data.name
|
a character string with the data name. |
J. Durbin & G.S. Watson (1950), Testing for Serial Correlation in Least Squares Regression I. Biometrika 37, 409–428.
J. Durbin & G.S. Watson (1951), Testing for Serial Correlation in Least Squares Regression II. Biometrika 38, 159–177.
J. Durbin & G.S. Watson (1971), Testing for Serial Correlation in Least Squares Regression III. Biometrika 58, 1–19.
R.W. Farebrother (1980), Pan’s Procedure for the Tail Probabilities of the Durbin-Watson Statistic (Corr: 81V30 p189; AS R52: 84V33 p363- 366; AS R53: 84V33 p366- 369). Applied Statistics 29, 224–227.
R. W. Farebrother (1984), [AS R53] A Remark on Algorithms AS 106 (77V26 p92-98), AS 153 (80V29 p224-227) and AS 155: The Distribution of a Linear Combination of χ^2 Random Variables (80V29 p323-333) Applied Statistics 33, 366–369.
W. Krテ、mer & H. Sonnberger (1986), The Linear Regression Model under Test. Heidelberg: Physica.
lm
## generate two AR(1) error terms with parameter ## rho = 0 (white noise) and rho = 0.9 respectively err1 <- rnorm(100) ## generate regressor and dependent variable x <- rep(c(-1,1), 50) y1 <- 1 + x + err1 ## perform Durbin-Watson test dwtest(y1 ~ x) err2 <- filter(err1, 0.9, method="recursive") y2 <- 1 + x + err2 dwtest(y2 ~ x)
list(function (x, y, offset = NULL, method = "qr", tol = 0.0000001,
singular.ok = TRUE, ...)
{
if (is.null(n <- nrow(x)))
stop("'x' must be a matrix")
if (n == 0L)
stop("0 (non-NA) cases")
p <- ncol(x)
if (p == 0L) {
return(list(coefficients = numeric(), residuals = y,
fitted.values = 0 * y, rank = 0, df.residual = length(y)))
}
ny <- NCOL(y)
if (is.matrix(y) && ny == 1)
y <- drop(y)
if (!is.null(offset))
y <- y - offset
if (NROW(y) != n)
stop("incompatible dimensions")
if (method != "qr")
warning(gettextf("method = '%s' is not supported. Using 'qr'",
method), domain = NA)
chkDots(...)
z <- .Call(C_Cdqrls, x, y, tol, FALSE)
if (!singular.ok && z$rank < p)
stop("singular fit encountered")
coef <- z$coefficients
pivot <- z$pivot
r1 <- seq_len(z$rank)
dn <- colnames(x)
if (is.null(dn))
dn <- paste0("x", 1L:p)
nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank))
r2 <- if (z$rank < p)
(z$rank + 1L):p
else integer()
if (is.matrix(y)) {
coef[r2, ] <- NA
if (z$pivoted)
coef[pivot, ] <- coef
dimnames(coef) <- list(dn, colnames(y))
dimnames(z$effects) <- list(nmeffects, colnames(y))
}
else {
coef[r2] <- NA
if (z$pivoted)
coef[pivot] <- coef
names(coef) <- dn
names(z$effects) <- nmeffects
}
z$coefficients <- coef
r1 <- y - z$residuals
if (!is.null(offset))
r1 <- r1 + offset
if (z$pivoted)
colnames(z$qr) <- colnames(x)[z$pivot]
qr <- z[c("qr", "qraux", "pivot", "tol", "rank")]
c(z[c("coefficients", "residuals", "effects", "rank")], list(fitted.values = r1,
assign = attr(x, "assign"), qr = structure(qr, class = "qr"),
df.residual = n - z$rank))
})
lm.fit | R Documentation |
These are the basic computing engines called by lm
used to fit linear models. These should usually not be used directly unless by experienced users. .lm.fit()
is bare bone wrapper to the innermost QR-based C code, on which glm.fit
and lsfit
are based as well, for even more experienced users.
lm.fit (x, y, offset = NULL, method = "qr", tol = 1e-7, singular.ok = TRUE, ...) lm.wfit(x, y, w, offset = NULL, method = "qr", tol = 1e-7, singular.ok = TRUE, ...) .lm.fit(x, y, tol = 1e-7)
x
|
design matrix of dimension |
y
|
vector of observations of length |
w
|
vector of weights (length |
offset
|
(numeric of length |
method
|
currently, only |
tol
|
tolerance for the |
singular.ok
|
logical. If |
…
|
currently disregarded. |
a list
with components (for lm.fit
and lm.wfit
)
coefficients
|
|
residuals
|
|
fitted.values
|
|
effects
|
|
weights
|
|
rank
|
integer, giving the rank |
df.residual
|
degrees of freedom of residuals |
qr
|
the QR decomposition, see |
Fits without any columns or non-zero weights do not have the effects
and qr
components.
.lm.fit()
returns a subset of the above, the qr
part unwrapped, plus a logical component pivoted
indicating if the underlying QR algorithm did pivot.
lm
which you should use for linear least squares regression, unless you know better.
require(utils) set.seed(129) n <- 7 ; p <- 2 X <- matrix(rnorm(n * p), n, p) # no intercept! y <- rnorm(n) w <- rnorm(n)^2 str(lmw <- lm.wfit(x = X, y = y, w = w)) str(lm. <- lm.fit (x = X, y = y)) if(require("microbenchmark")) { mb <- microbenchmark(lm(y~X), lm.fit(X,y), .lm.fit(X,y)) print(mb) boxplot(mb, notch=TRUE) }