Reasoning - lmtest 似然比检验Likelihood Ratio Test 比较两个线性模型

library(lmtest)

Loading required package: zoo


Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

## with data from Greene (1993):
## load data and compute lags
data("USDistLag")
usdl <- na.contiguous(cbind(USDistLag, lag(USDistLag, k = -1)))
colnames(usdl) <- c("con", "gnp", "con1", "gnp1")

fm1 <- lm(con ~ gnp + gnp1, data = usdl)
fm2 <- lm(con ~ gnp + con1 + gnp1, data = usdl)

## various equivalent specifications of the LR test
## 下面4种操作方法是等价的
lrtest(fm2, fm1)

Likelihood ratio test

Model 1: con ~ gnp + con1 + gnp1
Model 2: con ~ gnp + gnp1
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   5 -56.069                         
2   4 -65.871 -1 19.605  9.524e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lrtest(fm2, 2) # Remove variable with idx 2

Likelihood ratio test

Model 1: con ~ gnp + con1 + gnp1
Model 2: con ~ gnp + gnp1
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   5 -56.069                         
2   4 -65.871 -1 19.605  9.524e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lrtest(fm2, "con1") # Remove variable cond 1

Likelihood ratio test

Model 1: con ~ gnp + con1 + gnp1
Model 2: con ~ gnp + gnp1
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   5 -56.069                         
2   4 -65.871 -1 19.605  9.524e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

lrtest(fm2, . ~ . - con1)  # Remove con1 by formula

Likelihood ratio test

Model 1: con ~ gnp + con1 + gnp1
Model 2: con ~ gnp + gnp1
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   5 -56.069                         
2   4 -65.871 -1 19.605  9.524e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Survival 模型交互项：

library(survival)

fit2 = coxph(Surv(time, status) ~ age + sex*factor(ph.ecog), data = lung)
lmtest::lrtest(fit2, . ~ . - sex:factor(ph.ecog))

Likelihood ratio test

Model 1: Surv(time, status) ~ age + sex * factor(ph.ecog)
Model 2: Surv(time, status) ~ age + sex + factor(ph.ecog)
  #Df  LogLik Df  Chisq Pr(>Chisq)
1   7 -728.93                     
2   5 -729.05 -2 0.2263      0.893

summary(fit2)

Call:
coxph(formula = Surv(time, status) ~ age + sex * factor(ph.ecog), 
    data = lung)

  n= 227, number of events= 164 
   (1 observation deleted due to missingness)

                          coef exp(coef)  se(coef)      z Pr(>|z|)  
age                   0.009667  1.009714  0.009601  1.007   0.3140  
sex                  -0.641345  0.526583  0.385890 -1.662   0.0965 .
factor(ph.ecog)1      0.335017  1.397964  0.609291  0.550   0.5824  
factor(ph.ecog)2      0.618092  1.855384  0.686960  0.900   0.3683  
factor(ph.ecog)3      1.941570  6.969687  1.033409  1.879   0.0603 .
sex:factor(ph.ecog)1  0.063350  1.065400  0.451918  0.140   0.8885  
sex:factor(ph.ecog)2  0.221865  1.248403  0.507016  0.438   0.6617  
sex:factor(ph.ecog)3        NA        NA  0.000000     NA       NA  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                     exp(coef) exp(-coef) lower .95 upper .95
age                     1.0097     0.9904    0.9909     1.029
sex                     0.5266     1.8990    0.2472     1.122
factor(ph.ecog)1        1.3980     0.7153    0.4235     4.615
factor(ph.ecog)2        1.8554     0.5390    0.4827     7.131
factor(ph.ecog)3        6.9697     0.1435    0.9195    52.827
sex:factor(ph.ecog)1    1.0654     0.9386    0.4394     2.583
sex:factor(ph.ecog)2    1.2484     0.8010    0.4621     3.372
sex:factor(ph.ecog)3        NA         NA        NA        NA

Concordance= 0.636  (se = 0.025 )
Likelihood ratio test= 31.09  on 7 df,   p=6e-05
Wald test            = 30.72  on 7 df,   p=7e-05
Score (logrank) test = 33.92  on 7 df,   p=2e-05

除了用 likelihood ratio test，还可以使用 anova() 去对比。

但如果数据不一致时，上述模型无法见效。这时，应把不同的对比作为变量构建模型，然后可以采用 aov() 进行方差分析，看变量是否显著以评估是否有组间差异。