Lecture 9 - Goodness-of-fit

Example 1 - PM10 particles in Oslo

Author

Anna Lindgren

Published

May 4, 2026

1 Packages

New packages

We will need two new packages:

  • caret for calculating confusion matrices and associated measures,
  • pROC for producing ROC-curves and AUC.

Install them first (Tools > Install Packages...).

# Lecture 9 - Oslo particles - Goodness-of-fit
library(tidyverse); theme_set(theme_bw() + theme(text = element_text(size = 16)))
library(readxl)
library(caret)
library(pROC)

2 Refit all models

Read the data and turn categorical x-variables into factor variables. We will also need a factor version, highpm10_cat, of the y-variable, highpm10. Refit all models from Lecture 8:

# Read the data and create factor versions of the categorical variables,
# using "zero" as the reference category for tempdiff and zerodiff.
# highpm10_cat is a labelled factor copy of the 0/1 response highpm10.
oslo <- read_excel("Data/oslo.xlsx") |>
  mutate(
    highpm10_cat = factor(highpm10,
                          levels = c(0, 1),
                          labels = c("Low", "High")),
    tempdiff = relevel(as.factor(tempdiff), ref = "zero"),
    zerodiff = relevel(as.factor(zerodiff), ref = "zero"))

# Refit all the logistic regression models from Lecture 8.
# Traffic volume is rescaled to thousands of cars via I(cars/1000).
model_0_glm <- glm(highpm10 ~ 1, family = "binomial", data = oslo)                              # null model
model_1_glm <- glm(highpm10 ~ I(cars/1000), family = "binomial", data = oslo)                   # traffic only
model_2_glm <- glm(highpm10 ~ I(cars/1000) + windspeed, family = "binomial", data = oslo)       # + wind speed
model_3_glm <- glm(highpm10 ~ I(cars/1000) + zerodiff, family = "binomial", data = oslo)        # + zerodiff factor
model_4_glm <- glm(highpm10 ~ I(cars/1000)*windspeed, family = "binomial", data = oslo)         # traffic x wind interaction
model_5_glm <- glm(highpm10 ~ I(cars/1000) + tempdiff, family = "binomial", data = oslo)        # + tempdiff factor
model_6_glm <- glm(highpm10 ~ I(cars/1000)*windspeed + zerodiff, family = "binomial", data = oslo)
# Reduced model (tempdiff only) and the full "oslo" model.
model_red_glm <- glm(highpm10 ~ tempdiff, family = "binomial", data = oslo)
model_oslo_glm <- glm(highpm10 ~ I(cars/1000)*windspeed + tempdiff, family = "binomial", data = oslo)

2.1 Calculate the expected probabilities using all the models

# Add one column of fitted probabilities per model; type = "response"
# returns the estimated probability p-hat rather than the log-odds.
pred_phat <- mutate(
  oslo,
  p_0    = predict(model_0_glm,    type = "response"),
  p_1    = predict(model_1_glm,    type = "response"),
  p_2    = predict(model_2_glm,    type = "response"),
  p_3    = predict(model_3_glm,    type = "response"),
  p_red  = predict(model_red_glm,  type = "response"),
  p_4    = predict(model_4_glm,    type = "response"),
  p_5    = predict(model_5_glm,    type = "response"),
  p_6    = predict(model_6_glm,    type = "response"),
  p_oslo = predict(model_oslo_glm, type = "response"))
glimpse(pred_phat)
Rows: 500
Columns: 17
$ cars         <dbl> 2308, 3084, 110, 1854, 2351, 2662, 2478, 2387, 984, 2159,…
$ windspeed    <dbl> 4.2, 4.8, 4.3, 3.0, 5.6, 2.3, 1.9, 8.9, 2.0, 5.2, 3.4, 4.…
$ temp2m       <dbl> -4.4, -5.7, -13.5, 1.4, 4.1, 5.8, 2.7, 7.1, 4.1, 1.1, -9.…
$ tempdiff     <fct> zero, zero, zero, zero, pos, zero, zero, zero, zero, zero…
$ zerodiff     <fct> zero, zero, zero, zero, nonzero, zero, zero, zero, zero, …
$ windquad     <chr> "NE", "NE", "NE", "SE", "NW", "SW", "SW", "SW", "SW", "NE…
$ highpm10     <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
$ highpm10_cat <fct> Low, Low, Low, Low, High, Low, Low, Low, Low, Low, Low, L…
$ p_0          <dbl> 0.228, 0.228, 0.228, 0.228, 0.228, 0.228, 0.228, 0.228, 0…
$ p_1          <dbl> 0.2685731, 0.3469301, 0.1142603, 0.2282989, 0.2726122, 0.…
$ p_2          <dbl> 0.23462056, 0.29339186, 0.08407689, 0.23209207, 0.1933119…
$ p_3          <dbl> 0.19754609, 0.27459274, 0.06783712, 0.16064648, 0.5326425…
$ p_red        <dbl> 0.1683938, 0.1683938, 0.1683938, 0.1683938, 0.3544304, 0.…
$ p_4          <dbl> 0.21873189, 0.33564065, 0.04371516, 0.21221119, 0.1837597…
$ p_5          <dbl> 0.19699171, 0.26999879, 0.07121687, 0.16173017, 0.4954221…
$ p_6          <dbl> 0.17562057, 0.27664805, 0.03521523, 0.15832748, 0.4055599…
$ p_oslo       <dbl> 0.17011543, 0.26485253, 0.03479163, 0.15953615, 0.3197706…

3 Confusion matrix

Construct the confusion matrix for the Oslo model.

First calculate the predicted categories, \(\hat{Y}_i\), using \(\hat{Y}_i = 1\) if \(p_i > 0.5\) and 0 otherwise and turn it into a factor variable with the same labels as highpm10_cat:

# Classify as "High" when the estimated probability exceeds 0.5;
# the factor levels match those of highpm10_cat ("Low" first).
pred_phat <- pred_phat |> mutate(
  yhat_oslo = factor(ifelse(p_oslo > 0.5, "High", "Low"),
                     levels = c("Low", "High")))

Calculate the confusion matrix for the oslo model, using confusionMatrix() from the caret package. You have to use the factor version of \(Y\) and define which category is the “Positive” outcome:

# Confusion matrix with "High" as the positive class, so sensitivity
# measures how well the model detects high-PM10 days.
cm_oslo <- with(pred_phat,
                confusionMatrix(data = yhat_oslo,
                                reference = highpm10_cat,
                                positive = "High"))
cm_oslo
Confusion Matrix and Statistics

          Reference
Prediction Low High
      Low  374   90
      High  12   24
                                         
               Accuracy : 0.796          
                 95% CI : (0.758, 0.8305)
    No Information Rate : 0.772          
    P-Value [Acc > NIR] : 0.1091         
                                         
                  Kappa : 0.2364         
                                         
 Mcnemar's Test P-Value : 2.457e-14      
                                         
            Sensitivity : 0.2105         
            Specificity : 0.9689         
         Pos Pred Value : 0.6667         
         Neg Pred Value : 0.8060         
             Prevalence : 0.2280         
         Detection Rate : 0.0480         
   Detection Prevalence : 0.0720         
      Balanced Accuracy : 0.5897         
                                         
       'Positive' Class : High           
                                         
glimpse(cm_oslo)
List of 6
 $ positive: chr "High"
 $ table   : 'table' int [1:2, 1:2] 374 12 90 24
  ..- attr(*, "dimnames")=List of 2
  .. ..$ Prediction: chr [1:2] "Low" "High"
  .. ..$ Reference : chr [1:2] "Low" "High"
 $ overall : Named num [1:7] 0.796 0.236 0.758 0.83 0.772 ...
  ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
 $ byClass : Named num [1:11] 0.211 0.969 0.667 0.806 0.667 ...
  ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
 $ mode    : chr "sens_spec"
 $ dots    : list()
 - attr(*, "class")= chr "confusionMatrix"
cm_oslo$table
          Reference
Prediction Low High
      Low  374   90
      High  12   24
cm_oslo$overall
      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  7.960000e-01   2.364355e-01   7.579738e-01   8.304830e-01   7.720000e-01 
AccuracyPValue  McnemarPValue 
  1.091351e-01   2.456741e-14 
cm_oslo$byClass
         Sensitivity          Specificity       Pos Pred Value 
           0.2105263            0.9689119            0.6666667 
      Neg Pred Value            Precision               Recall 
           0.8060345            0.6666667            0.2105263 
                  F1           Prevalence       Detection Rate 
           0.3200000            0.2280000            0.0480000 
Detection Prevalence    Balanced Accuracy 
           0.0720000            0.5897191 

4 ROC-curves

The function roc() from the pROC package calculates the ROC-curve.

4.1 The null model

# ROC-curve for the null model: every observation gets the same predicted
# probability (p_0 = 0.228), so the curve is the diagonal and AUC = 0.5.
roc_0 <- roc(highpm10 ~ p_0, data = pred_phat)
roc_0

Call:
roc.formula(formula = highpm10 ~ p_0, data = pred_phat)

Data: p_0 in 386 controls (highpm10 0) < 114 cases (highpm10 1).
Area under the curve: 0.5

There are a number of things saved in the output:

glimpse(roc_0)
List of 17
 $ percent           : logi FALSE
 $ sensitivities     : num [1:2] 1 0
 $ specificities     : num [1:2] 0 1
 $ thresholds        : num [1:2] -Inf Inf
 $ direction         : chr "<"
 $ cases             : Named num [1:114] 0.228 0.228 0.228 0.228 0.228 ...
  ..- attr(*, "names")= chr [1:114] "5" "14" "15" "20" ...
 $ controls          : Named num [1:386] 0.228 0.228 0.228 0.228 0.228 ...
  ..- attr(*, "names")= chr [1:386] "1" "2" "3" "4" ...
 $ fun.sesp          :function (...)  
 $ auc               : 'auc' num 0.5
  ..- attr(*, "partial.auc")= logi FALSE
  ..- attr(*, "percent")= logi FALSE
  ..- attr(*, "roc")=List of 15
  .. ..$ percent           : logi FALSE
  .. ..$ sensitivities     : num [1:2] 1 0
  .. ..$ specificities     : num [1:2] 0 1
  .. ..$ thresholds        : num [1:2] -Inf Inf
  .. ..$ direction         : chr "<"
  .. ..$ cases             : Named num [1:114] 0.228 0.228 0.228 0.228 0.228 ...
  .. .. ..- attr(*, "names")= chr [1:114] "5" "14" "15" "20" ...
  .. ..$ controls          : Named num [1:386] 0.228 0.228 0.228 0.228 0.228 ...
  .. .. ..- attr(*, "names")= chr [1:386] "1" "2" "3" "4" ...
  .. ..$ fun.sesp          :function (...)  
  .. ..$ auc               : 'auc' num 0.5
  .. .. ..- attr(*, "partial.auc")= logi FALSE
  .. .. ..- attr(*, "percent")= logi FALSE
  .. .. ..- attr(*, "roc")=List of 8
  .. .. .. ..- attr(*, "class")= chr "roc"
  .. ..$ call              : language roc.default(response = response, predictor = predictors[, 1])
  .. ..$ original.predictor: Named num [1:500] 0.228 0.228 0.228 0.228 0.228 ...
  .. .. ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
  .. ..$ original.response : Named num [1:500] 0 0 0 0 1 0 0 0 0 0 ...
  .. .. ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
  .. ..$ predictor         : Named num [1:500] 0.228 0.228 0.228 0.228 0.228 ...
  .. .. ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
  .. ..$ response          : Named num [1:500] 0 0 0 0 1 0 0 0 0 0 ...
  .. .. ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
  .. ..$ levels            : chr [1:2] "0" "1"
  .. ..- attr(*, "class")= chr "roc"
 $ call              : language roc.formula(formula = highpm10 ~ p_0, data = pred_phat)
 $ original.predictor: Named num [1:500] 0.228 0.228 0.228 0.228 0.228 ...
  ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
 $ original.response : Named num [1:500] 0 0 0 0 1 0 0 0 0 0 ...
  ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
 $ predictor         : Named num [1:500] 0.228 0.228 0.228 0.228 0.228 ...
  ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
 $ response          : Named num [1:500] 0 0 0 0 1 0 0 0 0 0 ...
  ..- attr(*, "names")= chr [1:500] "1" "2" "3" "4" ...
 $ levels            : chr [1:2] "0" "1"
 $ predictor.name    : chr "p_0"
 $ response.name     : chr "highpm10"
 - attr(*, "class")= chr "roc"

4.2 The oslo model:

# ROC-curve for the full oslo model, using its predicted probabilities.
roc_oslo <- roc(highpm10 ~ p_oslo, data = pred_phat)
roc_oslo

Call:
roc.formula(formula = highpm10 ~ p_oslo, data = pred_phat)

Data: p_oslo in 386 controls (highpm10 0) < 114 cases (highpm10 1).
Area under the curve: 0.7547

4.3 Plot ROC-curve

You can extract the coordinates of the curve by the coords() function.

coords(roc_oslo) |> head()
   threshold specificity sensitivity
1       -Inf 0.000000000           1
2 0.01279326 0.002590674           1
3 0.01650654 0.005181347           1
4 0.01714387 0.007772021           1
5 0.01771694 0.010362694           1
6 0.01904887 0.012953368           1

You can use these coordinates for plotting, but the pROC package contains a function ggroc() that can do this for you. Since it uses ggplot() you can also add ggplot() elements to the plot. For instance, since both axes are 0-1, we specify a square plot area with + coord_fixed() and add a suitable title:

# Both axes run 0-1, so coord_fixed() gives a square plot area.
# Note that ggroc() draws specificity on a reversed x-axis.
ggroc(roc_oslo) +
  labs(title = "ROC-curve for model oslo") +
  coord_fixed()

Note that the x-axis is reversed!

The ggroc() function can plot several ROC-curves if given a list and the names to use as labels. Let’s add the ROC-curve for the null model, i.e., the diagonal:

# Plot both curves at once; the list names become the legend labels,
# and the null model supplies the diagonal reference line.
ggroc(list(null = roc_0,
           oslo = roc_oslo)) +
  labs(title = "ROC-curves for model oslo and the null model") +
  coord_fixed()

4.4 Find best threshold value

The default Youden method maximizes the vertical distance from the curve to the diagonal:

# "best" uses the Youden criterion by default (maximal vertical
# distance from the diagonal, i.e. max sensitivity + specificity - 1).
youden <- coords(roc_oslo, "best")
youden
  threshold specificity sensitivity
1 0.2448035    0.738342   0.6403509

Using the “closest.topleft” method minimizing the distance to the top-left ideal corner:

# "closest.topleft" instead minimizes the distance to the ideal
# top-left corner (specificity = sensitivity = 1).
topleft <- coords(roc_oslo, "best", best.method = "closest.topleft")
topleft
  threshold specificity sensitivity
1 0.2330427   0.6917098   0.6842105

The topleft method results in more equal sensitivity and specificity, so let’s use that one. Add a variable name = "methodname" to each set of coordinates so that we automatically get colours and legend labels when plotting:

# Label each threshold point so it can be mapped to colour below.
youden$name <- "youden"
topleft$name <- "topleft"

# Bug fix: the original geom_point() calls never mapped `name` to an
# aesthetic, so both points were drawn identically with no legend entry,
# contradicting the text above. Mapping colour = name gives each point
# its own colour and label in the shared colour legend.
ggroc(list(null = roc_0, oslo = roc_oslo), linewidth = 1) +
  geom_point(data = rbind(youden, topleft),
             aes(x = specificity, y = sensitivity, color = name),
             size = 3) +
  coord_fixed() +
  labs(title = "ROC-curve for model oslo",
       subtitle = "with optimal thresholds")

4.5 ROC-curves for all the models

# Fit the ROC-curves for the remaining models.
roc_red <- roc(highpm10 ~ p_red, data = pred_phat)
roc_1 <- roc(highpm10 ~ p_1, data = pred_phat)
roc_2 <- roc(highpm10 ~ p_2, data = pred_phat)
roc_3 <- roc(highpm10 ~ p_3, data = pred_phat)
roc_4 <- roc(highpm10 ~ p_4, data = pred_phat)
roc_5 <- roc(highpm10 ~ p_5, data = pred_phat)
roc_6 <- roc(highpm10 ~ p_6, data = pred_phat)

# All curves in one figure; the list names become the legend labels.
ggroc(list(`0` = roc_0, `1` = roc_1, `2` = roc_2, `3` = roc_3,
           `4` = roc_4, `5` = roc_5, `6` = roc_6, red = roc_red,
           oslo = roc_oslo),
      linewidth = 1) +
  labs(title = "ROC-curves for all the models") +
  coord_fixed()

5 Area Under the Curve

5.1 AUC for model oslo

You can pick out the AUC from the ROC-curve using the function auc():

auc(roc_oslo)
Area under the curve: 0.7547

and calculate its confidence interval using DeLong’s method with the function ci():

ci_oslo <- ci(roc_oslo)
ci_oslo
95% CI: 0.705-0.8045 (DeLong)

Note that the result consists of three numerical values even if only two of them are printed.

# lower limit:
ci_oslo[1]
[1] 0.70503
# AUC:
ci_oslo[2]
[1] 0.7547496
# upper limit:
ci_oslo[3]
[1] 0.8044691

5.2 AUC for all the models

# Collect the ROC objects in a named list so the AUC and its DeLong
# confidence interval need only one ci() call per model.
roc_list <- list(
  `0` = roc_0, `1` = roc_1, `2` = roc_2, red = roc_red,
  `3` = roc_3, `4` = roc_4, `5` = roc_5, `6` = roc_6,
  oslo = roc_oslo)
# ci() on a roc object returns c(lower, AUC, upper). The original mixed
# two equivalent forms, ci(roc_X)[1] for lwr but ci(auc(roc_X))[3] for
# upr, and recomputed the interval twice per model; one consistent call
# gives all three values at once. Result: a 3 x 9 matrix, one column
# per model.
ci_mat <- vapply(roc_list, \(r) as.numeric(ci(r)), numeric(3))
aucs <- data.frame(
  model = names(roc_list),
  auc = ci_mat[2, ],
  lwr = ci_mat[1, ],
  upr = ci_mat[3, ],
  row.names = NULL)
aucs
  model       auc       lwr       upr
1     0 0.5000000 0.5000000 0.5000000
2     1 0.6482138 0.5934717 0.7029559
3     2 0.6861876 0.6350702 0.7373050
4   red 0.6384306 0.5878164 0.6890447
5     3 0.7279793 0.6752745 0.7806840
6     4 0.6946187 0.6432921 0.7459453
7     5 0.7297064 0.6769386 0.7824741
8     6 0.7486365 0.6988793 0.7983936
9  oslo 0.7547496 0.7050300 0.8044691

5.3 Compare AUC against model oslo

The function roc.test() can be used to test whether the AUC values for two models are significantly different:

roc.test(roc_0, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_0 and roc_oslo
Z = -10.042, p-value < 2.2e-16
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.3044691 -0.2050300
sample estimates:
AUC of roc1 AUC of roc2 
  0.5000000   0.7547496 
roc.test(roc_1, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_1 and roc_oslo
Z = -3.873, p-value = 0.0001075
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.16044852 -0.05262302
sample estimates:
AUC of roc1 AUC of roc2 
  0.6482138   0.7547496 
roc.test(roc_2, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_2 and roc_oslo
Z = -3.1416, p-value = 0.00168
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.11133589 -0.02578801
sample estimates:
AUC of roc1 AUC of roc2 
  0.6861876   0.7547496 
roc.test(roc_red, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_red and roc_oslo
Z = -5.3592, p-value = 8.361e-08
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.1588593 -0.0737786
sample estimates:
AUC of roc1 AUC of roc2 
  0.6384306   0.7547496 
roc.test(roc_3, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_3 and roc_oslo
Z = -2.129, p-value = 0.03326
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.051415262 -0.002125325
sample estimates:
AUC of roc1 AUC of roc2 
  0.7279793   0.7547496 
roc.test(roc_4, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_4 and roc_oslo
Z = -3.0023, p-value = 0.00268
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.09938587 -0.02087593
sample estimates:
AUC of roc1 AUC of roc2 
  0.6946187   0.7547496 
roc.test(roc_5, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_5 and roc_oslo
Z = -1.9942, p-value = 0.04613
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.0496562234 -0.0004301324
sample estimates:
AUC of roc1 AUC of roc2 
  0.7297064   0.7547496 
roc.test(roc_6, roc_oslo)

    DeLong's test for two correlated ROC curves

data:  roc_6 and roc_oslo
Z = -1.5386, p-value = 0.1239
alternative hypothesis: true difference in AUC is not equal to 0
95 percent confidence interval:
 -0.013900550  0.001674389
sample estimates:
AUC of roc1 AUC of roc2 
  0.7486365   0.7547496