1 Loading Libraries

# Loading necessary packages
## Markdown Update
if(! "rmarkdown" %in% installed.packages()) { install.packages("rmarkdown", dependencies = TRUE) }
library(rmarkdown)

# Loading other packages if not available
if(! "readxl" %in% installed.packages()) { install.packages("readxl", dependencies = TRUE) }
library(readxl)

if(! "tidyr" %in% installed.packages()) { install.packages("tidyr", dependencies = TRUE) }
#library(tidyr)

if(! "zoo" %in% installed.packages()) { install.packages("zoo", dependencies = TRUE) }
#library(zoo)

if(! "car" %in% installed.packages()) { install.packages("car", dependencies = TRUE) }
#library(car)

# Global Settings
options(digits =   4)
options(scipen = 999)
setwd("~/AC UNI-ORG/AB SIM/GDBA/R")

2 Regression for Disease Data

2.1 Getting Data for Disease

# Download Training Data from URL
# Download Test Data from URL

2.2 Exploring Data for Disease

# Loading other packages if not available
if(! "vtable" %in% installed.packages()) { install.packages("vtable", dependencies = TRUE) }
library(vtable)

# Add ID to Data Frame
Patients$ID <- 1:nrow(Patients)

# Show Characteristics of Data Frame
cat("\n\nColumns Available in Data Frame:\n")
## 
## 
## Columns Available in Data Frame:
names(Patients)
## [1] "PatientID" "Age"       "Disease"   "ID"
cat("\n\nShow Structure of the Data Frame:\n")
## 
## 
## Show Structure of the Data Frame:
str(Patients)
## 'data.frame':    20 obs. of  4 variables:
##  $ PatientID: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Age      : int  25 29 30 31 32 41 41 42 44 49 ...
##  $ Disease  : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ ID       : int  1 2 3 4 5 6 7 8 9 10 ...
cat("\n\nFirst 5 Rows of Data Frame:\n")
## 
## 
## First 5 Rows of Data Frame:
head(Patients, 5)
##   PatientID Age Disease ID
## 1         1  25       0  1
## 2         2  29       0  2
## 3         3  30       0  3
## 4         4  31       0  4
## 5         5  32       0  5
cat("\n\nDescriptive Statistics of Columns in Data Frame:\n")
## 
## 
## Descriptive Statistics of Columns in Data Frame:
st(Patients, add.median = TRUE, out = "csv", simple.kable = TRUE, col.align = "right", align = "right", digits = 5,
   title='Summary Statistics',
   summ = list(
     c('notNA(x)','mean(x)','sd(x)','min(x)', 'pctile(x)[25]', 'median(x)', 'pctile(x)[75]', 'max(x)', 'propNA(x)', 'getmode(x)'),
     c('notNA(x)','mean(x)')
   ),
   summ.names = list(
     c('N','Mean','SD','Min','P25','P50','P75', 'Max','NA','Mode'),
     c('Count','Percent')
   )
)
##    Variable  N  Mean      SD Min   P25  P50   P75 Max NA Mode
## 1 PatientID 20  10.5  5.9161   1  5.75 10.5 15.25  20  0     
## 2       Age 20 52.95  19.508  25 38.75 49.5    69  84  0     
## 3   Disease 20  0.35 0.48936   0     0    0     1   1  0     
## 4        ID 20  10.5  5.9161   1  5.75 10.5 15.25  20  0

2.3 Performing Linear Regression for Disease

# Load Library if not available
if(! "stats" %in% installed.packages()) { install.packages("stats", dependencies = TRUE) }
library(stats)

# Run regression
LinReg1    <- lm(Disease ~ Age,  data = Patients)

summary(LinReg1)
## 
## Call:
## lm(formula = Disease ~ Age, data = Patients)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6992 -0.2345 -0.0602  0.2943  0.7655 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.33357    0.28521   -1.17     0.26  
## Age          0.01291    0.00507    2.55     0.02 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.431 on 18 degrees of freedom
## Multiple R-squared:  0.265,  Adjusted R-squared:  0.224 
## F-statistic: 6.49 on 1 and 18 DF,  p-value: 0.0202
anova(LinReg1)
## Analysis of Variance Table
## 
## Response: Disease
##           Df Sum Sq Mean Sq F value Pr(>F)  
## Age        1   1.21   1.205    6.49   0.02 *
## Residuals 18   3.34   0.186                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate Predictions
Patients$PredictLm <- predict(LinReg1)

# Calculate RMSE
RMSELinReg1 <- sqrt(mean((Patients$Disease - Patients$PredictLm)^2))

cat("\n\nRMSE of Linear Regression: ", sprintf(RMSELinReg1, fmt = '%#.4f'), "\n")
## 
## 
## RMSE of Linear Regression:  0.4090

2.4 Performing Logistic Regression for Disease

# Run Regression
LogReg1 <- glm(Disease ~ Age, data = Patients, family = binomial(link = 'logit'))

summary(LogReg1)
## 
## Call:
## glm(formula = Disease ~ Age, family = binomial(link = "logit"), 
##     data = Patients)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -4.3721     1.9656   -2.22    0.026 *
## Age           0.0670     0.0322    2.08    0.038 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25.898  on 19  degrees of freedom
## Residual deviance: 20.201  on 18  degrees of freedom
## AIC: 24.2
## 
## Number of Fisher Scoring iterations: 4
anova(LogReg1)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Disease
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
## NULL                    19       25.9           
## Age   1      5.7        18       20.2    0.017 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Load Library if not available
if(! "pscl" %in% installed.packages()) { install.packages("pscl", dependencies = TRUE) }
library(pscl)

# Calculate McFadden’s R2
pR2(LogReg1)
## fitting null model for pseudo-r2
##      llh  llhNull       G2 McFadden     r2ML     r2CU 
## -10.1007 -12.9489   5.6964   0.2200   0.2479   0.3414
# Calculate Predictions
Patients$PredictLogit <- predict(LogReg1)

# Calculate RMSE
RMSELogReg1 <- sqrt(mean((Patients$Disease - Patients$PredictLogit)^2))

cat("\n\nRMSE of Logistic Regression: ", sprintf(RMSELogReg1, fmt = '%#.4f'), "\n")
## 
## 
## RMSE of Logistic Regression:  1.6150

2.5 Comparing Models for Disease

# Produce AIC table
AIClin <- AIC(LinReg1)
AIClog <- AIC(LogReg1)
cat("\n\nAIC of Linear Regression: ", sprintf(AIClin, fmt = '%#.4f'), "\n")
## 
## 
## AIC of Linear Regression:  26.9915
cat("AIC of Logistic Regression: ", sprintf(AIClog, fmt = '%#.4f'), "\n")
## AIC of Logistic Regression:  24.2015
# Load Library if not available
if(! "dplyr" %in% installed.packages()) { install.packages("dplyr", dependencies = TRUE) }
library(dplyr)
if(! "broom" %in% installed.packages()) { install.packages("broom", dependencies = TRUE) }
library(broom)


glance(LinReg1) %>%
  dplyr::select(adj.r.squared, AIC, BIC, p.value)
## # A tibble: 1 × 4
##   adj.r.squared   AIC   BIC p.value
##           <dbl> <dbl> <dbl>   <dbl>
## 1         0.224  27.0  30.0  0.0202
glance(LogReg1) %>%
  dplyr::select(AIC, BIC)
## # A tibble: 1 × 2
##     AIC   BIC
##   <dbl> <dbl>
## 1  24.2  26.2

2.6 Showing Scatter Plot for Disease

# Show Scatter Plot
plot(Patients$Age, Patients$Disease,
xlab = "Age", ylab = "Disease", cex.lab = 1.2,
main = "Disease vs. Age", cex.main = 1.6, 
xlim = c(20, 90), pch = 16)
abline(LinReg1, lty = 3, col = "red")

# Add Courve to Plot
curve(predict(LogReg1, data.frame(Age = x),
              type = "resp"), add = TRUE, lwd = 2, col = "blue")
legend("topleft", legend = c("Lin", "Log"), col = c("red", "blue"), lty = c(3, 1), cex = 1.2)

2.7 Showing Plot with CI for Disease

# Load Library if not available
if(! "sjPlot" %in% installed.packages()) { install.packages("sjPlot", dependencies = TRUE) }
library(sjPlot)

# Plot Probability with CI
plot_model(LogReg1, type = "pred", axis.lim = c(0, 1))

2.8 Showing McFadden’s Rsquare for Logistic Regression of Disease

# Load Library if not available
if(! "pscl" %in% installed.packages()) { install.packages("pscl", dependencies = TRUE) }
library(pscl)

pR2(LogReg1)
## fitting null model for pseudo-r2
##      llh  llhNull       G2 McFadden     r2ML     r2CU 
## -10.1007 -12.9489   5.6964   0.2200   0.2479   0.3414

2.9 Calculating Odds Ratio and 95% CI and Predict for Disease

# Calculate Odds Ratio and 95% CI
exp(cbind(OR = coef(LogReg1), confint(LogReg1)))
##                  OR     2.5 % 97.5 %
## (Intercept) 0.01262 0.0001111 0.3458
## Age         1.06925 1.0110685 1.1534
# Predict Chance to Have Disease For Test Data
PatientsTest$Prob <- predict(LogReg1, newdata = PatientsTest, type = "response")

cat("\n\nThe Probability of Catching Disease for New Data is Shown in Prob:\n")
## 
## 
## The Probability of Catching Disease for New Data is Shown in Prob:
PatientsTest
##    PatientID Age Disease    Prob
## 1          1  26       0 0.06715
## 2          2  31       0 0.09142
## 3          3  31       1 0.09142
## 4          4  32       0 0.09713
## 5          5  35       0 0.11623
## 6          6  41       0 0.16426
## 7          7  41       1 0.16426
## 8          8  42       0 0.17365
## 9          9  82       1 0.75367
## 10        10  83       1 0.76589
Disease50 <- exp(LogReg1$coefficients[1] + LogReg1$coefficients[2] * 50) / (1 + exp(LogReg1$coefficients[1] + LogReg1$coefficients[2] * 50))
cat("\n\nThe Probability of a 50-Year-old Patient Catching the Disease is:", Disease50 * 100, "%.\n")
## 
## 
## The Probability of a 50-Year-old Patient Catching the Disease is: 26.42 %.

2.10 Calculating Deviance

# Run Regression for Null Model - Only Intercept
LogReg1Null <- glm(Disease ~ 1, data = Patients, family = binomial(link = 'logit'))

summary(LogReg1Null)
## 
## Call:
## glm(formula = Disease ~ 1, family = binomial(link = "logit"), 
##     data = Patients)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   -0.619      0.469   -1.32     0.19
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25.898  on 19  degrees of freedom
## Residual deviance: 25.898  on 19  degrees of freedom
## AIC: 27.9
## 
## Number of Fisher Scoring iterations: 4
anova(LogReg1Null)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Disease
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL                    19       25.9
# Compute Rsq 
r2log <- 1 - LogReg1$deviance / LogReg1$null.deviance

cat("\n\nThe R2 of this Logistic Regression Model is:", r2log * 100, "%.\n")
## 
## 
## The R2 of this Logistic Regression Model is: 22 %.

3 Logistic Regression with TelcoChurn

3.1 Getting Data for TelcoChurn (VMchurn)

# Download Training Data from URL

3.2 Exploring Data for TelcoChurn

# Loading other packages if not available
if(! "vtable" %in% installed.packages()) { install.packages("vtable", dependencies = TRUE) }
library(vtable)

# Add ID to Data Frame
VMchurn$ID <- 1:nrow(VMchurn)

# Show Characteristics of Data Frame
cat("\n\nColumns Available in Data Frame:\n")
## 
## 
## Columns Available in Data Frame:
names(VMchurn)
##  [1] "CustomerID"           "Gender"               "SeniorCitizen"       
##  [4] "MaritalStatus"        "Dependents"           "Tenure"              
##  [7] "PhoneService"         "MultipleLines"        "InternetService"     
## [10] "OnlineSecurity"       "OnlineBackup"         "DeviceProtection"    
## [13] "TechSupport"          "StreamingTV"          "StreamingMovies"     
## [16] "Contract"             "PaperlessBilling"     "PaymentMethod"       
## [19] "InternationalPlan"    "VoiceMailPlan"        "NumbervMailMessages" 
## [22] "TotalDayMinutes"      "TotalDayCalls"        "TotalEveMinutes"     
## [25] "TotalEveCalls"        "TotalNightMinutes"    "TotalNightCalls"     
## [28] "TotalIntlMinutes"     "TotalIntlCalls"       "CustomerServiceCalls"
## [31] "TotalCall"            "TotalRevenue"         "Churn"               
## [34] "ID"
cat("\n\nShow Structure of the Data Frame:\n")
## 
## 
## Show Structure of the Data Frame:
str(VMchurn)
## 'data.frame':    3333 obs. of  34 variables:
##  $ CustomerID          : Factor w/ 3333 levels "0002-ORFBO","0004-TLHLJ",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Gender              : Factor w/ 2 levels "Female","Male": 1 2 1 1 1 1 1 2 1 1 ...
##  $ SeniorCitizen       : int  0 0 0 1 1 0 1 1 0 0 ...
##  $ MaritalStatus       : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 2 1 1 1 2 ...
##  $ Dependents          : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 2 ...
##  $ Tenure              : int  9 4 9 71 7 5 1 45 3 4 ...
##  $ PhoneService        : Factor w/ 1 level "Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ MultipleLines       : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
##  $ InternetService     : Factor w/ 3 levels "DSL","Fiber optic",..: 1 2 1 2 1 2 2 1 3 3 ...
##  $ OnlineSecurity      : Factor w/ 3 levels "No","No internet service",..: 1 1 1 3 3 1 1 3 2 2 ...
##  $ OnlineBackup        : Factor w/ 3 levels "No","No internet service",..: 3 1 1 3 1 1 1 1 2 2 ...
##  $ DeviceProtection    : Factor w/ 3 levels "No","No internet service",..: 1 3 1 3 1 1 1 3 2 2 ...
##  $ TechSupport         : Factor w/ 3 levels "No","No internet service",..: 3 1 3 3 1 1 1 1 2 2 ...
##  $ StreamingTV         : Factor w/ 3 levels "No","No internet service",..: 3 1 3 3 1 1 1 1 2 2 ...
##  $ StreamingMovies     : Factor w/ 3 levels "No","No internet service",..: 1 1 3 3 1 1 1 3 2 2 ...
##  $ Contract            : Factor w/ 3 levels "Month-to-month",..: 2 1 1 3 1 1 1 2 1 1 ...
##  $ PaperlessBilling    : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 1 1 1 ...
##  $ PaymentMethod       : Factor w/ 4 levels "Bank transfer (automatic)",..: 4 3 2 1 3 3 3 2 4 4 ...
##  $ InternationalPlan   : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 2 1 1 1 ...
##  $ VoiceMailPlan       : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
##  $ NumbervMailMessages : int  0 0 36 0 0 0 0 0 0 0 ...
##  $ TotalDayMinutes     : num  168.8 122.2 178.7 190.2 67.7 ...
##  $ TotalDayCalls       : int  137 112 134 68 68 95 55 133 158 99 ...
##  $ TotalEveMinutes     : num  241 132 179 262 196 ...
##  $ TotalEveCalls       : int  107 94 102 64 86 128 124 86 120 93 ...
##  $ TotalNightMinutes   : num  205 170 127 130 236 ...
##  $ TotalNightCalls     : int  106 106 82 92 137 105 81 80 46 106 ...
##  $ TotalIntlMinutes    : num  15.5 10.3 8 8.8 12 12.9 10 11.5 12.4 8 ...
##  $ TotalIntlCalls      : int  4 9 4 4 2 5 7 3 3 4 ...
##  $ CustomerServiceCalls: int  0 5 2 0 1 3 3 0 1 1 ...
##  $ TotalCall           : int  354 326 324 228 294 336 270 302 328 303 ...
##  $ TotalRevenue        : num  593 281 572 7904 340 ...
##  $ Churn               : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 2 1 2 ...
##  $ ID                  : int  1 2 3 4 5 6 7 8 9 10 ...
cat("\n\nFirst 5 Rows of Data Frame:\n")
## 
## 
## First 5 Rows of Data Frame:
head(VMchurn, 5)
##   CustomerID Gender SeniorCitizen MaritalStatus Dependents Tenure PhoneService
## 1 0002-ORFBO Female             0           Yes        Yes      9          Yes
## 2 0004-TLHLJ   Male             0            No         No      4          Yes
## 3 0013-MHZWF Female             0            No        Yes      9          Yes
## 4 0013-SMEOE Female             1           Yes         No     71          Yes
## 5 0015-UOCOJ Female             1            No         No      7          Yes
##   MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection
## 1            No             DSL             No          Yes               No
## 2            No     Fiber optic             No           No              Yes
## 3            No             DSL             No           No               No
## 4            No     Fiber optic            Yes          Yes              Yes
## 5            No             DSL            Yes           No               No
##   TechSupport StreamingTV StreamingMovies       Contract PaperlessBilling
## 1         Yes         Yes              No       One year              Yes
## 2          No          No              No Month-to-month              Yes
## 3         Yes         Yes             Yes Month-to-month              Yes
## 4         Yes         Yes             Yes       Two year              Yes
## 5          No          No              No Month-to-month              Yes
##               PaymentMethod InternationalPlan VoiceMailPlan NumbervMailMessages
## 1              Mailed check                No            No                   0
## 2          Electronic check               Yes            No                   0
## 3   Credit card (automatic)                No           Yes                  36
## 4 Bank transfer (automatic)                No            No                   0
## 5          Electronic check                No            No                   0
##   TotalDayMinutes TotalDayCalls TotalEveMinutes TotalEveCalls TotalNightMinutes
## 1           168.8           137           241.4           107             204.8
## 2           122.2           112           131.7            94             169.5
## 3           178.7           134           178.6           102             126.8
## 4           190.2            68           262.2            64             130.0
## 5            67.7            68           195.7            86             236.5
##   TotalNightCalls TotalIntlMinutes TotalIntlCalls CustomerServiceCalls
## 1             106             15.5              4                    0
## 2             106             10.3              9                    5
## 3              82              8.0              4                    2
## 4              92              8.8              4                    0
## 5             137             12.0              2                    1
##   TotalCall TotalRevenue Churn ID
## 1       354        593.3    No  1
## 2       326        280.9   Yes  2
## 3       324        571.5    No  3
## 4       228       7904.3    No  4
## 5       294        340.4    No  5
cat("\n\nDescriptive Statistics of Columns in Data Frame:\n")
## 
## 
## Descriptive Statistics of Columns in Data Frame:
st(VMchurn[, -1], add.median = TRUE, out = "csv", simple.kable = TRUE, col.align = "right", align = "right", digits = 4,
   title='Summary Statistics',
   summ = list(
     c('notNA(x)','mean(x)','sd(x)','min(x)', 'pctile(x)[25]', 'median(x)', 'pctile(x)[75]', 'max(x)', 'propNA(x)', 'getmode(x)'),
     c('notNA(x)','mean(x)')
   ),
   summ.names = list(
     c('N','Mean','SD','Min','P25','P50','P75', 'Max','NA','Mode'),
     c('Count','Percent')
   )
)
##                         Variable    N   Mean     SD  Min   P25   P50   P75
## 1                         Gender 3333                                     
## 2                     ... Female 1621 48.63%                              
## 3                       ... Male 1712 51.37%                              
## 4                  SeniorCitizen 3333 0.1101 0.3131    0     0     0     0
## 5                  MaritalStatus 3333                                     
## 6                         ... No 1831 54.94%                              
## 7                        ... Yes 1502 45.06%                              
## 8                     Dependents 3333                                     
## 9                         ... No 2237 67.12%                              
## 10                       ... Yes 1096 32.88%                              
## 11                        Tenure 3333     28  23.26    0     7    23    48
## 12                  PhoneService 3333                                     
## 13                       ... Yes 3333   100%                              
## 14                 MultipleLines 3333                                     
## 15                        ... No 3024 90.73%                              
## 16                       ... Yes  309  9.27%                              
## 17               InternetService 3333                                     
## 18                       ... DSL 1036 31.08%                              
## 19               ... Fiber optic 1118 33.54%                              
## 20                        ... No 1179 35.37%                              
## 21                OnlineSecurity 3333                                     
## 22                        ... No 1356 40.68%                              
## 23       ... No internet service 1179 35.37%                              
## 24                       ... Yes  798 23.94%                              
## 25                  OnlineBackup 3333                                     
## 26                        ... No 1289 38.67%                              
## 27       ... No internet service 1179 35.37%                              
## 28                       ... Yes  865 25.95%                              
## 29              DeviceProtection 3333                                     
## 30                        ... No 1320  39.6%                              
## 31       ... No internet service 1179 35.37%                              
## 32                       ... Yes  834 25.02%                              
## 33                   TechSupport 3333                                     
## 34                        ... No 1353 40.59%                              
## 35       ... No internet service 1179 35.37%                              
## 36                       ... Yes  801 24.03%                              
## 37                   StreamingTV 3333                                     
## 38                        ... No 1266 37.98%                              
## 39       ... No internet service 1179 35.37%                              
## 40                       ... Yes  888 26.64%                              
## 41               StreamingMovies 3333                                     
## 42                        ... No 1242 37.26%                              
## 43       ... No internet service 1179 35.37%                              
## 44                       ... Yes  912 27.36%                              
## 45                      Contract 3333                                     
## 46            ... Month-to-month 1790 53.71%                              
## 47                  ... One year  762 22.86%                              
## 48                  ... Two year  781 23.43%                              
## 49              PaperlessBilling 3333                                     
## 50                        ... No 1638 49.14%                              
## 51                       ... Yes 1695 50.86%                              
## 52                 PaymentMethod 3333                                     
## 53 ... Bank transfer (automatic)  694 20.82%                              
## 54   ... Credit card (automatic)  704 21.12%                              
## 55          ... Electronic check  887 26.61%                              
## 56              ... Mailed check 1048 31.44%                              
## 57             InternationalPlan 3333                                     
## 58                        ... No 3010 90.31%                              
## 59                       ... Yes  323  9.69%                              
## 60                 VoiceMailPlan 3333                                     
## 61                        ... No 2411 72.34%                              
## 62                       ... Yes  922 27.66%                              
## 63           NumbervMailMessages 3333  8.099  13.69    0     0     0    20
## 64               TotalDayMinutes 3333  179.8  54.47    0 143.7 179.4 216.4
## 65                 TotalDayCalls 3333  100.4  20.07    0    87   101   114
## 66               TotalEveMinutes 3333    201  50.71    0 166.6 201.4 235.3
## 67                 TotalEveCalls 3333  100.1  19.92    0    87   100   114
## 68             TotalNightMinutes 3333  200.9  50.57 23.2   167 201.2 235.3
## 69               TotalNightCalls 3333  100.1  19.57   33    87   100   113
## 70              TotalIntlMinutes 3333  10.24  2.792    0   8.5  10.3  12.1
## 71                TotalIntlCalls 3333  4.479  2.461    0     3     4     6
## 72          CustomerServiceCalls 3333  1.563  1.315    0     1     1     2
## 73                     TotalCall 3333  306.7  34.45  194   284   307   330
## 74                  TotalRevenue 3328   1673   1920 18.8 252.6 892.5  2434
## 75                         Churn 3333                                     
## 76                        ... No 2850 85.51%                              
## 77                       ... Yes  483 14.49%                              
## 78                            ID 3333   1667  962.3    1   834  1667  2500
##      Max                 NA Mode
## 1                               
## 2                               
## 3                               
## 4      1                  0     
## 5                               
## 6                               
## 7                               
## 8                               
## 9                               
## 10                              
## 11    72                  0     
## 12                              
## 13                              
## 14                              
## 15                              
## 16                              
## 17                              
## 18                              
## 19                              
## 20                              
## 21                              
## 22                              
## 23                              
## 24                              
## 25                              
## 26                              
## 27                              
## 28                              
## 29                              
## 30                              
## 31                              
## 32                              
## 33                              
## 34                              
## 35                              
## 36                              
## 37                              
## 38                              
## 39                              
## 40                              
## 41                              
## 42                              
## 43                              
## 44                              
## 45                              
## 46                              
## 47                              
## 48                              
## 49                              
## 50                              
## 51                              
## 52                              
## 53                              
## 54                              
## 55                              
## 56                              
## 57                              
## 58                              
## 59                              
## 60                              
## 61                              
## 62                              
## 63    51                  0     
## 64 350.8                  0     
## 65   165                  0     
## 66 363.7                  0     
## 67   170                  0     
## 68   395                  0     
## 69   175                  0     
## 70    20                  0     
## 71    20                  0     
## 72     9                  0     
## 73   418                  0     
## 74  8476 0.0015001500150015     
## 75                              
## 76                              
## 77                              
## 78  3333                  0

3.3 Splitting Data

# Loading other packages if not available
if(! "rsample" %in% installed.packages()) { install.packages("rsample", dependencies = TRUE) }
library(rsample)

# Convert X and Y in Numeric Columns
VMchurn$ChurnNum  <- ifelse(VMchurn$Churn == 'No', 0, 1)
VMchurn$VMplanNum <- ifelse(VMchurn$VoiceMailPlan == 'No', 0, 1)
VMchurn$CSChig    <- ifelse(VMchurn$CustomerServiceCalls > 3, 1, 0)
VMchurn$CSCmed    <- ifelse(VMchurn$CustomerServiceCalls > 1 & VMchurn$CustomerServiceCalls < 4, 1, 0)

# Split data into train and test
set.seed(1)
split <- initial_split(VMchurn, prop = 0.80)
VMtrain <- training(split)
VMtest  <- testing(split)

# Show Split Data
cat("\n\nShow Structure of the Training Data Frame:\n")
## 
## 
## Show Structure of the Training Data Frame:
cat("Training Data Frame includes ", nrow(VMtrain), " rows and ", ncol(VMtrain), " columns.")
## Training Data Frame includes  2666  rows and  38  columns.
cat("\n\nShow Structure of the Testing Data Frame:\n")
## 
## 
## Show Structure of the Testing Data Frame:
cat("Testing Data Frame includes ", nrow(VMtest), " rows and ", ncol(VMtest), " columns.")
## Testing Data Frame includes  667  rows and  38  columns.

3.4 Performing Regression

# Load Library if not available
if(! "stats" %in% installed.packages()) { install.packages("stats", dependencies = TRUE) }
library(stats)

LinReg2 <- lm(ChurnNum ~ VMplanNum, data = VMtrain)
LogReg2 <- glm(ChurnNum ~ VMplanNum, data = VMtrain, family = "binomial")

summary(LogReg2)
## 
## Call:
## glm(formula = ChurnNum ~ VMplanNum, family = "binomial", data = VMtrain)
## 
## Coefficients:
##             Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)   -1.593      0.061  -26.10 < 0.0000000000000002 ***
## VMplanNum     -0.768      0.143   -5.35          0.000000086 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2212.2  on 2665  degrees of freedom
## Residual deviance: 2179.7  on 2664  degrees of freedom
## AIC: 2184
## 
## Number of Fisher Scoring iterations: 5
anova(LogReg2)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: ChurnNum
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev    Pr(>Chi)    
## NULL                       2665       2212                
## VMplanNum  1     32.5      2664       2180 0.000000012 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.5 Performing Regression with Multiple Xs

# Run Regression with all Predictors
LogReg2a <- glm(ChurnNum ~ VMplanNum + Tenure + TotalDayMinutes + TotalEveMinutes + TotalNightMinutes + TotalIntlMinutes + CSCmed + CSChig, data = VMtrain, family = "binomial")

summary(LogReg2a, digits = 5)
## 
## Call:
## glm(formula = ChurnNum ~ VMplanNum + Tenure + TotalDayMinutes + 
##     TotalEveMinutes + TotalNightMinutes + TotalIntlMinutes + 
##     CSCmed + CSChig, family = "binomial", data = VMtrain)
## 
## Coefficients:
##                   Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)       -6.08378    0.57527  -10.58 < 0.0000000000000002 ***
## VMplanNum         -0.91883    0.16624   -5.53          0.000000033 ***
## Tenure            -0.05761    0.00451  -12.76 < 0.0000000000000002 ***
## TotalDayMinutes    0.01413    0.00129   10.95 < 0.0000000000000002 ***
## TotalEveMinutes    0.00605    0.00132    4.59          0.000004340 ***
## TotalNightMinutes  0.00181    0.00129    1.40              0.16199    
## TotalIntlMinutes   0.08746    0.02407    3.63              0.00028 ***
## CSCmed             0.01496    0.14591    0.10              0.91832    
## CSChig             2.46057    0.19653   12.52 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2212.2  on 2665  degrees of freedom
## Residual deviance: 1552.9  on 2657  degrees of freedom
## AIC: 1571
## 
## Number of Fisher Scoring iterations: 6
anova(LogReg2a)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: ChurnNum
## 
## Terms added sequentially (first to last)
## 
## 
##                   Df Deviance Resid. Df Resid. Dev             Pr(>Chi)    
## NULL                               2665       2212                         
## VMplanNum          1       32      2664       2180          0.000000012 ***
## Tenure             1      316      2663       1863 < 0.0000000000000002 ***
## TotalDayMinutes    1      109      2662       1754 < 0.0000000000000002 ***
## TotalEveMinutes    1       14      2661       1741              0.00020 ***
## TotalNightMinutes  1        1      2660       1740              0.32080    
## TotalIntlMinutes   1       12      2659       1728              0.00069 ***
## CSCmed             1       12      2658       1716              0.00044 ***
## CSChig             1      163      2657       1553 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.6 Performing Regression with Multiple Xs

# Run Regression with all Predictors
LogReg3 <- glm(ChurnNum ~ VMplanNum + Tenure + TotalDayMinutes + TotalEveMinutes + TotalNightMinutes + TotalIntlMinutes + CSChig, data = VMtrain, family = "binomial")

summary(LogReg3, digits = 5)
## 
## Call:
## glm(formula = ChurnNum ~ VMplanNum + Tenure + TotalDayMinutes + 
##     TotalEveMinutes + TotalNightMinutes + TotalIntlMinutes + 
##     CSChig, family = "binomial", data = VMtrain)
## 
## Coefficients:
##                   Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)       -6.07321    0.56580  -10.73 < 0.0000000000000002 ***
## VMplanNum         -0.91925    0.16619   -5.53          0.000000032 ***
## Tenure            -0.05762    0.00451  -12.76 < 0.0000000000000002 ***
## TotalDayMinutes    0.01412    0.00129   10.96 < 0.0000000000000002 ***
## TotalEveMinutes    0.00605    0.00132    4.60          0.000004285 ***
## TotalNightMinutes  0.00180    0.00129    1.40              0.16294    
## TotalIntlMinutes   0.08742    0.02406    3.63              0.00028 ***
## CSChig             2.45448    0.18726   13.11 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2212.2  on 2665  degrees of freedom
## Residual deviance: 1552.9  on 2658  degrees of freedom
## AIC: 1569
## 
## Number of Fisher Scoring iterations: 6
anova(LogReg3)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: ChurnNum
## 
## Terms added sequentially (first to last)
## 
## 
##                   Df Deviance Resid. Df Resid. Dev             Pr(>Chi)    
## NULL                               2665       2212                         
## VMplanNum          1       32      2664       2180          0.000000012 ***
## Tenure             1      316      2663       1863 < 0.0000000000000002 ***
## TotalDayMinutes    1      109      2662       1754 < 0.0000000000000002 ***
## TotalEveMinutes    1       14      2661       1741              0.00020 ***
## TotalNightMinutes  1        1      2660       1740              0.32080    
## TotalIntlMinutes   1       12      2659       1728              0.00069 ***
## CSChig             1      175      2658       1553 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.7 Showing McFadden’s Rsquare

# Load Library if not available
if(! "pscl" %in% installed.packages()) { install.packages("pscl", dependencies = TRUE) }
library(pscl)

pR2(LogReg3)
## fitting null model for pseudo-r2
##        llh    llhNull         G2   McFadden       r2ML       r2CU 
##  -776.4689 -1106.0906   659.2435     0.2980     0.2191     0.3885

4 Logistic Regression with Space Shuttle Data

4.1 Getting Data

# Download Training Data from URL

4.2 Exploring Data for Shuttle

# Loading other packages if not available
if(! "vtable" %in% installed.packages()) { install.packages("vtable", dependencies = TRUE) }
library(vtable)

# Add ID to Data Frame
Shuttle$ID <- 1:nrow(Shuttle)

# Show Characteristics of Data Frame
cat("\n\nColumns Available in Data Frame:\n")
## 
## 
## Columns Available in Data Frame:
names(Shuttle)
## [1] "Flights"       "Date"          "nfails.field"  "nfails.nozzle"
## [5] "fail.field"    "fail.nozzle"   "TempC"         "TotalFails"   
## [9] "ID"
cat("\n\nShow Structure of the Data Frame:\n")
## 
## 
## Show Structure of the Data Frame:
str(Shuttle)
## 'data.frame':    23 obs. of  9 variables:
##  $ Flights      : Factor w/ 23 levels "1","2","3","41-B",..: 1 2 3 8 17 21 22 23 4 5 ...
##  $ Date         : Factor w/ 23 levels "03 Feb 1984",..: 8 11 14 7 3 13 21 18 1 5 ...
##  $ nfails.field : int  0 1 0 0 0 0 0 0 1 1 ...
##  $ nfails.nozzle: int  0 0 0 0 2 0 0 0 1 1 ...
##  $ fail.field   : int  0 1 0 0 0 0 0 0 1 1 ...
##  $ fail.nozzle  : int  0 0 0 0 1 0 0 0 1 1 ...
##  $ TempC        : num  18.9 21.1 20.6 20 19.4 22.2 22.8 21.1 13.9 17.2 ...
##  $ TotalFails   : int  0 1 0 0 3 0 0 0 3 3 ...
##  $ ID           : int  1 2 3 4 5 6 7 8 9 10 ...
cat("\n\nFirst 5 Rows of Data Frame:\n")
## 
## 
## First 5 Rows of Data Frame:
head(Shuttle, 5)
##   Flights        Date nfails.field nfails.nozzle fail.field fail.nozzle TempC
## 1       1 12 Apr 1981            0             0          0           0  18.9
## 2       2 12 Nov 1981            1             0          1           0  21.1
## 3       3 22 Mar 1982            0             0          0           0  20.6
## 4       5 11 Nov 1982            0             0          0           0  20.0
## 5       6 04 Apr 1983            0             2          0           1  19.4
##   TotalFails ID
## 1          0  1
## 2          1  2
## 3          0  3
## 4          0  4
## 5          3  5
cat("\n\nDescriptive Statistics of Columns in Data Frame:\n")
## 
## 
## Descriptive Statistics of Columns in Data Frame:
st(Shuttle[, -c(1, 2)], add.median = TRUE, out = "csv", simple.kable = TRUE, col.align = "right", align = "right", digits = 5,
   title='Summary Statistics',
   summ = list(
     c('notNA(x)','mean(x)','sd(x)','min(x)', 'pctile(x)[25]', 'median(x)', 'pctile(x)[75]', 'max(x)', 'propNA(x)', 'getmode(x)'),
     c('notNA(x)','mean(x)')
   ),
   summ.names = list(
     c('N','Mean','SD','Min','P25','P50','P75', 'Max','NA','Mode'),
     c('Count','Percent')
   )
)
##        Variable  N    Mean      SD  Min  P25  P50  P75  Max NA Mode
## 1  nfails.field 23  0.3913 0.65638    0    0    0    1    2  0     
## 2 nfails.nozzle 23 0.73913 0.91539    0    0    0    2    2  0     
## 3    fail.field 23 0.30435 0.47047    0    0    0    1    1  0     
## 4   fail.nozzle 23 0.43478 0.50687    0    0    0    1    1  0     
## 5         TempC 23  20.861  3.9195 11.7 19.4 21.1 23.9 27.2  0     
## 6    TotalFails 23  1.5652   1.674    0    0    1    3    5  0     
## 7            ID 23      12  6.7823    1  6.5   12 17.5   23  0

4.3 Calculating Confidence Interval for Pearson’s Correlation Coefficient

# Loading Package if Not Available
if(! "confintr" %in% installed.packages()) { install.packages("confintr", dependencies = TRUE) }
library(confintr)

# Calculate Correlation Coefficient
ci_cor(
  x = Shuttle$TempC,
  y = Shuttle$TotalFails,
  probs = c(0.025, 0.975),
  method = "pearson"
)
## 
##  Two-sided 95% normal confidence interval for the true Pearson
##  correlation coefficient
## 
## Sample estimate: -0.5874 
## Confidence interval:
##    2.5%   97.5% 
## -0.8048 -0.2312

4.4 Performing Linear Regression

# Load Library if not available
if(! "stats" %in% installed.packages()) { install.packages("stats", dependencies = TRUE) }
library(stats)

# Run regression
LinReg3    <- lm(TotalFails ~ TempC,  data = Shuttle)

summary(LinReg3)
## 
## Call:
## lm(formula = TotalFails ~ TempC, data = Shuttle)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.057 -1.154 -0.251  1.102  2.323 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.7988     1.5998    4.25  0.00036 ***
## TempC        -0.2509     0.0754   -3.33  0.00321 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.39 on 21 degrees of freedom
## Multiple R-squared:  0.345,  Adjusted R-squared:  0.314 
## F-statistic: 11.1 on 1 and 21 DF,  p-value: 0.00321
anova(LinReg3)
## Analysis of Variance Table
## 
## Response: TotalFails
##           Df Sum Sq Mean Sq F value Pr(>F)   
## TempC      1   21.3   21.27    11.1 0.0032 **
## Residuals 21   40.4    1.92                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate Predictions
Shuttle$PredictLm <- predict(LinReg3)

# Calculate RMSE
RMSELinReg3 <- sqrt(mean((Shuttle$TotalFails - Shuttle$PredictLm)^2))

cat("\n\nRMSE of Linear Regression: ", sprintf(RMSELinReg3, fmt = '%#.4f'), "\n")
## 
## 
## RMSE of Linear Regression:  1.3250

4.5 Plotting Data with Regression Line

# Plot Data with Regression Line
plot(Shuttle$TempC, Shuttle$TotalFails, 
     main = "Space Shuttle: Scatter Plot of Outside Temperature vs Total Fails", 
     xlab = "Outside Temperature", 
     ylab = "Number of Fails", 
     pch = 16, 
     col = "blue") 

abline(LinReg3, col = "red")
grid(col = "lightgrey", lty = "dotted")

4.6 Plotting Confidence and Prediction Interval

# Loading other packages
if(! "tidyverse" %in% installed.packages()) { install.packages("tidyverse", dependencies = TRUE) }
library(tidyverse)
if(! "quantreg" %in% installed.packages()) { install.packages("quantreg", dependencies = TRUE) }
library(quantreg)

# 95% quantile, two tailed
rq_low  <- rq(Shuttle$TotalFails ~ Shuttle$TempC, data = Shuttle, tau = 0.025)  #lower quantile
rq_high <- rq(Shuttle$TotalFails ~ Shuttle$TempC, data = Shuttle, tau = 0.975)  #upper quantile

Shuttle %>% 
  mutate(low  = rq_low$coefficients[1]  + Shuttle$TempC * rq_low$coefficients[2],
         high = rq_high$coefficients[1] + Shuttle$TempC * rq_high$coefficients[2]) %>% 
  ggplot() +
  geom_point(aes(Shuttle$TempC, Shuttle$TotalFails), size = 4) +
  geom_smooth(aes(Shuttle$TempC, Shuttle$TotalFails), method = "lm") +
  geom_line(aes(Shuttle$TempC, low),  linetype = "dashed", color = "red") +
  geom_line(aes(Shuttle$TempC, high), linetype = "dashed", color = "red") +
  theme_bw() +
  labs(title = "Space Shuttle Launch: Probability of Fails over Outside Temperature",
              subtitle = "Based on Real Data",
              caption = "Data source: NASA",
              x = "Outside Temperature in ºC", 
              y = "Number of Fails") +
    theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    plot.caption = element_text(hjust = 0, face = "italic", size = 10),
    plot.x = element_text(size = 16),
  )

4.7 Performing Logistic Regression

# Load Library if not available
if(! "stats" %in% installed.packages()) { install.packages("stats", dependencies = TRUE) }
library(stats)

# Make 0/1 Column
Shuttle$Fail <- ifelse(Shuttle$TotalFails > 0, 1, 0)

# Run Regression
LogReg3 <- glm(fail.field ~ TempC, data = Shuttle, family = "binomial")

summary(LogReg3)
## 
## Call:
## glm(formula = fail.field ~ TempC, family = "binomial", data = Shuttle)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    7.584      3.915    1.94    0.053 .
## TempC         -0.417      0.194   -2.15    0.032 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 28.267  on 22  degrees of freedom
## Residual deviance: 20.335  on 21  degrees of freedom
## AIC: 24.33
## 
## Number of Fisher Scoring iterations: 5
anova(LogReg3)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: fail.field
## 
## Terms added sequentially (first to last)
## 
## 
##       Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
## NULL                     22       28.3            
## TempC  1     7.93        21       20.3   0.0049 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.8 Showing Plot with CI for Shuttle

# Load Library if not available
if(! "sjPlot" %in% installed.packages()) { install.packages("sjPlot", dependencies = TRUE) }
library(sjPlot)

# Plot Probability with CI
pm3 <- plot_model(LogReg3, 
           type = "pred"
)

pm3 + labs(title = "Space Shuttle Launch: Probability of Fails over Outside Temperature",
              subtitle = "Based on Real Data",
              caption = "Data source: NASA",
              x = "Outside Temperature", 
              y = "Number of Fails") +
    theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    plot.caption = element_text(hjust = 0, face = "italic", size = 10),
    plot.x = element_text(size = 14),
  )

4.9 Showing McFadden’s Rsquare

# Load Library if not available
if(! "pscl" %in% installed.packages()) { install.packages("pscl", dependencies = TRUE) }
library(pscl)

pR2(LogReg3)
## fitting null model for pseudo-r2
##      llh  llhNull       G2 McFadden     r2ML     r2CU 
## -10.1674 -14.1336   7.9323   0.2806   0.2917   0.4123

4.10 Predicting the Desaster

cat("\n\nOutside Temperature at launch of Challenger was 36°F, i.e., 2.2°C.\n")
## 
## 
## Outside Temperature at launch of Challenger was 36°F, i.e., 2.2°C.
cat("Until then the lowest Outside Temperature at launch of any Shuttle was", min(round(Shuttle$TempC, 2)), "°C.\n")
## Until then the lowest Outside Temperature at launch of any Shuttle was 11.7 °C.
cat("What is the Risk of an O-Ring Field Failure?\n")
## What is the Risk of an O-Ring Field Failure?
newTemp <- data.frame(TempC = c(2.2))
newData <- data.frame(TempC = c(2.2, 3, 4, 20, 30))


newRisk <- predict(LogReg3, newdata = newTemp, type = "response")

cat("The Risk of Field Failure at 2.2°C Outside Temperature was", round(newRisk * 100, 2), "%.\n\n")
## The Risk of Field Failure at 2.2°C Outside Temperature was 99.87 %.
predictions <- predict(LogReg3, newdata = newData, type = "link", se.fit = TRUE)

alpha <- 0.05
z_value <- qnorm(1 - alpha / 2)
lower_logit <- predictions$fit - z_value * predictions$se.fit
upper_logit <- predictions$fit + z_value * predictions$se.fit

lower_prob <- exp(lower_logit) / (1 + exp(lower_logit))
upper_prob <- exp(upper_logit) / (1 + exp(upper_logit))

results <- data.frame(
  Temperature    = newData$TempC,
  Lower_Prob     = paste(round(lower_prob * 100, 2) , "%"),
  Predicted_Prob = paste(round(exp(predictions$fit) / (1 + exp(predictions$fit)) * 100, 2) , "%"),
  Upper_Prob     = paste(round(upper_prob * 100, 2) , "%")
)

# Insert space between Columns


# Print table with CIs
cat("\n\nSome more prediction data with 95% Confidence Interval:\n\n")
## 
## 
## Some more prediction data with 95% Confidence Interval:
print.data.frame(results)
##   Temperature Lower_Prob Predicted_Prob Upper_Prob
## 1         2.2    45.56 %        99.87 %      100 %
## 2         3.0    44.74 %        99.82 %      100 %
## 3         4.0    43.71 %        99.73 %      100 %
## 4        20.0    13.78 %         32.1 %    58.31 %
## 5        30.0     0.01 %         0.73 %    27.87 %

4.11 Calculating Odds Ratio and 95% CI and Predict for Shuttle

# Calculate Odds Ratio and 95% CI
cat("\n\nThe Odds Ratio for Field Failure at an Outside Temperature of 0.0°C is:", round(exp(coef(LogReg3)[1]), 4) , ".\n")
## 
## 
## The Odds Ratio for Field Failure at an Outside Temperature of 0.0°C is: 1966 .
cat("For every degree increase, the Odds Ratio increases by:", round(exp(coef(LogReg3)[2]), 4) , ".\n\n")
## For every degree increase, the Odds Ratio increases by: 0.6593 .
suppressMessages(exp(cbind(OR = coef(LogReg3), confint(LogReg3))))
##                    OR 2.5 %        97.5 %
## (Intercept) 1965.9744 3.805 52874563.0058
## TempC          0.6593 0.397        0.8967