# Load the stroke data set from the working directory.
data <- read.csv("strokedata.csv")
# Column X mirrors the row number (see the printed head), i.e. it is a record
# identifier rather than a measurement. Drop it here so that every later
# `stroke ~ .` formula does not pick it up as a predictor.
data[["X"]] <- NULL
head(data)
X age hypertension heart_disease avg_glucose_level bmi smoking_freq stroke
1 1 3 0 0 95.12 18.0 0 0
2 2 58 1 0 87.96 39.2 0 0
3 3 8 0 0 110.89 17.6 0 0
4 4 70 0 0 69.04 35.9 2 0
5 5 14 0 0 161.28 19.1 0 0
6 6 47 0 0 210.95 50.1 0 0
library(caret)
# Split the data 70/30 into training and test sets.
# Fix the RNG seed so the partition (and every metric derived from it) can be
# reproduced from run to run.
set.seed(123)
# Convert the outcome to a factor BEFORE partitioning: createDataPartition
# samples within the levels of a factor outcome, whereas a numeric outcome is
# binned by percentiles instead.
data$stroke <- as.factor(data$stroke)
index <- createDataPartition(data$stroke, p = 0.7, list = FALSE)
traindata <- data[index, ]
testdata <- data[-index, ]
# Fit a logistic regression of stroke on every other column of traindata,
# score the held-out rows and evaluate the default 0.5 cut-off.
model <- glm(stroke ~ ., data = traindata, family = "binomial")
testdata$predprob <- predict(model, newdata = testdata, type = "response")
# Pin both class levels explicitly: if every probability falls on one side of
# the cut-off, as.factor() would create a single-level factor and the
# confusion matrix could no longer be aligned with the reference classes.
testdata$predY <- factor(
  ifelse(testdata$predprob > 0.5, "1", "0"),
  levels = c("0", "1")
)
confusionMatrix(testdata$predY, testdata$stroke, positive = "1")
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 6377 5709
1 259 236
Accuracy : 0.5256
95% CI : (0.5169, 0.5344)
No Information Rate : 0.5275
P-Value [Acc > NIR] : 0.6627
Kappa : 7e-04
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.03970
Specificity : 0.96097
Pos Pred Value : 0.47677
Neg Pred Value : 0.52764
Prevalence : 0.47254
Detection Rate : 0.01876
Detection Prevalence : 0.03935
Balanced Accuracy : 0.50033
'Positive' Class : 1
library(Epi)
# Build the ROC table with Epi::ROC on the training data (plot = "sp" also
# draws the sensitivity/specificity plot as a side effect).
rc <- ROC(form = stroke ~ ., data = traindata, plot = "sp")
# Row of the ROC table where sensitivity + specificity is largest.
opt <- which.max(rowSums(rc$res[c("sens", "spec")]))
# The cut-off stored in the lr.eta column at that row.
threshold1 <- rc$res[["lr.eta"]][opt]
threshold1
[1] 0.4673331
# Re-classify the test rows with the Epi-derived cut-off and re-evaluate.
# Levels are fixed to c("0", "1") so the prediction factor always carries both
# classes, even when every probability lands on the same side of threshold1.
testdata$predY <- factor(
  ifelse(testdata$predprob > threshold1, "1", "0"),
  levels = c("0", "1")
)
confusionMatrix(testdata$predY, testdata$stroke, positive = "1")
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 3400 3035
1 3236 2910
Accuracy : 0.5015
95% CI : (0.4928, 0.5103)
No Information Rate : 0.5275
P-Value [Acc > NIR] : 1.00000
Kappa : 0.0018
Mcnemar's Test P-Value : 0.01155
Sensitivity : 0.4895
Specificity : 0.5124
Pos Pred Value : 0.4735
Neg Pred Value : 0.5284
Prevalence : 0.4725
Detection Rate : 0.2313
Detection Prevalence : 0.4885
Balanced Accuracy : 0.5009
'Positive' Class : 1
library(pROC)
# Derive the "best" cut-off on the training data with pROC.
# Passing newdata keeps one prediction per row of traindata (NA where a
# predictor is missing), so the vector always lines up with traindata$stroke;
# predict() without newdata returns only the rows glm actually used.
probabilities <- predict(model, newdata = traindata, type = "response")
# State control/case levels and direction explicitly instead of letting roc()
# auto-detect them: "0" is the control class, "1" the case class, and higher
# predicted probabilities indicate the case class.
roc_data <- roc(
  response = traindata$stroke,
  predictor = probabilities,
  levels = c("0", "1"),
  direction = "<"
)
# coords() may return several tied "best" thresholds; keep the first so the
# result is a single number and is not recycled when used in ifelse().
best_coords <- coords(
  roc_data,
  x = "best",
  ret = "threshold",
  transpose = FALSE
)
threshold2 <- best_coords$threshold[1]
threshold2
[1] 0.4673331
# Re-classify the test rows with the pROC-derived cut-off and re-evaluate.
# Levels are fixed to c("0", "1") so the prediction factor always carries both
# classes, even when every probability lands on the same side of threshold2.
testdata$predY <- factor(
  ifelse(testdata$predprob > threshold2, "1", "0"),
  levels = c("0", "1")
)
confusionMatrix(testdata$predY, testdata$stroke, positive = "1")
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 3400 3035
1 3236 2910
Accuracy : 0.5015
95% CI : (0.4928, 0.5103)
No Information Rate : 0.5275
P-Value [Acc > NIR] : 1.00000
Kappa : 0.0018
Mcnemar's Test P-Value : 0.01155
Sensitivity : 0.4895
Specificity : 0.5124
Pos Pred Value : 0.4735
Neg Pred Value : 0.5284
Prevalence : 0.4725
Detection Rate : 0.2313
Detection Prevalence : 0.4885
Balanced Accuracy : 0.5009
'Positive' Class : 1