# Telco Customer Churn Analysis Script

# Load required libraries
# (stray `|` separator lines removed: a bare `|` is a syntax error in R)
library(ggplot2)
library(dplyr)
library(rpart)   # rpart(): decision tree
library(e1071)   # naiveBayes()
library(caret)   # createDataPartition(), confusionMatrix()
library(pROC)

# Load dataset
# Reads the CSV from the current working directory; string columns are
# converted to factors.
telco <- read.csv("Telco-Customer-Churn.csv", stringsAsFactors = TRUE)

# Force TotalCharges to numeric. Going through character first means that,
# if the column was read as a factor, its labels are converted rather than
# its internal integer codes. Entries that cannot be parsed become NA.
telco$TotalCharges <- as.numeric(as.character(telco$TotalCharges))

# Drop the rows whose TotalCharges is missing / could not be parsed.
telco <- telco[!is.na(telco$TotalCharges), ]

# Fix the level order of the target explicitly: "No" first, "Yes" second.
telco$Churn <- factor(telco$Churn, levels = c("No", "Yes"))

# factor() silently maps any label other than "No"/"Yes" to NA; fail here
# with a clear message instead of deep inside the modelling code.
stopifnot("Churn must contain only 'No'/'Yes'" = !anyNA(telco$Churn))

# Split data
# Fixed seed so the train/test partition is reproducible between runs.
set.seed(42)

# Row indices for the training set (70% of rows). caret samples within the
# levels of Churn, so both subsets keep a similar churn rate.
# list = FALSE returns a matrix of indices usable directly for row subsetting.
trainIndex <- createDataPartition(telco$Churn, p = 0.7, list = FALSE)

train <- telco[trainIndex, ]
test <- telco[-trainIndex, ]   # every row not selected for training

# Decision Tree model
# Classification tree for Churn fitted on the training rows, using four
# predictors.
dt_model <- rpart(
  Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
  data = train,
  method = "class"
)

# Predicted class labels ("No"/"Yes") for the held-out rows.
dt_pred <- predict(dt_model, newdata = test, type = "class")

# Confusion matrix against the true labels. Without `positive`, caret uses
# the first factor level ("No"), so sensitivity/precision/recall would
# describe non-churners; churn ("Yes") is the event of interest here.
dt_conf <- confusionMatrix(dt_pred, test$Churn, positive = "Yes")

# Naive Bayes model
# Same formula as the decision tree, so the two models can be compared on
# the same predictors.
nb_model <- naiveBayes(
  Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
  data = train
)

# Predicted class labels for the held-out rows ("class" is the default
# prediction type for naiveBayes objects; spelled out for clarity).
nb_pred <- predict(nb_model, newdata = test, type = "class")

# Confusion matrix against the true labels. Without `positive`, caret uses
# the first factor level ("No"), so sensitivity/precision/recall would
# describe non-churners; churn ("Yes") is the event of interest here.
nb_conf <- confusionMatrix(nb_pred, test$Churn, positive = "Yes")

# ROC
