upload code

1 month ago · 5a975b4e67
--- a/Project-scriptv1.2.R
+++ b/Project-scriptv1.2.R
@ -0,0 +1,112 @@
 # Telco Customer Churn Analysis Script

 # Load required libraries

 library(ggplot2)
 library(dplyr)
 library(rpart)
 library(e1071)
 library(caret)
 library(pROC)

 # Load dataset
 #
 # Reads the Telco churn CSV from the working directory (strings become
 # factors) and cleans the two columns the rest of the script relies on.

 telco <- read.csv("Telco-Customer-Churn.csv", stringsAsFactors = TRUE)
 # Convert via character so that, if the column was read as a factor, the
 # printed values rather than the internal level codes become numbers.
 telco$TotalCharges <- as.numeric(as.character(telco$TotalCharges))
 # Drop rows whose TotalCharges is missing or could not be parsed.
 telco <- telco[!is.na(telco$TotalCharges), ]
 # Fix the level order: "No" first, "Yes" second.
 telco$Churn <- factor(telco$Churn, levels = c("No", "Yes"))

 # Exploratory Data Visualizations

 # Histogram for numeric variables
 #
 # Writes one PNG per variable to the working directory as hist_<name>.png.
 # The variables are on different scales, so the number of bins is fixed
 # instead of sharing a single bin width across all of them.

 numeric_vars <- c("tenure", "MonthlyCharges", "TotalCharges")
 for (var_name in numeric_vars) {
   # .data[[var_name]] maps a column chosen by string; it replaces the
   # deprecated aes_string().
   p <- ggplot(telco, aes(x = .data[[var_name]])) +
     geom_histogram(bins = 30, fill = "skyblue", color = "black") +
     labs(
       title = paste("Histogram of", var_name),
       x = var_name,
       y = "Frequency"
     ) +
     theme_minimal()
   ggsave(paste0("hist_", var_name, ".png"), plot = p)
 }

 # Bar plot for churn
 #
 # Counts of each Churn level, saved to bar_churn.png in the working directory.

 p_churn <- ggplot(telco, aes(x = Churn, fill = Churn)) +
   geom_bar() +
   labs(title = "Churn Distribution", x = "Churn", y = "Count") +
   theme_minimal()
 ggsave("bar_churn.png", plot = p_churn)

 # Boxplot of MonthlyCharges by Churn
 #
 # One box per Churn level, saved to boxplot_monthlycharges_churn.png.

 p_box <- ggplot(telco, aes(x = Churn, y = MonthlyCharges, fill = Churn)) +
   geom_boxplot() +
   labs(
     title = "Monthly Charges by Churn",
     x = "Churn",
     y = "Monthly Charges"
   ) +
   theme_minimal()
 ggsave("boxplot_monthlycharges_churn.png", plot = p_box)

 # Split data into training, validation1, and validation2
 #
 # About 70% of rows go to training, about 15% to validation1, and whatever
 # is left to validation2. Indices are drawn without replacement and the
 # later sets are taken from the remainder, so the three sets are disjoint.

 set.seed(100)
 n <- nrow(telco)
 # seq_len(n) is empty when n is 0, whereas 1:n would yield c(1, 0).
 train.index <- sample(seq_len(n), size = round(0.70 * n))
 remaining.index <- setdiff(seq_len(n), train.index)
 valid1.index <- sample(remaining.index, size = round(0.15 * n))
 valid2.index <- setdiff(remaining.index, valid1.index)

 train.df <- telco[train.index, ]
 valid1.df <- telco[valid1.index, ]
 valid2.df <- telco[valid2.index, ]

 # Logistic regression model (simplified)
 #
 # Binomial GLM of Churn on a hand-picked subset of predictors, fitted on the
 # training rows only.

 logit.reg <- glm(
   Churn ~ SeniorCitizen + Dependents + tenure + MultipleLines +
     InternetService + Contract + PaperlessBilling + PaymentMethod +
     MonthlyCharges + TotalCharges,
   data = train.df,
   family = "binomial"
 )
 # print() explicitly so the summary also appears when the script is source()d.
 print(summary(logit.reg))

 # Evaluate on validation set
 #
 # Scores valid1.df with the logistic model and summarises the result as a
 # confusion matrix and an ROC curve.

 valid1_pred_probs <- predict(logit.reg, newdata = valid1.df, type = "response")
 # Classify as "Yes" when the predicted probability exceeds 0.5.
 valid1_pred <- factor(
   ifelse(valid1_pred_probs > 0.5, "Yes", "No"),
   levels = c("No", "Yes")
 )
 logit_conf <- confusionMatrix(valid1_pred, valid1.df$Churn, positive = "Yes")
 # State the control/case levels and the direction explicitly so pROC does
 # not pick them automatically from the data.
 logit_roc <- roc(
   response = valid1.df$Churn,
   predictor = valid1_pred_probs,
   levels = c("No", "Yes"),
   direction = "<"
 )

 # Decision Tree model
 #
 # Classification tree on four predictors, fitted on train.df and evaluated
 # on valid1.df.

 dt_model <- rpart(
   Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
   data = train.df,
   method = "class"
 )
 dt_pred <- predict(dt_model, valid1.df, type = "class")
 # Report sensitivity/specificity for the "Yes" class, matching the logistic
 # model's confusion matrix; without it caret uses the first level ("No").
 dt_conf <- confusionMatrix(dt_pred, valid1.df$Churn, positive = "Yes")

 # Naive Bayes model
 #
 # Naive Bayes classifier on the same four predictors as the decision tree,
 # fitted on train.df and evaluated on valid1.df.

 nb_model <- naiveBayes(
   Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
   data = train.df
 )
 nb_pred <- predict(nb_model, valid1.df)
 # type = "raw" returns per-class probabilities; the "Yes" column feeds the ROC.
 nb_probs <- predict(nb_model, valid1.df, type = "raw")
 # Use "Yes" as the positive class, matching the logistic model's matrix.
 nb_conf <- confusionMatrix(nb_pred, valid1.df$Churn, positive = "Yes")
 # Explicit levels/direction so pROC does not pick them automatically.
 nb_roc <- roc(
   response = valid1.df$Churn,
   predictor = nb_probs[, "Yes"],
   levels = c("No", "Yes"),
   direction = "<"
 )

 # Print evaluations
 #
 # Writes each model's confusion matrix to the console, plus the AUC for the
 # two models that have an ROC curve.

 cat("\nLogistic Regression Confusion Matrix:\n")
 print(logit_conf)
 cat("\nAUC (Logistic):", auc(logit_roc), "\n")

 cat("\nDecision Tree Confusion Matrix:\n")
 print(dt_conf)

 cat("\nNaive Bayes Confusion Matrix:\n")
 print(nb_conf)
 cat("\nAUC (Naive Bayes):", auc(nb_roc), "\n")

 # Save ROC curve plots
 #
 # Each curve is drawn on its own PNG device in the working directory;
 # dev.off() closes the device so the file is written out.

 png("logistic_roc_curve.png")
 plot(logit_roc, main = "ROC Curve - Logistic Regression", col = "darkgreen")
 dev.off()

 png("naive_bayes_roc_curve.png")
 plot(nb_roc, main = "ROC Curve - Naive Bayes", col = "blue")
 dev.off()


--- a/Project/Project2.R
+++ b/Project/Project2.R
@ -0,0 +1,111 @@
 # Telco Customer Churn Analysis Script

 # Load required libraries

 library(ggplot2)
 library(dplyr)
 library(rpart)
 library(e1071)
 library(caret)
 library(pROC)

 # Load dataset
 #
 # Reads the Telco churn CSV from the working directory (strings become
 # factors) and cleans the two columns the rest of the script relies on.

 telco <- read.csv("Telco-Customer-Churn.csv", stringsAsFactors = TRUE)
 # Convert via character so that, if the column was read as a factor, the
 # printed values rather than the internal level codes become numbers.
 telco$TotalCharges <- as.numeric(as.character(telco$TotalCharges))
 # Drop rows whose TotalCharges is missing or could not be parsed.
 telco <- telco[!is.na(telco$TotalCharges), ]
 # Fix the level order: "No" first, "Yes" second.
 telco$Churn <- factor(telco$Churn, levels = c("No", "Yes"))

 # Exploratory Data Visualizations

 # Histogram for numeric variables
 #
 # Writes one PNG per variable to the working directory as hist_<name>.png.
 # The variables are on different scales, so the number of bins is fixed
 # instead of sharing a single bin width across all of them.

 numeric_vars <- c("tenure", "MonthlyCharges", "TotalCharges")
 for (var_name in numeric_vars) {
   # .data[[var_name]] maps a column chosen by string; it replaces the
   # deprecated aes_string().
   p <- ggplot(telco, aes(x = .data[[var_name]])) +
     geom_histogram(bins = 30, fill = "skyblue", color = "black") +
     labs(
       title = paste("Histogram of", var_name),
       x = var_name,
       y = "Frequency"
     ) +
     theme_minimal()
   ggsave(paste0("hist_", var_name, ".png"), plot = p)
 }

 # Bar plot for churn
 #
 # Counts of each Churn level, saved to bar_churn.png in the working directory.

 p_churn <- ggplot(telco, aes(x = Churn, fill = Churn)) +
   geom_bar() +
   labs(title = "Churn Distribution", x = "Churn", y = "Count") +
   theme_minimal()
 ggsave("bar_churn.png", plot = p_churn)

 # Boxplot of MonthlyCharges by Churn
 #
 # One box per Churn level, saved to boxplot_monthlycharges_churn.png.

 p_box <- ggplot(telco, aes(x = Churn, y = MonthlyCharges, fill = Churn)) +
   geom_boxplot() +
   labs(
     title = "Monthly Charges by Churn",
     x = "Churn",
     y = "Monthly Charges"
   ) +
   theme_minimal()
 ggsave("boxplot_monthlycharges_churn.png", plot = p_box)

 # Split data into training, validation1, and validation2
 #
 # About 70% of rows go to training, about 15% to validation1, and whatever
 # is left to validation2. Indices are drawn without replacement and the
 # later sets are taken from the remainder, so the three sets are disjoint.

 set.seed(100)
 n <- nrow(telco)
 # seq_len(n) is empty when n is 0, whereas 1:n would yield c(1, 0).
 train.index <- sample(seq_len(n), size = round(0.70 * n))
 remaining.index <- setdiff(seq_len(n), train.index)
 valid1.index <- sample(remaining.index, size = round(0.15 * n))
 valid2.index <- setdiff(remaining.index, valid1.index)

 train.df <- telco[train.index, ]
 valid1.df <- telco[valid1.index, ]
 valid2.df <- telco[valid2.index, ]

 # Logistic regression model (simplified)
 #
 # Binomial GLM of Churn on a hand-picked subset of predictors, fitted on the
 # training rows only.

 logit.reg <- glm(
   Churn ~ SeniorCitizen + Dependents + tenure + MultipleLines +
     InternetService + Contract + PaperlessBilling + PaymentMethod +
     MonthlyCharges + TotalCharges,
   data = train.df,
   family = "binomial"
 )
 # print() explicitly so the summary also appears when the script is source()d.
 print(summary(logit.reg))

 # Evaluate on validation set
 #
 # Scores valid1.df with the logistic model and summarises the result as a
 # confusion matrix and an ROC curve.

 valid1_pred_probs <- predict(logit.reg, newdata = valid1.df, type = "response")
 # Classify as "Yes" when the predicted probability exceeds 0.5.
 valid1_pred <- factor(
   ifelse(valid1_pred_probs > 0.5, "Yes", "No"),
   levels = c("No", "Yes")
 )
 logit_conf <- confusionMatrix(valid1_pred, valid1.df$Churn, positive = "Yes")
 # State the control/case levels and the direction explicitly so pROC does
 # not pick them automatically from the data.
 logit_roc <- roc(
   response = valid1.df$Churn,
   predictor = valid1_pred_probs,
   levels = c("No", "Yes"),
   direction = "<"
 )

 # Decision Tree model
 #
 # Classification tree on four predictors, fitted on train.df and evaluated
 # on valid1.df.

 dt_model <- rpart(
   Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
   data = train.df,
   method = "class"
 )
 dt_pred <- predict(dt_model, valid1.df, type = "class")
 # Report sensitivity/specificity for the "Yes" class, matching the logistic
 # model's confusion matrix; without it caret uses the first level ("No").
 dt_conf <- confusionMatrix(dt_pred, valid1.df$Churn, positive = "Yes")

 # Naive Bayes model
 #
 # Naive Bayes classifier on the same four predictors as the decision tree,
 # fitted on train.df and evaluated on valid1.df.

 nb_model <- naiveBayes(
   Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
   data = train.df
 )
 nb_pred <- predict(nb_model, valid1.df)
 # type = "raw" returns per-class probabilities; the "Yes" column feeds the ROC.
 nb_probs <- predict(nb_model, valid1.df, type = "raw")
 # Use "Yes" as the positive class, matching the logistic model's matrix.
 nb_conf <- confusionMatrix(nb_pred, valid1.df$Churn, positive = "Yes")
 # Explicit levels/direction so pROC does not pick them automatically.
 nb_roc <- roc(
   response = valid1.df$Churn,
   predictor = nb_probs[, "Yes"],
   levels = c("No", "Yes"),
   direction = "<"
 )

 # Print evaluations
 #
 # Writes each model's confusion matrix to the console, plus the AUC for the
 # two models that have an ROC curve.

 cat("\nLogistic Regression Confusion Matrix:\n")
 print(logit_conf)
 cat("\nAUC (Logistic):", auc(logit_roc), "\n")

 cat("\nDecision Tree Confusion Matrix:\n")
 print(dt_conf)

 cat("\nNaive Bayes Confusion Matrix:\n")
 print(nb_conf)
 cat("\nAUC (Naive Bayes):", auc(nb_roc), "\n")

 # Save ROC curve plots
 #
 # Each curve is drawn on its own PNG device in the working directory;
 # dev.off() closes the device so the file is written out.

 png("logistic_roc_curve.png")
 plot(logit_roc, main = "ROC Curve - Logistic Regression", col = "darkgreen")
 dev.off()

 png("naive_bayes_roc_curve.png")
 plot(nb_roc, main = "ROC Curve - Naive Bayes", col = "blue")
 dev.off()

--- a/Project/tempt-code.R
+++ b/Project/tempt-code.R
@ -0,0 +1,38 @@
 # Telco Customer Churn Analysis Script

 # Load required libraries
 library(ggplot2)
 library(dplyr)
 library(rpart)
 library(e1071)
 library(caret)
 library(pROC)

 # Load dataset
 # Read the churn CSV (strings as factors), coerce TotalCharges to numeric,
 # keep only rows where that coercion produced a value, and pin the Churn
 # level order to "No", "Yes".
 telco <- read.csv("Telco-Customer-Churn.csv", stringsAsFactors = TRUE)

 telco[["TotalCharges"]] <- as.numeric(as.character(telco[["TotalCharges"]]))
 has_total_charges <- !is.na(telco[["TotalCharges"]])
 telco <- telco[has_total_charges, ]
 rm(has_total_charges)

 telco[["Churn"]] <- factor(telco[["Churn"]], levels = c("No", "Yes"))

 # Split data
 # Seeded so the partition is reproducible. p = 0.7 asks caret for 70% of
 # the rows as training indices; every row not selected becomes the test set.
 set.seed(42)
 trainIndex <- createDataPartition(
   y = telco[["Churn"]],
   p = 0.7,
   list = FALSE
 )
 train <- telco[trainIndex, ]
 test <- telco[-trainIndex, ]

 # Decision Tree model
 # Classification tree on four predictors, fitted on `train` and evaluated
 # on `test`.
 dt_model <- rpart(Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
                   data = train, method = "class")
 dt_pred <- predict(dt_model, test, type = "class")
 # Report sensitivity/specificity for the "Yes" class; without `positive`,
 # caret treats the first factor level ("No") as the positive result.
 dt_conf <- confusionMatrix(dt_pred, test$Churn, positive = "Yes")

 # Naive Bayes model
 # Naive Bayes classifier on four predictors, fitted on `train` and
 # evaluated on `test`.
 nb_model <- naiveBayes(Churn ~ tenure + MonthlyCharges + TotalCharges + SeniorCitizen,
                        data = train)
 nb_pred <- predict(nb_model, test)
 # Report sensitivity/specificity for the "Yes" class; without `positive`,
 # caret treats the first factor level ("No") as the positive result.
 nb_conf <- confusionMatrix(nb_pred, test$Churn, positive = "Yes")

 # ROC

--- a/Telco-Customer-Churn.csv
+++ b/Telco-Customer-Churn.csv
--- a/bar_churn.png
+++ b/bar_churn.png
--- a/bar_contract_churn.png
+++ b/bar_contract_churn.png
--- a/bar_internetservice_churn.png
+++ b/bar_internetservice_churn.png
--- a/boxplot_monthlycharges_churn.png
+++ b/boxplot_monthlycharges_churn.png
--- a/hist_monthlycharges.png
+++ b/hist_monthlycharges.png
--- a/hist_tenure.png
+++ b/hist_tenure.png