# Summary statistics ----
# Read the raw churn data; the first line of the file holds the column names.
churn_data <- read.csv("churn.all", header = TRUE)

# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(churn_data)
}

# Column-wise summary of the whole data set.
summary_all <- summary(churn_data)
if (interactive()) {
  View(summary_all)
}

# Same summary restricted to rows whose Status compares equal to 'TRUE' and
# 'FALSE' respectively.
# NOTE(review): presumably Status == 'TRUE' marks churned customers and
# 'FALSE' marks active ones -- confirm against the data dictionary.
summary_churn <- summary(subset(churn_data, Status == 'TRUE'))
summary_active <- summary(subset(churn_data, Status == 'FALSE'))

# Stack the three summary tables (all, churn, active) into one CSV file.
write.csv(
  rbind(summary_all, summary_churn, summary_active),
  file = "summary_file.csv"
)
# Correlation matrix ----
# Work on a copy of churn_data that keeps every column except the five listed
# below, so that churn_data itself is left untouched.
# NOTE(review): presumably these are the non-numeric columns, which cor()
# cannot handle -- confirm against the data.
cor_data <- churn_data[
  setdiff(
    names(churn_data),
    c("Status", "voice.mail.plan", "international.plan", "phone.number", "state")
  )
]

# Pairwise correlations between all remaining columns, saved as CSV.
correlation_all <- cor(cor_data)
write.csv(correlation_all, file = "correlation_file.csv")
# Cleaning and derived features ----
# Remove the four charge columns from churn_data.
# NOTE(review): presumably dropped because they are redundant with the
# matching *.minutes columns -- confirm against correlation_file.csv.
churn_data$total.day.charge <- NULL
churn_data$total.eve.charge <- NULL
churn_data$total.night.charge <- NULL
churn_data$total.intl.charge <- NULL

# Average minutes per call for each period (minutes / calls).
# A row with zero calls yields NaN (0 / 0) or Inf (x / 0) here; those values
# are written to the CSV as-is.
churn_data$avg.minute.day <-
  churn_data$total.day.minutes / churn_data$total.day.calls
churn_data$avg.minute.eve <-
  churn_data$total.eve.minutes / churn_data$total.eve.calls
churn_data$avg.minute.night <-
  churn_data$total.night.minutes / churn_data$total.night.calls
churn_data$avg.minute.intl <-
  churn_data$total.intl.minutes / churn_data$total.intl.calls

# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(churn_data)
}

# Write the cleaned data once, after the derived columns exist. No q() here:
# quitting would end the session and skip everything after this point.
write.csv(churn_data, file = "churn_data_clean.all.csv", row.names = FALSE)
# Train / test split ----
# Start again from the raw file; the first line holds the column names.
churn_data <- read.csv("churn.all", header = TRUE)

# View() opens a data viewer, which is only useful in an interactive session.
if (interactive()) {
  View(churn_data)
}

# Remove the four charge columns plus state and phone.number.
churn_data$total.day.charge <- NULL
churn_data$total.eve.charge <- NULL
churn_data$total.night.charge <- NULL
churn_data$total.intl.charge <- NULL
churn_data$state <- NULL
churn_data$phone.number <- NULL

# Average minutes per call for each period (minutes / calls).
# A row with zero calls yields NaN (0 / 0) or Inf (x / 0) here.
churn_data$avg.minute.day <-
  churn_data$total.day.minutes / churn_data$total.day.calls
churn_data$avg.minute.eve <-
  churn_data$total.eve.minutes / churn_data$total.eve.calls
churn_data$avg.minute.night <-
  churn_data$total.night.minutes / churn_data$total.night.calls
churn_data$avg.minute.intl <-
  churn_data$total.intl.minutes / churn_data$total.intl.calls

# 75% of the rows (rounded down) go to the training set.
smp_size <- floor(0.75 * nrow(churn_data))

# Set the seed to make the partition reproducible.
set.seed(123)
train_ind <- sample(seq_len(nrow(churn_data)), size = smp_size)
train <- churn_data[train_ind, ]
test <- churn_data[-train_ind, ]

# Show the Status distribution of both partitions and the training-set size.
# print() is explicit so the output also appears when the file is source()d.
print(table(train$Status))
print(table(test$Status))
print(smp_size)

# NOTE(review): the training set is written to "churn_data_clean.all.csv",
# the same file name used earlier in this script for the full cleaned data,
# so that file ends up holding only the training rows -- confirm this is
# intended.
write.csv(train, file = "churn_data_clean.all.csv", row.names = FALSE)
write.csv(test, file = "churn_data_clean_test.all.csv", row.names = FALSE)

# End the R session.
q()
