# Academic Documents / Professional Documents / Culture Documents
# (appears to be page-navigation text, not part of the script)
# Point the session at the project folder so the relative file name resolves.
setwd("D:/Work/SA Training/Clustering")
# Read the raw data set from CSV.
fullData <- read.csv(file = "OxaneData2.csv")
# Standardization (method 1): scale() centres and scales each column.
# NOTE(review): `newData` is not assigned in this section; presumably it is
# derived from `fullData` elsewhere - confirm.
newData2 <- scale(x = newData)
# method2
# Min-max normalisation of a single column to the [0, 1] range.
#
# Args:
#   var:  column name or index to rescale.
#   data: data frame or matrix holding the column. Defaults to the script-level
#         `newData`, which is what this function always read, so existing
#         fn_norm(var) calls keep working unchanged.
#
# Returns:
#   The rescaled column, (x - min(x)) / (max(x) - min(x)). `data` itself is
#   never modified. A constant column is returned as all zeros instead of the
#   NaN that 0 / 0 would produce.
fn_norm <- function(var, data = newData) {
  x <- data[, var]
  lo <- min(x)
  span <- max(x) - lo
  # Guard the zero-range case; an NA span (column contains NA) falls through
  # and propagates NA exactly as before.
  if (!is.na(span) && span == 0) {
    return(x - lo)
  }
  (x - lo) / span
}
# Creating training data and test data: hold out 10% of the rows (rounded
# down) for testing and keep the remaining rows for training.
FinData <- cbind(LoanId, newData2, DummyVar)
set.seed(3)  # fixed seed so the split is reproducible
n_rows <- nrow(FinData)
test <- sample(seq_len(n_rows), floor(n_rows / 10))
# Training rows are stored as positive indices. Negating `test` fails when the
# sample is empty (fewer than 10 rows): x[-integer(0), ] selects no rows at
# all, which would leave the training set empty instead of complete.
train <- setdiff(seq_len(n_rows), test)
# drop = FALSE keeps the two-dimensional shape even when one row is selected.
training_data <- FinData[train, , drop = FALSE]
testing_data <- FinData[test, , drop = FALSE]
# Estimating the number of clusters (method1) - this should be used when you
# have powerful systems.
# Install NbClust only when it is missing, rather than on every run.
if (!requireNamespace("NbClust", quietly = TRUE)) {
  install.packages("NbClust")
}
library(NbClust)
# Evaluate 2 to 4 clusters on the test data using Euclidean distance, the
# "complete" method and index = "all".
nb <- NbClust(testing_data, distance = "euclidean", min.nc = 2,
              max.nc = 4, method = "complete", index = "all")
# Total within-cluster sum of squares of a 6-centre k-means fit on the
# training data.
kmeans(training_data, 6)$tot.withinss
# Find out the cluster no. for each loan id in the test dataset.
# Hint: use the dist() function to find the Euclidean distance between two
# vectors. Use cluster centers (centroids) to find the nearest cluster.
# NOTE(review): `k1` is not assigned in this section; presumably it holds a
# fitted kmeans() result - confirm before running.
k1$centers