Practical Questions
(ii) Create a rule set E that contains rules to check for the following conditions:
4. If age is less than 18, the age group should be child; if age is between 18 and 65, the age group should be adult; if age is more than 65, the age group should be elderly.
(iii) Check whether rule set E is violated by the data in the file people.txt.
people.txt
21 adult 6 single -1
2 child 3 married 0
34 child -7 married 3
people.r
# install.packages("editrules")   # run once if the package is not yet installed
library(editrules)
# read the data to be checked; column names follow the standard people data set
# (only age and agegroup are used by the rules below)
d <- read.table("people.txt", header = FALSE,
                col.names = c("age", "agegroup", "height", "status", "yearsmarried"))
# rule set E: age must be non-negative and agegroup must match age
E <- editset(expression(
age >= 0,
if (age < 18) agegroup == "child",
if (age > 65) agegroup == "elderly",
if (agegroup == "adult") age >= 18,
if (agegroup == "adult") age <= 65
))
sm <- violatedEdits(E, d)
summary(sm)
plot(sm)
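# optionally, editrules can also suggest which field to adapt in each faulty record
# (localizeErrors ships with the same package)
le <- localizeErrors(E, d)
le$adapt   # TRUE marks the field(s) to change per record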
OUTPUT:
Rule set shown by View(E); violations summary and plot.
iii) Define these rules in a separate text file and read them (see the sketch after this list).
iv) Determine how often each rule is broken (violatedEdits). Also summarize and plot the result.
v) Find outliers in sepal length using boxplot and boxplot.stats.
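A minimal sketch for question iii, assuming the rules live in a plain-text file read with editrules' editfile(); the file name iris_rules.txt and the example rules are illustrative, not from the original manual:
# iris_rules.txt could contain, one rule per line:
#   Sepal.Length > 0
#   Sepal.Length < 30
#   Species %in% c('setosa', 'versicolor', 'virginica')
E <- editfile('iris_rules.txt')
ve <- violatedEdits(E, dirtyIris)   # dirtyIris as read in below
summary(ve)   # how often each rule is broken (question iv)
plot(ve)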
library(dplyr)
library(editrules)
dirtyIris = read.csv(file = 'dirty_iriss.csv', sep = ',', header = TRUE)
# count the rows that contain at least one missing value
countt = 0
for (i in 1:nrow(dirtyIris)) {
  for (j in 1:ncol(dirtyIris)) {
    if (is.na(dirtyIris[i, j])) {
      countt = countt + 1
      break
    }
  }
}
print(countt)
percentage = countt * 100 / nrow(dirtyIris)   # percentage of incomplete rows
print('Percentage of complete entries is:')
print(100 - percentage)
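# a loop-free equivalent (sketch) using base R's complete.cases():
print(sum(!complete.cases(dirtyIris)))        # rows with at least one NA
print(100 * mean(complete.cases(dirtyIris)))  # percentage of complete rows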
# replace special values (Inf, negative measurements) in the four numeric columns with NA
for (i in 1:nrow(dirtyIris)) {
  for (j in 1:4) {
    if (!is.na(dirtyIris[i, j])) {
      if (is.infinite(dirtyIris[i, j]) | dirtyIris[i, j] < 0) {
        dirtyIris[i, j] = NA
      }
    }
  }
}
View(dirtyIris)
# outliers in sepal length; boxplot.stats expects a numeric vector, not a one-column data frame
boxplot(dirtyIris$Sepal.Length, main = 'Sepal.Length')
stat = boxplot.stats(dirtyIris$Sepal.Length)
View(stat$out)
View(dirtyIris[!is.na(dirtyIris$Sepal.Length) & dirtyIris$Sepal.Length %in% stat$out, ])
OUTPUT:
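boxplot.stats flags points lying more than 1.5 times the interquartile range beyond the hinges; a minimal sketch that reproduces essentially the same outlier set by hand (the hinges are approximated here by the quartiles):
x <- dirtyIris$Sepal.Length
q <- quantile(x, c(0.25, 0.75), na.rm = TRUE)
iqr <- q[2] - q[1]
x[!is.na(x) & (x < q[1] - 1.5 * iqr | x > q[2] + 1.5 * iqr)]   # manual outlier check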
Program3.R
library(BBmisc)
# the wine data set is assumed to be loaded beforehand, e.g. wine <- read.csv('wine.csv')
colMeanWine = c()
# standardize every numeric/integer column of wine (columns whose mean is already 0 are skipped)
for (col in names(wine)) {
  if (class(wine[, col]) == 'numeric' || class(wine[, col]) == 'integer') {
    colMeanWine[col] = colMeans(wine[col], na.rm = TRUE)
    if (colMeanWine[col] != 0) {
      wine[col] = normalize(wine[col], method = "standardize")
    }
  }
}
# recompute the column means; after standardization they should be (close to) 0
for (col in names(wine)) {
  if (class(wine[, col]) == 'numeric' || class(wine[, col]) == 'integer') {
    colMeanWine[col] = colMeans(wine[col], na.rm = TRUE)
  }
}
View(wine)
View(colMeanWine)
colMeanIris = c()
# repeat the same standardization for the numeric columns of iris
for (col in names(iris)) {
  if (class(iris[, col]) == 'numeric' || class(iris[, col]) == 'integer') {
    colMeanIris[col] = colMeans(iris[col], na.rm = TRUE)
    if (colMeanIris[col] != 0) {
      iris[col] = normalize(iris[col], method = "standardize")
    }
  }
}
# recompute the column means to verify they are (close to) 0 after standardization
for (col in names(iris)) {
  if (class(iris[, col]) == 'numeric' || class(iris[, col]) == 'integer') {
    colMeanIris[col] = colMeans(iris[col], na.rm = TRUE)
  }
}
View(iris)
View(colMeanIris)
OUTPUT:
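The same standardization is available in base R via scale(), without the BBmisc dependency; a minimal sketch on iris:
numCols <- sapply(iris, is.numeric)       # select the numeric columns
irisStd <- iris
irisStd[numCols] <- scale(iris[numCols])  # subtract the column mean, divide by the sd
round(colMeans(irisStd[numCols]), 10)     # means are now 0 up to rounding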
Run the following algorithms on two real data sets and use appropriate evaluation measures to compute the correctness of the obtained patterns:
Program 4: Run the Apriori algorithm to find frequent itemsets and association rules
4.1 Use a minimum support of 50% and a minimum confidence of 75%
Program4.R
library(arules)
data(Adult)   # transaction data set shipped with arules
# mine association rules with minimum support 50% and minimum confidence 75%
rules <- apriori(Adult, parameter = list(supp = 0.5, conf = 0.75, target = "rules"))
summary(rules)
inspect(head(rules))
OUTPUT:
Inspection of rules:
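Support and confidence alone can admit trivial rules, so the mined rules are often ranked by lift as well (lift > 1 means antecedent and consequent co-occur more than expected under independence); a minimal sketch using functions from arules:
inspect(head(sort(rules, by = 'lift'), 5))   # strongest rules by lift
interestMeasure(head(rules), measure = c('support', 'confidence', 'lift'),
                transactions = Adult)        # extra quality measures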
Program 5: Use Naive Bayes, K-nearest neighbour, and Decision tree classification algorithms to build classifiers. Divide the data set into training and test sets. Compare the accuracy of the different classifiers under the following situations:
Program5.R
library(e1071)   # naiveBayes
library(rpart)   # decision trees
library(class)   # knn
library(caret)   # confusionMatrix, trainControl, train
#Holdout method
# note: iris is sorted by Species, so this unshuffled split leaves mostly one class for testing;
# compare with the random subsampling below
smp_size <- floor(0.75 * nrow(iris))
train <- iris[1:smp_size, ]
test <- iris[-(1:smp_size), ]
#Naive Bayes
model <- naiveBayes(Species ~ ., data = train)
prediction <- predict(model, test)
View(confusionMatrix(prediction, test[,5])$table, 'NaiveBayes using Holdout')
#Decision Tree
model <- rpart(Species ~ ., data = train)
prediction <- predict(model, test, type = "class")
View(confusionMatrix(prediction, test[,5])$table, 'Decision Tree using Holdout')
#KNN
prediction = knn(train[,-5], test[,-5], factor(train[,5]), k = 10)
View(confusionMatrix(prediction, test[,5])$table, 'KNN using Holdout')
#Random Subsampling
smp_size <- floor(0.75 * nrow(iris))
set.seed(123)
train_ind <- sample(nrow(iris), size = smp_size)
train <- iris[train_ind, ]
test <- iris[-train_ind, ]
#Naive Bayes
model <- naiveBayes(Species ~ ., data = train)
prediction <- predict(model, test)
View(confusionMatrix(prediction, test[,5])$table, 'NaiveBayes using Random Sampling')
#Decision Tree
model <- rpart(Species ~ ., data = train)
prediction <- predict(model, test, type = "class")
View(confusionMatrix(prediction, test[,5])$table, 'Decision Tree using Random Sampling')
#KNN
prediction = knn(train[,-5], test[,-5], factor(train[,5]), k = 10)
View(confusionMatrix(prediction, test[,5])$table, 'KNN using Random Sampling')
#K-Fold Cross Validation Naive Bayes (caret's method = "nb" also requires the klaR package)
train_control <- trainControl(method="cv", number=10)
model <- train(Species~., data=iris, trControl=train_control, method="nb")
prediction <- predict(model, test)
View(confusionMatrix(prediction, test[,5])$table, 'NaiveBayes using KFOLD')
OUTPUT:
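To compare the classifiers numerically, each confusion matrix can be reduced to a single overall accuracy; caret's confusionMatrix() already reports it, as in this minimal sketch:
cm <- confusionMatrix(prediction, test[,5])
cm$overall['Accuracy']                    # accuracy reported by caret
sum(prediction == test[,5]) / nrow(test)  # the same ratio computed by hand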
program6.R
library(dbscan)
#kmeans
cl <- kmeans(iris[,-5], 3)            # partition into 3 clusters
plot(iris[, 1:2], col = cl$cluster)   # plot two features so points() can overlay the centers
points(cl$centers[, 1:2], col = 1:3, pch = 8)
#hierarchical
clusters <- hclust(dist(iris[, -5]))
plot(clusters)                        # dendrogram
#DBSCAN
cl <- dbscan(iris[,-5], eps = .5, minPts = 5)
plot(iris[, 1:2], col = cl$cluster + 1)   # +1 keeps noise points (cluster 0) visible
OUTPUT:
K-Means Clustering
Hierarchical Clustering
DBSCAN Clustering
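Because iris carries known species labels, the obtained clusters can be checked against them, and cluster quality can be summarized with the average silhouette width (from the cluster package, closer to 1 is better); a minimal sketch for the k-means result:
km <- kmeans(iris[,-5], 3)
table(km$cluster, iris$Species)                 # clusters vs. true species
library(cluster)
sil <- silhouette(km$cluster, dist(iris[,-5]))
mean(sil[, 'sil_width'])                        # average silhouette width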