# [Document-viewer residue] You are on page: 1 of 7

# read.csv(file.choose(),header=TRUE) -> marks


# read.csv(file.choose(),header=TRUE) -> snames
# merge(snames,marks,by=intersect(names(snames),names(marks))) -> cresult

# Load the consolidated result sheet from a file the user picks interactively.
# Text columns are kept as character so each column can be converted explicitly.
cresult <- read.csv(file.choose(), header = TRUE, stringsAsFactors = FALSE)

# Columns 4 to 21 hold the marks. Convert them to integer in one step; any
# entry that is not a whole number in text form becomes NA (with a coercion
# warning), and those NAs are what the later tallies count as absentees.
cresult[4:21] <- lapply(cresult[4:21], as.integer)

# Columns 22 and 23 are categorical.
# NOTE(review): presumably these are the SEC and GEN columns used further
# down -- confirm against the input file's header.
cresult[22:23] <- lapply(cresult[22:23], factor)

# Total number of missing cells in the whole table after conversion.
sum(is.na(cresult))

# SN =c("PS","EDC","ET","SS","GUI","MPMC","EDC LAB","GUI LAB","MPMC LAB")


# v <- character()

# rep("-",9)

# ra <- data.frame(v,v,v,v,v)

#SN =c("PEHV","DCS","ATFL","OS","DBMS","WT","SSL","DBMSL","WTL")
# Short names of the nine subjects, in the order their mark columns appear:
# six theory subjects followed by three labs.
SN <- c("MIII","DMS","DLD","OS","DS","OOP","SSL","DSL","OOPL")

# Count the result categories for one subject.
#   first     - marks from the subject's first column; the pass mark applies
#               to this column
#   second    - marks from the subject's following column
#   pass_mark - minimum value of `first` needed to pass
# Returns five integer counts, in this order: total of 80 or more, total of
# 60 to 79, total of 40 to 59, failures (total below 40 or `first` below the
# pass mark) and absentees (`first` is NA). A comparison that evaluates to NA
# is not counted. Marks are whole numbers (they are coerced with as.integer()
# when the file is loaded), so `>= pass_mark` matches the original `> 23` /
# `> 29` tests.
tally_subject <- function(first, second, pass_mark) {
  total <- first + second
  cleared <- first >= pass_mark
  c(
    sum(total > 79, na.rm = TRUE),
    sum(cleared & total > 59 & total < 80, na.rm = TRUE),
    sum(cleared & total >= 40 & total < 60, na.rm = TRUE),
    sum(total < 40 | first < pass_mark, na.rm = TRUE),
    sum(is.na(first))
  )
}

# Each subject occupies two adjacent columns. Theory subjects start at the
# even columns 4..14 and need 24 in the first column; labs start at the even
# columns 16..20 and need 30.
first_cols <- c(seq(4, 14, by = 2), seq(16, 20, by = 2))
pass_marks <- rep(c(24L, 30L), times = c(6L, 3L))

# One column of five counts per subject; transposed below so that subjects
# become rows.
counts <- vapply(
  seq_along(first_cols),
  function(k) {
    tally_subject(
      cresult[[first_cols[k]]],
      cresult[[first_cols[k] + 1]],
      pass_marks[k]
    )
  },
  integer(5)
)

ra <- data.frame(t(counts))
colnames(ra) <- c("GTE80","GT59LE79","GTE40LT60","Failures","Absentees")
rownames(ra) <- SN

# Plots of the result analysis

# barplot(as.matrix(ra), main="Result Analysis",ylab = "Number",
#         col=heat.colors(9), space=0.1,cex=1)

# legend(3.5, 300, rownames(ra), cex=0.8, fill=heat.colors(9))


#
# Stacked chart: one bar per result category, one coloured segment per subject.
barplot(
  as.matrix(ra),
  main = "Result Analysis",
  col = rainbow(9),
  space = 0.1,
  axes = FALSE
)
legend(
  "topright",
  legend = rownames(ra),
  cex = 1,
  fill = rainbow(9),
  bty = "n"
)

# Grouped chart: one group of bars per subject, one bar per result category.
barplot(
  t(ra),
  main = "Result Analysis",
  ylab = "Number",
  xlab = "Subject Name",
  col = rainbow(5),
  beside = TRUE
)
legend(
  "top",
  legend = colnames(ra),
  cex = 0.6,
  fill = rainbow(5),
  bty = "n"
)
#
#
# all_data <- as.matrix(ra)
# for(i in 1:9)
#{
# sub_data <- all_data[i,]
# sub_lables <- round(sub_data/sum(sub_data) * 100,1)
# sub_lables <- paste(sub_lables,"%",sep=" ")
# pie(sub_data, main=paste(SN[i],"Result Analysis",sep=" "),
#     col=rainbow(length(sub_data)), labels=sub_lables)
# legend("topleft", colnames(ra), cex=1,
#        fill=rainbow(length(sub_data)),bty = "n")

#}

# Register numbers of students who failed in each theory subject

# Collect, for every subject, the values in column 2 (the register numbers)
# of the students who failed it, and print each list as it is built.
# A student fails when the two-column total is below 40 or the first column
# is below the pass mark: 24 for the six theory subjects (first columns
# 4, 6, ..., 14) and 30 for the three labs (first columns 16, 18, 20).
subject_cols <- c(seq(4, 14, by = 2), seq(16, 20, by = 2))
subject_pass <- rep(c(24, 30), times = c(6, 3))

r <- list()
for (j in seq_along(subject_cols)) {
  first_marks <- cresult[, subject_cols[j]]
  total_marks <- first_marks + cresult[, subject_cols[j] + 1]
  # which() drops rows whose condition is NA, so they are not listed.
  failed_rows <- which((total_marks < 40) | (first_marks < subject_pass[j]))
  r[[j]] <- cresult[failed_rows, 2]
  print(paste("Students who failed in", SN[j], sep = " "))
  print(r[[j]])
}

# Register numbers of students who failed exclusively in each subject


# For every subject, list the students who failed that subject and no other.
# `r[[k]]` holds the register numbers of the students who failed subject k,
# and `SN[k]` is that subject's short name.
#
# The comparison set is every other entry of `r`, theory and lab alike.
# Previously only subjects 1..6 were unioned, so a student who had also
# failed a lab was still reported as failing "exclusively".
ef <- list()
for (i in seq_along(r)) {
  # Register numbers of everyone who failed at least one other subject.
  failed_elsewhere <- unlist(r[-i], use.names = FALSE)
  ef[[i]] <- setdiff(r[[i]], failed_elsewhere)
  print(paste("Students who failed exclusively in", SN[i], sep = " "))
  print(ef[[i]])
}

# Find duplicate records


# Register numbers that appear more than once (one entry for every repeat
# after the first occurrence).
dup.records <- cresult$REG.NO[duplicated(cresult$REG.NO)]

# Show every row carrying one of those register numbers, first occurrence
# included.
cresult[cresult$REG.NO %in% dup.records, ]

library(stringr)
################# Find previous batch student's records

# prevstuds <- cresult[which(str_detect(cresult$REG.NO, "Y13")),]


# prevstuds <- rbind(prevstuds,cresult[which(str_detect(cresult$REG.NO, "Y12")),])

# prevstuds <- cresult[grep("Y13",cresult$REG.NO),]


# prevstuds <- rbind(prevstuds,cresult[grep("Y12",cresult$REG.NO),])
# prevstuds[,1:3]

################# Descriptive Statistics

#### Univariate statistics for a qualitative variable


# (i.e. one categorical variable)
# Frequency table of the GEN column.
table(cresult$GEN)

# Bar charts of the GEN and SEC counts, with 10 units of headroom above the
# tallest bar.
# NOTE(review): plot() draws a bar chart here only if GEN and SEC are factors;
# presumably they are the columns 22-23 converted with factor() when the file
# is loaded -- confirm.
plot(
  cresult$GEN,
  main = "Gender distribution",
  ylim = c(0, max(table(cresult$GEN)) + 10),
  xlab = "Gender",
  ylab = "Count/Frequency",
  col = "#abc255"
)

# The x-axis label previously read "Secton"; corrected to "Section".
plot(
  cresult$SEC,
  main = "Student distribution",
  ylim = c(0, max(table(cresult$SEC)) + 10),
  xlab = "Section",
  ylab = "Count/Frequency",
  col = "#def255"
)

# The same two distributions as pie charts.
pie(table(cresult$GEN))
pie(table(cresult$SEC))
#### Univariate statistics for a quantitative variable
# (i.e. one numeric variable)

# Analyze the dispersion (spread)


# Extremes of the X301E column, ignoring missing values.
min(cresult$X301E, na.rm = TRUE)
max(cresult$X301E, na.rm = TRUE)

# Central tendency (location).
mean(cresult$X301E, na.rm = TRUE)
median(cresult$X301E, na.rm = TRUE)

# Mode: which.max() returns the position of the most frequent value in the
# frequency table; the name attached to the result is the value itself.
# NA is included as a candidate when present.
which.max(table(cresult$X301E, useNA = "ifany"))

# Dispersion (spread).
range(cresult$X301E, na.rm = TRUE)
diff(range(cresult$X301E, na.rm = TRUE))
quantile(cresult$X301E, na.rm = TRUE)
quantile(cresult$X301E, probs = 0.95, na.rm = TRUE)
IQR(cresult$X301E, na.rm = TRUE)
var(cresult$X301E, na.rm = TRUE)
sd(cresult$X301E, na.rm = TRUE)

# Summarize a quantitative variable.
summary(cresult$X301E, na.rm = TRUE)
boxplot(
  x = cresult$X301E,
  xlab = "Marks in MIII external",
  horizontal = TRUE,
  col = "#fed786"
)
# hist() builds its default title and axis label from the argument
# expression, so the column is passed as written rather than through an alias.
hist(cresult$X301E)

#### Bivariate statistics for two qualitative variables


# (i.e. two categorical variables)
# Two categorical variables: cross-tabulation of SEC against GEN.
table(cresult$SEC, cresult$GEN)

#### Bivariate statistics for two quantitative variables
# (i.e. two numeric variables)

# Covariance, using only the rows where both columns are present.
# `method` may be "pearson", "kendall" or "spearman". That list used to trail
# onto a line of its own, where the bare strings were a syntax error.
cov(cresult$X305E, cresult$L302E, use = "complete.obs", method = "pearson")
cov(cresult$X305I, cresult$L302I, use = "complete.obs")

# Correlation coefficients over the same complete rows.
cor(cresult$X305E, cresult$L302E, use = "complete.obs")
cor(cresult$X305I, cresult$L302I, use = "complete.obs")

# Scatter plot of the two columns; the axis labels come from the argument
# expressions.
plot(cresult$X305E, cresult$L302E)

# Linear models. The formulas keep the `cresult$` prefix so the fitted
# objects' calls and coefficient names stay exactly as before.
bfrl <- lm(cresult$L302E ~ cresult$X305E)
lm(cresult$L302E ~ cresult$L302I + cresult$X305I)
rmod <- lm(cresult$X305E ~ cresult$GEN + cresult$X305I)

####### Bivariate statistics for both a qualitative and quantitative variable


# (i.e. one categorical and one numeric variable)
# Mean of X305E and of X305I for every SEC x GEN combination, ignoring
# missing values. Rows of each result are SEC levels, columns are GEN levels.
with(cresult, tapply(X305E, list(SEC, GEN), mean, na.rm = TRUE))
with(cresult, tapply(X305I, list(SEC, GEN), mean, na.rm = TRUE))

# Per-column summary of the whole table.
summary(cresult)

# [Document-viewer residue] You may also like