# Results Analysis in R

# read.csv(file.choose(),header=TRUE) -> marks

# read.csv(file.choose(),header=TRUE) -> snames
# merge(snames,marks,by=intersect(names(snames),names(marks))) -> cresult
# Load the consolidated result sheet from a file picked interactively.
# Strings stay as character vectors; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
cresult <- read.csv(file.choose(), header = TRUE, stringsAsFactors = FALSE)
# The rest of the script addresses columns by position up to 23, so stop early
# with a clear message when the chosen file is narrower than that.
if (ncol(cresult) < 23) {
  stop("Expected at least 23 columns in the result sheet, found ",
       ncol(cresult), call. = FALSE)
}
# Coerce columns 4 to 21 to integer. Entries that cannot be parsed as numbers
# become NA (R raises a coercion warning for them).
cresult[4:21] <- lapply(cresult[4:21], as.integer)
# Columns 22 and 23 are converted to factors.
cresult[, 22] <- factor(cresult[, 22])
cresult[, 23] <- factor(cresult[, 23])
# Total number of missing cells after the conversions.
sum(is.na(cresult))
# SN =c("PS","EDC","ET","SS","GUI","MPMC","EDC LAB","GUI LAB","MPMC LAB")

# v <- character()
# rep("-",9)
# ra <- data.frame(v,v,v,v,v)
#SN =c("PEHV","DCS","ATFL","OS","DBMS","WT","SSL","DBMSL","WTL")
# Short names of the nine subjects; used as row labels of the summary table.
SN <- c("MIII","DMS","DLD","OS","DS","OOP","SSL","DSL","OOPL")
# Summary table: one row per subject, one column per performance band.
ra <- matrix(nrow = 9, ncol = 5)
# colnames(ra) <- c("SUB NAME","GTE80","GT59LE79","GTE40LT60","FAILURES")
j <- 1
### Performance of students in theory subjects
# Each subject occupies two adjacent mark columns: `i` is the first column of
# the pair and `tv` is the sum of both. The theory pairs start at columns
# 4, 6, ..., 14; a pass needs at least 24 in column `i` and a total of 40.
for (i in seq(4, 14, by = 2)) {
  tv <- cresult[, i] + cresult[, i + 1]
  v1 <- sum(tv > 79, na.rm = TRUE)
  v2 <- sum((cresult[, i] > 23) & (tv > 59) & (tv < 80), na.rm = TRUE)
  v3 <- sum((cresult[, i] > 23) & (tv >= 40) & (tv < 60), na.rm = TRUE)
  v4 <- sum((tv < 40) | (cresult[, i] < 24), na.rm = TRUE)
  # Rows with no mark in column `i` are reported as absentees.
  v5 <- sum(is.na(cresult[, i]))
  nr <- c(v1, v2, v3, v4, v5)
  ra[j, ] <- nr
  j <- j + 1
}
### Performance of students in labs
# Same banding as above, but a pass needs at least 30 in column `i`.
# NOTE(review): the lab pairs are taken to start at columns 16, 18 and 20
# (the remaining integer mark columns up to 21, filling rows 7-9 of `ra`);
# confirm against the layout of the result sheet.
for (i in seq(16, 20, by = 2)) {
  tv <- cresult[, i] + cresult[, i + 1]
  v1 <- sum(tv > 79, na.rm = TRUE)
  v2 <- sum((cresult[, i] > 29) & (tv > 59) & (tv < 80), na.rm = TRUE)
  v3 <- sum((cresult[, i] > 29) & (tv >= 40) & (tv < 60), na.rm = TRUE)
  v4 <- sum((tv < 40) | (cresult[, i] < 30), na.rm = TRUE)
  v5 <- sum(is.na(cresult[, i]))
  nr <- c(v1, v2, v3, v4, v5)
  ra[j, ] <- nr
  j <- j + 1
}
# Turn the count matrix into a labelled data frame: bands as columns,
# subjects as rows.
ra <- data.frame(ra)
colnames(ra) <- c("GTE80","GT59LE79","GTE40LT60","Failures","Absentees")
rownames(ra) <- SN
# Plots of the result analysis
# barplot(as.matrix(ra), main="Result Analysis",ylab = "Number",
#         col=heat.colors(9), space=0.1,cex=1)
# legend(3.5, 300, rownames(ra), cex=0.8, fill=heat.colors(9))

# Stacked bars: one bar per performance band (column of `ra`), with one
# coloured segment per subject (row of `ra`). The value axis is suppressed.
barplot(as.matrix(ra), main = "Result Analysis",
        col = rainbow(9), space = 0.1, axes = FALSE)
legend("topright", rownames(ra), cex = 1, fill = rainbow(9), bty = "n")

# Grouped bars: transposing `ra` gives one group per subject with one bar
# per performance band.
barplot(t(ra), main = "Result Analysis", ylab = "Number",
        xlab = "Subject Name", col = rainbow(5), beside = TRUE)
legend("top", colnames(ra), cex = 0.6, fill = rainbow(5), bty = "n")

# Optional: one pie chart per subject showing the share of each band.
# all_data <- as.matrix(ra)
# for(i in 1:9)
# {
#   sub_data <- all_data[i,]
#   sub_lables <- round(sub_data/sum(sub_data) * 100,1)
#   sub_lables <- paste(sub_lables,"%",sep=" ")
#   pie(sub_data, main=paste(SN[i],"Result Analysis",sep=" "),
#       col=rainbow(length(sub_data)),
#       labels=sub_lables)
#   legend("topleft", colnames(ra), cex=1,
#          fill=rainbow(length(sub_data)),bty = "n")
# }
# Register numbers of students who failed in each theory subject
# `r[[k]]` holds the values of column 2 of `cresult` for the rows that fail
# subject k; subjects are numbered in the same order as `SN`.
r <- vector("list", length(SN))
j <- 1
# Theory subjects: column pairs starting at 4, 6, ..., 14. A row fails when
# the pair total is below 40 or the first column of the pair is below 24.
for (i in seq(4, 14, by = 2)) {
  tv <- cresult[, i] + cresult[, i + 1]
  r[[j]] <- cresult[which((tv < 40) | (cresult[, i] < 24)), 2]
  print(paste("Students who failed in", SN[j], sep = " "))
  print(r[[j]])
  j <- j + 1
}
# Register numbers of students who failed in each lab
# Same rule with a minimum of 30 in the first column of the pair.
# NOTE(review): the lab pairs are taken to start at columns 16, 18 and 20
# (the remaining integer mark columns up to 21); confirm against the sheet.
for (i in seq(16, 20, by = 2)) {
  tv <- cresult[, i] + cresult[, i + 1]
  r[[j]] <- cresult[which((tv < 40) | (cresult[, i] < 30)), 2]
  print(paste("Students who failed in", SN[j], sep = " "))
  print(r[[j]])
  j <- j + 1
}
# Register numbers of students who failed exclusively in each subject

ef <- vector("list", length(r))
for (i in seq_along(r)) {
  # Everyone who failed any other subject, theory or lab, so that
  # "exclusively" means the student failed nowhere else.
  u <- unique(unlist(r[-i]))
  ef[[i]] <- setdiff(r[[i]], u)
  print(paste("Students who failed exclusively in", SN[i], sep = " "))
  print(ef[[i]])
}
# Find duplicate records
# Register numbers that occur again after their first appearance.
dup.records <- cresult$REG.NO[duplicated(cresult$REG.NO)]
# Show every row carrying one of those numbers, first occurrence included.
cresult[cresult$REG.NO %in% dup.records, ]
library(stringr)
################# Find previous batch student's records
# prevstuds <- cresult[which(str_detect(cresult$REG.NO, "Y13")),]

# prevstuds <- rbind(prevstuds,cresult[which(str_detect(cresult$REG.NO, "Y12")),])
# prevstuds <- cresult[grep("Y13",cresult$REG.NO),]

# prevstuds <- rbind(prevstuds,cresult[grep("Y12",cresult$REG.NO),])
# prevstuds[,1:3]
################# Descriptive Statistics
#### Univariate statistics for a qualitative variable
# (i.e. one categorical variable)
# Frequency of each level.
table(cresult$GEN)
# plot() draws a bar chart of the level counts when given a factor; the
# y-axis is extended 10 units above the largest count for headroom.
plot(cresult$GEN, main = "Gender distribution",
     ylim = c(0, max(table(cresult$GEN)) + 10),
     xlab = "Gender", ylab = "Count/Frequency", col = "#abc255")
plot(cresult$SEC, main = "Student distribution",
     ylim = c(0, max(table(cresult$SEC)) + 10),
     xlab = "Section", ylab = "Count/Frequency", col = "#def255")
# The same frequencies shown as pie charts.
pie(table(cresult$GEN))
pie(table(cresult$SEC))
#### Univariate statistics for a quantitative variable
# (i.e. one numeric variable)
# All statistics below are computed on the column X301E; missing values are
# dropped with na.rm = TRUE where the function supports it.
# Analyze the dispersion (spread)

min(cresult$X301E,na.rm = TRUE)
max(cresult$X301E,na.rm = TRUE)
# Analyze central tendency (location)

mean(cresult$X301E,na.rm = TRUE)
median(cresult$X301E,na.rm = TRUE)
# For mode calculation
# which.max() returns the position of the most frequent entry in the frequency
# table; the name printed with it is the modal value. NA is counted as its own
# category when present (useNA = "ifany").
which.max(table(cresult$X301E,useNA = "ifany"))
# Smallest and largest value, and the distance between them.
range(cresult$X301E,na.rm = TRUE)
diff(range(cresult$X301E,na.rm = TRUE))
# Default quantiles (0%, 25%, 50%, 75%, 100%) followed by the 95th percentile.
quantile(cresult$X301E,na.rm = TRUE)
quantile(cresult$X301E,0.95,na.rm = TRUE)
# Interquartile range, variance and standard deviation.
IQR(cresult$X301E,na.rm = TRUE)
var(cresult$X301E,na.rm = TRUE)
sd(cresult$X301E,na.rm = TRUE)
# Summarize a quantitative variable

summary(cresult$X301E,na.rm = TRUE)
# Horizontal box plot and histogram of the same column.
boxplot(x=cresult$X301E, xlab="Marks in MIII external", horizontal = TRUE,
col="#fed786")
hist(cresult$X301E)
#### Bivariate statistics for two qualitative variables
# (i.e. two categorical variables)
# Cross-tabulation of SEC (rows) against GEN (columns).
table(cresult$SEC,cresult$GEN)
#### Bivariate statistics for two quantitative variables
# (i.e. two numeric variables)
# Covariance
# `method` may be "pearson", "kendall" or "spearman"; only rows where both
# values are present are used (use = "complete.obs").
cov(cresult$X305E,cresult$L302E,use="complete.obs",method = "pearson")
cov(cresult$X305I,cresult$L302I,use="complete.obs")
# Correlation coefficients
cor(cresult$X305E,cresult$L302E,use="complete.obs")
cor(cresult$X305I,cresult$L302I,use="complete.obs")
# Scatter plot of the two columns used in the first covariance above.
plot(cresult$X305E,cresult$L302E)
# Linear models: L302E on X305E (kept in `bfrl`), L302E on L302I and X305I
# (printed only), and X305E on GEN and X305I (kept in `rmod`).
bfrl <- lm(cresult$L302E ~ cresult$X305E)
lm(cresult$L302E ~ cresult$L302I + cresult$X305I)
rmod <- lm(cresult$X305E ~ cresult$GEN+cresult$X305I)
####### Bivariate statistics for both a qualitative and quantitative variable
# (i.e. one categorical and one numeric variable)
# Mean of each marks column for every SEC x GEN combination, ignoring NAs.
with(cresult, tapply(X305E, list(SEC, GEN), mean, na.rm = TRUE))
with(cresult, tapply(X305I, list(SEC, GEN), mean, na.rm = TRUE))

# Summarize entire table
summary(cresult)

Results Analysis in R

Загружено:

Сведения о документе

Авторское право

Доступные форматы

Поделиться этим документом

Поделиться или встроить документ

Параметры публикации

Этот документ был вам полезен?

Это неприемлемый материал?

Авторское право:

Доступные форматы

Results Analysis in R

Загружено:

Авторское право:

Доступные форматы

# read.csv(file.

choose(),header=TRUE) -> marks

read.csv(file.choose(), header=TRUE, stringsAsFactors = F ) -> cresult

cresult[,22] <- factor((cresult[,22]))

# SN =c("PS","EDC","ET","SS","GUI","MPMC","EDC LAB","GUI LAB","MPMC LAB")

### Performance of students in theory subjects

### Performance of students in labs

# Plots of the result analysis

# barplot(as.matrix(ra), main="Result Analysis",ylab = "Number",

# legend(3.5, 300, rownames(ra), cex=0.8, fill=heat.colors(9))

# legend("topleft", colnames(ra), cex=1,

# Register numbers of students who failed in each theory subject

# Register numbers of students who failed in each lab

# Register numbers of students who failed exclusively in each subject

# Find duplicate records

# prevstuds <- cresult[which(str_detect(cresult$REG.NO, "Y13")),]

# prevstuds <- cresult[grep("Y13",cresult$REG.NO),]

################# Descriptive Statistics

#### Univariate statistics for a qualitative variable

plot(cresult$SEC,main = "Student distribution",ylim=c(0,max(table(cresult$SEC))

# Analyze the dispersion (spread)

# Analyze central tendancy (location)

# Summarize a quantitative variable

#### Bivariate statistics for two qualitative variables

#### Bivariate statistics for two quantitative variables

####### Bivariate statistics for both a qualitative and quantitative variable

# Summarize entire table

Вам также может понравиться