
# Clear workspace
rm(list = ls())
# Clear console
cat("\014")
# Close plots
dev.off(dev.list()["RStudioGD"])
# Include packages
library(ISLR)
library(glmnet)
library(ncvreg)
library(caTools)
# Import data
data(Hitters)
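# (Sketch, not in the original script) Quick look at the raw data: the ISLR
# Hitters data has 322 players, 20 variables, and some missing salaries.
dim(Hitters)
sum(is.na(Hitters$Salary))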

# Remove rows with missing values
Hitters = na.omit(Hitters)
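# (Addition, not in the original script) Fixing the RNG seed makes the random
# train/validation/test splits below reproducible; the seed value is arbitrary.
set.seed(1)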

# Loop for least-squares regression over 10 random splits
LS_val.errors = rep(NA, 10)
for (i in 1:10) {

# Splitting into training (60%), validation (20%) and test (20%) sets
# (the validation set is not needed by plain least squares below)
smp_size <- floor(0.6 * nrow(Hitters))
train_ind <- sample(seq_len(nrow(Hitters)), size = smp_size)
train <- Hitters[train_ind, ]
nottrain <- Hitters[-train_ind, ]

smp_size <- floor(0.5 * nrow(nottrain))
validation_ind <- sample(seq_len(nrow(nottrain)), size = smp_size)
validation <- nottrain[validation_ind, ]
test <- nottrain[-validation_ind, ]

# Predictor and response matrices for training, validation and test sets
# (the response is log(Salary); drop the intercept column from model.matrix)
X.train = model.matrix(Salary ~ ., train)[, -1]
Y.train = log(train$Salary)

X.validation = model.matrix(Salary ~ ., validation)[, -1]
Y.validation = log(validation$Salary)

X.test = model.matrix(Salary ~ ., test)[, -1]
Y.test = log(test$Salary)

# Standardizing the data: centre the responses, centre and scale the predictors
Y.train = Y.train - mean(Y.train)
X.train = scale(X.train, center = TRUE, scale = TRUE)

Y.validation = Y.validation - mean(Y.validation)
X.validation = scale(X.validation, center = TRUE, scale = TRUE)

Y.test = Y.test - mean(Y.test)
X.test = scale(X.test, center = TRUE, scale = TRUE)
# Least-squares regression: closed-form OLS estimate beta = (X'X)^(-1) X'y
betals = solve(t(X.train) %*% X.train) %*% t(X.train) %*% Y.train
Yhat = X.test %*% betals
LS_val.errors[i] = mean((Y.test - Yhat)^2)
}
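# (Sketch, not part of the original script) Sanity check: the closed-form
# estimate above should match base R's QR-based least-squares solver on the
# split left over from the final iteration.
betals_qr = qr.solve(X.train, Y.train)
max(abs(betals - betals_qr))  # should be ~0 up to rounding error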

# Loop for ridge regression over 10 random splits
R_val.errors = rep(NA, 10)
for (i in 1:10) {

# Splitting into training (60%), validation (20%) and test (20%) sets
smp_size <- floor(0.6 * nrow(Hitters))
train_ind <- sample(seq_len(nrow(Hitters)), size = smp_size)
train <- Hitters[train_ind, ]
nottrain <- Hitters[-train_ind, ]

smp_size <- floor(0.5 * nrow(nottrain))
validation_ind <- sample(seq_len(nrow(nottrain)), size = smp_size)
validation <- nottrain[validation_ind, ]
test <- nottrain[-validation_ind, ]

# Predictor and response matrices for training, validation and test sets
X.train = model.matrix(Salary ~ ., train)[, -1]
Y.train = log(train$Salary)

X.validation = model.matrix(Salary ~ ., validation)[, -1]
Y.validation = log(validation$Salary)

X.test = model.matrix(Salary ~ ., test)[, -1]
Y.test = log(test$Salary)

# Standardizing the data
Y.train = Y.train - mean(Y.train)
X.train = scale(X.train, center = TRUE, scale = TRUE)

Y.validation = Y.validation - mean(Y.validation)
X.validation = scale(X.validation, center = TRUE, scale = TRUE)

Y.test = Y.test - mean(Y.test)
X.test = scale(X.test, center = TRUE, scale = TRUE)

# Ridge regression: grid of 1000 lambda values from 10^1.5 down to 10^-2
grid = 10^seq(1.5, -2, length = 1000)
fit = glmnet(x = X.train, y = Y.train, lambda = grid, alpha = 0)
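# (Note, not in the original script) With alpha = 0, glmnet fits the pure
# ridge (L2) penalty, i.e. it minimizes
#   (1/(2n)) * sum((y - X beta)^2) + (lambda/2) * sum(beta^2)
# for every lambda on the grid in a single call.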


# Validation error for every lambda on the grid
Yhat = predict(fit, newx = X.validation, s = grid)
er = rep(NA, 1000)
for (j in 1:1000) {
er[j] = mean((Y.validation - Yhat[, j])^2)
}
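# (Equivalent, not in the original script) the loop above can be vectorized as
#   er = colMeans((Y.validation - Yhat)^2)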
# Pick the lambda with the smallest validation MSE and predict on the test set
pos = which.min(er)
best_lambda = grid[pos]
Yhat = predict(fit, newx = X.test, s = best_lambda)
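# (Note, not in the original script) predict() evaluates the fitted ridge path
# at s = best_lambda; if s does not coincide with a lambda used in fitting,
# glmnet interpolates linearly between grid values (exact = FALSE by default).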

R_val.errors[i] = mean((Y.test - Yhat)^2)
}
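# (Sketch, not part of the original script) Compare the two methods by
# averaging the test MSE over the ten random splits, and inspect the ridge
# coefficients at the lambda selected on the last split.
mean(LS_val.errors)   # average test MSE, least squares
mean(R_val.errors)    # average test MSE, ridge with validation-tuned lambda
boxplot(list(LS = LS_val.errors, Ridge = R_val.errors),
        ylab = "Test MSE (centred log-salary)")
coef(fit, s = best_lambda)  # ridge coefficients from the final iteration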