DEEP LEARNING WITH H2O IN R
14 QUICK STEPS
STEP 1 Load Libraries

suppressWarnings(suppressMessages(library(h2o)))
suppressWarnings(suppressMessages(library(data.table)))
suppressWarnings(suppressMessages(library(ggplot2)))

# IRkernel and the repr options below apply only when running inside a Jupyter notebook
library(IRkernel)

options(repr.plot.width = 8)
options(repr.plot.height = 4)

STEP 2 Initialize H2O

h2o.init()
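
h2o.init() starts a local H2O cluster, or attaches to one already running on the default port. If you need to control resources, a minimal sketch (the values below are assumptions, adjust for your machine):

h2o.init(nthreads = -1,         # -1 = use all available cores
         max_mem_size = "4g")   # cap the Java heap at roughly 4 GB (assumed value)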

STEP 3 Load Data

train_path <- '/Volumes/Development/Jupyter/churnTrain.csv'
test_path <- '/Volumes/Development/Jupyter/churnTest.csv'

# Import both CSVs as H2O frames and keep local data.frame copies for ggplot/cbind
train <- h2o.importFile(path = train_path)
test <- h2o.importFile(path = test_path)

train_df <- as.data.frame(train)
test_df <- as.data.frame(test)
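
This example ships with separate train and test files. If you only had a single file, one option (a hedged aside, not part of the original workflow) is to split the imported frame with h2o.splitFrame:

# Hypothetical: split one imported frame 80/20 instead of loading two files
splits <- h2o.splitFrame(data = train, ratios = 0.8, seed = 8)
train_split <- splits[[1]]
test_split <- splits[[2]]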

STEP 4 Review Data

head(train_df, 5)
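
head() only shows the first rows. For per-column types, ranges, and missing-value counts you could also inspect the H2O frame directly (optional, not in the original steps):

# Column-level summary of the training frame: type, min/max, missing count, cardinality
h2o.describe(train)
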
STEP 5 Select Variables for ML

response <- "churn"

# The response must be a factor so H2O treats this as a (binary) classification problem
train[[response]] <- as.factor(train[[response]])
response

# Every remaining column is used as a predictor
predictors <- setdiff(names(train), c(response))
predictors
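
setdiff() keeps every column except the response. If the data contained identifier-like columns that should not feed the model, they could be dropped the same way; the column names below are hypothetical:

# Hypothetical: also exclude ID-style columns from the predictor list
predictors <- setdiff(names(train), c(response, "phone_number", "customer_id"))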

STEP 6 Create GBM

gbm <- h2o.gbm(
  x = predictors,
  y = response,
  training_frame = train,
  nfolds = 5,
  keep_cross_validation_predictions = TRUE,
  keep_cross_validation_fold_assignment = TRUE,
  score_each_iteration = TRUE,
  model_id = "churn_gbm",
  distribution = 'bernoulli',
  max_depth = 10,
  min_rows = 10,
  nbins_top_level = 200,
  nbins_cats = 200,
  learn_rate = 0.01,
  learn_rate_annealing = 1,
  min_split_improvement = 0.0000001,
  stopping_rounds = 5,
  ntrees = 100,
  sample_rate = 1,
  col_sample_rate = 1,
  seed = 8
)
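
The title mentions deep learning, but the model above is a gradient boosting machine. A comparable deep learning model can be trained with h2o.deeplearning(); the hidden-layer sizes, epochs, and model id below are assumptions, not tuned values:

dl <- h2o.deeplearning(
  x = predictors,
  y = response,
  training_frame = train,
  nfolds = 5,
  keep_cross_validation_predictions = TRUE,
  model_id = "churn_dl",     # hypothetical model id
  hidden = c(64, 64),        # two hidden layers of 64 units (assumed)
  epochs = 20,               # assumed; tune for your data
  stopping_rounds = 5,
  seed = 8
)
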
STEP 7 Review Model Result

gbm@model$cross_validation_metrics_summary

(Overall model performance)

h2o.performance(gbm, xval = TRUE)
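
h2o.performance() prints the full metrics object. Individual metrics can also be pulled out of it, for example the cross-validated AUC and logloss (optional):

perf_xval <- h2o.performance(gbm, xval = TRUE)
h2o.auc(perf_xval)       # area under the ROC curve
h2o.logloss(perf_xval)   # logarithmic loss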

(Variable importance)

var_imp <- as.data.frame(head(h2o.varimp(gbm), 10))


var_imp

(Plotting graph)

g <- ggplot(data = var_imp, aes(x = variable, y = scaled_importance, fill = variable)) +
  geom_bar(stat = "identity")
g + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle('Scaled Variable Importance')

(cbind cross-validation predictions)

cv_preds <- as.data.frame(h2o.getFrame(gbm@model[["cross_validation_holdout_predictions_frame_id"]][["name"]]))

train_plus <- cbind(train_df, cv_preds)

head(train_plus, 5)

STEP 8 Review Test Set Performance

h2o.performance(gbm, newdata = test)

test_preds <- as.data.frame(h2o.predict(gbm, newdata = test))
test_plus <- cbind(test_df, test_preds)

head(test_plus, 5)
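
The class-level accuracy below is computed by hand. You could also look at the confusion matrix H2O reports at its chosen threshold (a hedged aside, not in the original flow):

# Confusion matrix on the held-out test set
h2o.confusionMatrix(gbm, newdata = test)
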
STEP 9 Graph Accuracy of Classes

pos_acc <- sum(test_plus$churn == 'yes' & test_plus$predict == 'yes') /
  sum(test_plus$churn == 'yes')

neg_acc <- sum(test_plus$churn == 'no' & test_plus$predict == 'no') /
  sum(test_plus$churn == 'no')

acc_df <- data.frame(Class = c('Churn', 'Retained'),
                     Model_Accuracy = c(pos_acc, neg_acc))

acc_df

(Plotting graph)

g <- ggplot(data = acc_df, aes(x = Class, y = Model_Accuracy, fill = Class)) +
  geom_bar(stat = "identity")

g + theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle('Model Accuracy by Class')

STEP 10 Export Model

# save the model

model_path <- h2o.saveModel(object=gbm, path=getwd(), force=TRUE)

print(model_path)
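
h2o.saveModel() writes a binary model that only the same H2O version can reload. For deployment outside R, a MOJO export is another option (a sketch, not part of the original steps):

# Export a MOJO artifact (a zip file) that can be scored outside of R
mojo_path <- h2o.download_mojo(gbm, path = getwd())
print(mojo_path)
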
LOAD MODEL

STEP 1 Load libraries

suppressWarnings(suppressMessages(library(h2o)))

suppressWarnings(suppressMessages(library(data.table)))

STEP 2 Initialize H2O

h2o.init()

STEP 3 Load Model and sample predict

data_path <- '/Volumes/Development/Jupyter/churn_yes.csv'
model_path <- '/Volumes/Development/Jupyter/churn_gbm'

saved_model <- h2o.loadModel(model_path)

data <- h2o.importFile(path = data_path)
data_df <- as.data.frame(data)

STEP 4 Use Model to predict new data point

prediction <- as.data.frame(h2o.predict(saved_model, newdata = data))

test_plus <- cbind(data_df, prediction)
test_plus
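
Once the predictions are in a local data.frame, the H2O cluster can be shut down to free resources (optional):

# Stop the local H2O cluster without the interactive confirmation prompt
h2o.shutdown(prompt = FALSE)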
