Вы находитесь на странице: 1 из 4

# Load the raw survey responses; the first row of the CSV holds the column
# headers. The file name has to be one unbroken string literal: a line break
# inside the quotes becomes part of the path and read.csv() cannot find the file.
> surveydata <- read.csv("surveydata.csv", header = TRUE)


> library(car)
# Give the survey columns short, analysis-friendly names in a single
# vectorised assignment. Positions 2-24 and 26 are renamed; columns 1 and 25
# are left untouched here. "Alchohol" is spelled exactly as later statements
# in this session refer to it (surveydata$Alchohol).
> names(surveydata)[c(2:24, 26)] <- c(
+   "Age", "Gender", "Worklife", "Eatout", "Stressfreelife", "EatRegular",
+   "SleepPattern", "PhysicalActivity", "Smoking", "Alchohol",
+   "TravelVacation", "LifeGoal", "DiabeticLevel", "CholesterolLevel",
+   "BPLevel", "FamilyHistory", "JobFunction", "TimeSpendonHobbies",
+   "FuturePlanning", "Height", "Weight", "City", "MaritalStatus",
+   "ShareData"
+ )
# Derive body-mass index as Weight / (Height / 100)^2.
# NOTE(review): the division by 100 presumably converts Height from
# centimetres to metres and Weight is presumably in kilograms (the str()
# output shows heights like 155, 160 and weights like 43, 45) - confirm units.
> surveydata$BMI = surveydata$Weight/((surveydata$Height/100)^2)
# attach() places a snapshot copy of the current columns on the search path so
# they can be referenced by bare name. Columns added or changed afterwards via
# surveydata$... are not reflected in that copy, so explicit surveydata$col
# references are the safer choice. The "masked" message below is printed
# because a copy of surveydata was already attached in this session.
> attach(surveydata)
The following objects are masked from surveydata (pos = 4):

Age, Alchohol, BMI, BPLevel, CholesterolLevel, City, DiabeticLevel, Eatout,


EatRegular, FamilyHistory, FitnessDevice, FuturePlanning, Gender, Height,
JobFunction, LifeGoal, MaritalStatus, PhysicalActivity, ShareData, SleepPattern,
Smoking, Stressfreelife, TimeSpendonHobbies, Timestamp, TravelVacation, Weight,
Worklife

> str(surveydata)
'data.frame': 321 obs. of 27 variables:
$ Timestamp : Factor w/ 319 levels "2018/04/13 10:17:42 PM GMT+5:30",..: 35 36
267 318 61 ...
$ Age : int 30 32 33 35 31 50 44 32 36 35 ...
$ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 2 1 1 1 ...
$ Worklife : Factor w/ 5 levels "Neutral","Somewhat agree",..: 2 4 2 3 2 1 2
$ Eatout : Factor w/ 5 levels "Everyday","Fortnightly",..: 4 5 2 4 3 5 4 5
$ Stressfreelife : Factor w/ 5 levels "Neutral","Somewhat agree",..: 1 2 3 3 3 1 5
$ EatRegular : Factor w/ 5 levels "Always","Mostly",..: 2 2 2 2 2 2 1 2 2 5 ..
$ SleepPattern : Factor w/ 5 levels "5-6 hours","6-7 hours",..: 4 3 1 3 1 4 1 2 2
$ PhysicalActivity : Factor w/ 5 levels "< 30 minutes",..: 5 4 3 1 1 1 3 1 2 5 ...
$ Smoking : Factor w/ 5 levels ">12","1 to 4",..: 5 5 5 5 5 5 5 5 5 5 ...
$ Alchohol : Factor w/ 6 levels "Daily","I dont drink",..: 2 2 2 5 2 2 2 2 2
$ TravelVacation : Factor w/ 5 levels "I dont get time",..: 2 3 2 5 1 3 3 2 2 2 ..
$ LifeGoal : Factor w/ 5 levels "I have not planned",..: 3 1 3 4 2 2 1 2 4 2
$ DiabeticLevel : Factor w/ 4 levels "Have problems though I take medicines",..: 2
..
$ CholesterolLevel : Factor w/ 4 levels "Have problems though I take medicines",..: 2
..
$ BPLevel : Factor w/ 4 levels "Have problems though I take medicines",..: 2
..
$ FamilyHistory : Factor w/ 3 levels "No","Not Aware",..: 2 1 1 3 3 3 3 1 1 3 ...
$ JobFunction : Factor w/ 5 levels "Desk Work","Factory Environment",..: 5 5 5 4
$ TimeSpendonHobbies: Factor w/ 5 levels "Daily or More than five hours",..: 3 4 4 5
$ FuturePlanning : Factor w/ 4 levels "I am aware how to do, but dont find time",.
1 ...
$ Height : num 155 160 157 152 153 150 177 158 150 152 ...
$ Weight : num 43 45 45 47 51.5 52 52 52 52 53 ...
$ City : Factor w/ 82 levels "Andhrapradesh",..: 18 18 15 4 15 7 62 4 64
$ MaritalStatus : Factor w/ 3 levels "Divorced","Married",..: 3 3 2 2 3 2 2 2 2 2
$ FitnessDevice : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
$ ShareData : Factor w/ 5 levels "Cash Vouchers or Coupons",..: 3 2 2 2 2 2 5
$ BMI : num 17.9 17.6 18.3 20.3 22 ...
> surveydata$TIER <- recode(trimws(surveydata$City),"c('Delhi',
+ 'Delhi ',
+ 'Delhi NCR',
+ 'Faridabad',
+ 'Ghaziabad',
+ 'Gurgaon',
+ 'Mumbai',
+ 'Mumbai ',
+ 'Mumbai goregaon',
+ 'navi mumbai',
+ 'New Delhi',
+ 'Noida',
+ 'panvel',
+ 'Thane')
+ ='Tier 1';c('Asansol',
+ 'Aurangabad',
+ 'Bangalore',
+ 'Bangalore ',
+ 'Bengaluru',
+ 'Bengaluru ',
+ 'Bharuch',
+ 'Bhubaneswar',
+ 'Bhubaneswar ',
+ 'California',
+ 'Chandigarh',
+ 'Chennai',
+ 'Chennai ',
+ 'City',
+ 'Coimbatore',
+ 'Dehradun',
+ 'Gautam Budh nagar',
+ 'Guntur',
+ 'Guwahati',
+ 'Hisar',
+ 'Hyderabad',
+ 'Hyderabad ',
+ 'Jaipur',
+ 'Jalna',
+ 'Johannesburg',
+ 'Kalyan',
+ 'Kochi',
+ 'Kochi ',
+ 'Kolkata',
+ 'Kolkata ',
+ 'kolkata',
+ 'London',
+ 'Nagapattinam',
+ 'Nagpur',
+ 'NASHIK',
+ 'Nellore',
+ 'Nellore ',
+ 'New York',
+ 'Overland park, ks',
+ 'Pune',
+ 'Pune',
+ 'Ranchi',
+ 'SCRANTON, PA',
+ 'Singapore',
+ 'Surst',
+ 'Sydney',
+ 'Tamilnadu',
+ 'Tanjavoor',
+ 'Thrissur',
+ 'Trichur',
+ 'Trichy',
+ 'Trichy ',
+ 'Trivandrum',
+ 'Urban',
+ 'Utrecht',
+ 'Vancouver',
+ 'Varanasi',
+ 'Wellington',
+ 'Yes','HYDERABAD','chennai','CHENNAI','KOLKATA','Andhraprad
swar')='Tier 2'")
# Coerce Worklife to a factor. The str() output earlier in this transcript
# already shows Worklife as a 5-level factor, so in that session this line
# changes nothing.
# NOTE(review): under R >= 4.0 read.csv() returns character columns by
# default, in which case this conversion would matter - confirm the R version.
> surveydata$Worklife = as.factor(surveydata$Worklife)
> surveydata$TravelVacation1 <- recode(trimws(surveydata$TravelVacation),"c('I dont get
in 3 years')=4; c('Once in 2 years')=2; c('Once a year')=1; c('More than once
> surveydata$LifeGoal1 <- recode(trimws(surveydata$LifeGoal),"c('I have not planned')=
icult')=3; c('Very Difficult')=4; c('Somewhat Easy')=2; c('Very Easy')=1")
> surveydata$TimeSpendonHobbies1 <- recode(trimws(surveydata$TimeSpendonHobbies),"c('Da
e hours')=1; c('Three to Five hours')=2; c('One to Three hours')=3; c('Zero to
('I dont find time')=5")
> surveydata$Stressfreelife1 <- recode(trimws(surveydata$Stressfreelife),"c('Strongly a
hat agree')=2; c('Neutral')=3; c('Somewhat Disagree')=4; c('Strongly Disagree')=5")
# Name column 25 "FitnessDevice"; this is the one position between 2 and 26
# that the rename statements at the top of the session do not cover.
# NOTE(review): the str() output and the attach() message earlier in the
# transcript already list FitnessDevice, so this session appears to have been
# re-run after the rename had happened once - confirm.
> names(surveydata)[25] = "FitnessDevice"
> surveydata$Worklife1 <- recode(trimws(surveydata$Worklife),"c('Strongly agree')=1;c(
('Neutral')=3;
+ c('Somewhat Disagree')=4;c('Strongly Disagree')=5")
# Keep only the rows whose BMI lies strictly between 15 and 55.
# BMI is read from surveydata itself rather than by bare name: a bare BMI
# resolves to the attach()ed copy, which is a snapshot taken earlier and would
# go stale (or produce a row mask of the wrong length) once surveydata has
# been modified. which() turns the logical mask into row positions, so rows
# with a missing BMI are dropped instead of becoming all-NA rows.
> surveydata <- surveydata[which(surveydata$BMI > 15 & surveydata$BMI < 55), ]
> surveydata$FuturePlanning1 <- recode(trimws(surveydata$FuturePlanning),"c('I do, reg
when I find time')=2; c('I am aware how to do, but dont find time')=3; c('I am not
")
> surveydata$StressCount <- surveydata$Worklife1*0.3 + surveydata$Stressfreelife1*0.2 +
cation1*0.1 + surveydata$LifeGoal1*0.1 + surveydata$TimeSpendonHobbies1*0.2 + surveydat
1
> surveydata$StressFactor <- ifelse(surveydata$StressCount <= 1.8,"Acute",ifelse(survey
3.4,"Episodic","Chronic"))
> surveydata$Eatout1 <- recode(trimws(surveydata$Eatout),"c('Everyday')=5; c('Weekly'
=3; c('Monthly')=2; c('I dont eat outside')=1")
> surveydata$EatRegular1 <- recode(trimws(surveydata$EatRegular),"c('Always')=1; c('
imes')=3; c('Rarely')=4; c('Never')=5")
> surveydata$SleepPattern1 <- recode(trimws(surveydata$SleepPattern),"c('less than 5 ho
ours')=3; c('6-7 hours')=2; c('7-8 hours')=1; c('more than 8 hours')=4")
> surveydata$PhysicalActivity1 <- recode(trimws(surveydata$PhysicalActivity),"c('None')
)=4; c('30 min to < 1 hour')=2; c('1 hour to < 2 hours')=1; c('2 hours or more
> surveydata$Lifestylecount <- surveydata$Eatout1*0.2 + surveydata$EatRegular1*0.2 + s
n1*0.2 + surveydata$PhysicalActivity1*0.4
> surveydata$LifeStyle <- ifelse(surveydata$Lifestylecount <= 2, "Active", ifelse(surve
<= 3.4, "Moderate", "Sedentary"))
> surveydata$Smoke <- recode(trimws(surveydata$Smoking),"c('I dont smoke')='No';c('1 to
2','>12')='Yes'")
> surveydata$Drink <- recode(trimws(surveydata$Alchohol),"c('I dont drink','One to two
ly')='No';c('Three to Six times a month','More than two times a week','Daily')='Yes'")
> surveydata$DiabeticLevel1 <- recode(trimws(surveydata$DiabeticLevel),"c('Not aware o
recent past')=3; c('Normal (I am not taking any medicine for the same)')=1; c('
')=2; c('Have problems though I take medicines')=4")
> surveydata$CholesterolLevel1 <- recode(trimws(surveydata$CholesterolLevel),"c('Not aw
n the recent past')=3; c('Normal (I am not taking any medicine for the same)')=1; c('
')=2; c('Have problems though I take medicines')=4")
> surveydata$BPLevel1 <- recode(trimws(surveydata$BPLevel),"c('Not aware or Not checked
)=3; c('Normal (I am not taking any medicine for the same)')=1; c('Normal with Med
problems though I take medicines')=4")
> surveydata$Healthcount <- surveydata$DiabeticLevel1*0.4 + surveydata$CholesterolLevel
PLevel1*0.3
> surveydata$Health <- ifelse(surveydata$Healthcount <= 1, "Healthy",ifelse(surveydata
oderate","Unhealthy"))
> surveydata$FamilyHistory <- recode(trimws(surveydata$FamilyHistory),"c('Not Aware / H
Aware'")
> survey_clean$JobFunction = recode(trimws(survey_clean$JobFunction),"c('Work on Comput
='Work on Computer'")
Error in sub(re, "", x, perl = TRUE) : object 'survey_clean' not found
> summary(survey_clean)
Error in summary(survey_clean) : object 'survey_clean' not found
> surveydata$JobFunction = recode(trimws(surveydata$JobFunction),"c('Work on Computer m
rk on Computer'")
# Build the analysis data set: demographics, the derived segment columns
# (TIER, StressFactor, LifeStyle, Smoke, Drink, Health) and the raw
# FamilyHistory / JobFunction / MaritalStatus / FitnessDevice / ShareData
# answers. Columns are selected by name, in the same order the positional
# indices 2, 3, 27, 28, 36, 42, 43, 44, 49, 17, 18, 24, 25, 26 produced (see
# the summary() header below), so adding or removing an intermediate derived
# column can no longer silently shift the selection.
> survey_clean <- surveydata[, c(
+   "Age", "Gender", "BMI", "TIER", "StressFactor", "LifeStyle", "Smoke",
+   "Drink", "Health", "FamilyHistory", "JobFunction", "MaritalStatus",
+   "FitnessDevice", "ShareData"
+ )]
> View(survey_clean)
> summary(survey_clean)
Age Gender BMI TIER StressFactor
Min. :30.00 Female: 74 Min. :16.60 Length:321 Length:321
1st Qu.:33.00 Male :247 1st Qu.:23.84 Class :character Class :character
Median :37.00 Median :25.95 Mode :character Mode :character
Mean :38.03 Mean :26.34
3rd Qu.:43.00 3rd Qu.:28.39
Max. :58.00 Max. :44.62
LifeStyle Smoke Drink Health FamilyHist
Length:321 Length:321 Length:321 Length:321 Length:32
Class :character Class :character Class :character Class :character Class :cha
Mode :character Mode :character Mode :character Mode :character Mode :cha

JobFunction MaritalStatus FitnessDevice


Length:321 Divorced: 2 No :259 Cash Vouchers or Coupons
Class :character Married :285 Yes: 62 I will not share
Mode :character Single : 34 Insurer offers free health check-ups
Insurer offers incentives like Gym mem
Insurer offers me discount on premium
> write.csv(survey_clean,"survey_clean.csv")
> survey_clean$Discount[survey_clean$ShareData == "Cash Vouchers or Coupons"] = "Yes"
> survey_clean$Discount[survey_clean$ShareData == "Insurer offers free health check-up
> survey_clean$Discount[survey_clean$ShareData == "Insurer offers incentives like Gym m
> survey_clean$Discount[survey_clean$ShareData == "Insurer offers me discount on premi
> survey_clean$Discount[survey_clean$ShareData == "I will not share"] = "No"
# Build the export variant that carries the Yes/No Discount flag instead of
# the raw ShareData answer: columns 1-13 plus column 15 (Discount, appended
# to the 14-column survey_clean above), leaving out column 14 (ShareData).
> survey_clean_discount <- survey_clean[, c(1:13, 15)]
> View(survey_clean_discount)
# Write the discount variant itself. The previous call passed survey_clean,
# so survey_clean_discount.csv still contained the ShareData column and was
# not the data frame built and inspected just above.
> write.csv(survey_clean_discount, "survey_clean_discount.csv")

Вам также может понравиться