Академический Документы
Профессиональный Документы
Культура Документы
1$job_id)))
# Number of distinct ct_string_id values in data.1, stored as a double.
# Used below as the denominator of every Percentage_String column.
N2 <- as.double(length(unique(data.1$ct_string_id)))
library(plyr)
library(dplyr)
library(ggplot2)
# String Analysis by Overall ----------------------------------------------
## 1
# One row per ct_material_id: number of jobs and number of distinct strings,
# each also divided by the totals N1 and N2 set before this section.
SA_Ove_agg <- plyr::ddply(
  data.1, .(ct_material_id), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
SA_Ove_agg <- arrange(SA_Ove_agg, desc(Count_ID))
# Rank of each material after sorting by job count (1 = most jobs).
# seq_len() stays empty for an empty table, unlike 1:length(x).
SA_Ove_agg$String_type <- seq_len(nrow(SA_Ove_agg))

# Bar per rank, height = share of jobs, coloured by material.
SA_O <- ggplot(
  data = SA_Ove_agg,
  aes(x = String_type, y = Percentage_ID, fill = ct_material_id)
) +
  geom_bar(stat = "identity")
SA_O <- SA_O + xlab("String Type") + ylab("Percentage of Jobs") +
  ggtitle("String Analysis by Job Materials") +
  # Both limits go in a single coord_cartesian(): adding a second coordinate
  # system replaces the first one, which silently discarded the x limits.
  coord_cartesian(xlim = c(1, 9), ylim = c(0, 1)) +
  scale_x_continuous(breaks = seq(1, 9, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  scale_fill_discrete(name = "String Type") + theme_bw()
SA_O
ggsave("String_Ana_Overal.png", plot = SA_O, width = 9, height = 4, dpi = 100)
## 2
# Working copy of data.1 restricted to the columns used by the per-country,
# per-state and offshore analyses, with space-free column names.
# The source names are written on unbroken lines: a line break inside a quoted
# name becomes part of the string and no longer matches the column.
data.2 <- data.1[, c(
  "job_id", "ct_string_id", "ct_material_id", "Country Name",
  "Final_jobtype", "State Name", "Offshore Well",
  "ct_app_factor", "ct_avg_fatigue"
)]
cnames <- c(
  "job_id", "ct_string_id", "ct_material_id", "Country_Name",
  "Final_jobtype", "State_Name", "Offshore_Well",
  "ct_app_factor", "ct_avg_fatigue"
)
colnames(data.2) <- cnames
# Shorten the longest country label so it fits in plot legends.
data.2$Country_Name[data.2$Country_Name == "United States of America"] <- "USA"

# One row per (material, country) pair with job and distinct-string counts.
SA_Ove_agg.1 <- plyr::ddply(
  data.2, .(ct_material_id, Country_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)

# Job count per material, bars coloured by country.
SA_C <- ggplot(
  data = SA_Ove_agg.1,
  aes(x = ct_material_id, y = Count_ID, fill = Country_Name)
) +
  geom_bar(stat = "identity")
SA_C <- SA_C + xlab("String Type") + ylab("Number of Jobs") +
  ggtitle("String Analysis by Country") +
  coord_cartesian(ylim = c(0, 1600)) +
  scale_y_continuous(breaks = seq(0, 1600, 100)) +
  scale_fill_discrete(name = "Country Name") + theme_bw()
SA_C
ggsave("String_Ana_Country.png", plot = SA_C, width = 9, height = 4, dpi = 100)
## Pie chart input for QT-900
library(scales)
# Country_Name of every data.2 row whose material is "QT-900", kept as a
# one-column data frame. which() discards rows where the comparison is NA.
pie.string <- data.2[
  which(data.2$ct_material_id == "QT-900"),
  "Country_Name",
  drop = FALSE
]
# Rank job types by job count. JA_Ove_agg is built before this point; its
# first column is used as the job type and is filtered on as Final_jobtype.
JA_Ove_agg <- arrange(JA_Ove_agg, desc(Count_ID))
# Job types ranked 2 to 11: the top-ranked row is deliberately skipped.
# NOTE(review): presumably the top row is a blank/unknown job type - confirm.
# head() bounds the selection, so a table with fewer than 11 rows no longer
# contributes NA entries the way JA_Ove_agg[2:11, 1] did.
top_jt <- head(JA_Ove_agg[[1]], 11)[-1]
JA_Ove_agg <- JA_Ove_agg[JA_Ove_agg$Final_jobtype %in% top_jt, ]

# Job count per selected job type, one colour per job type.
JA_O <- ggplot(
  data = JA_Ove_agg,
  aes(x = Final_jobtype, y = Count_ID, fill = Final_jobtype)
) +
  geom_bar(stat = "identity")
JA_O <- JA_O + xlab("Job Type") + ylab("Number of Jobs") +
  # Title previously ended with a stray ")".
  ggtitle("Job Analysis by Job types") +
  scale_fill_discrete(name = "Job Type") +
  coord_cartesian(ylim = c(0, 275)) +
  scale_y_continuous(breaks = seq(0, 275, 25)) +
  theme(panel.background = element_blank())
JA_O
ggsave("Job_Ana_Overal.png", plot = JA_O, width = 9, height = 4, dpi = 100)
## 2
# Rows of data.2 whose job type is one of those held in top_jt.
data.to10 <- data.2[data.2$Final_jobtype %in% top_jt, ]

# One row per (job type, country) pair with job and distinct-string counts.
JA_Ove_agg.1 <- plyr::ddply(
  data.to10, .(Final_jobtype, Country_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)

# Job count per job type, bars coloured by country; built in a single chain.
JA_C <- ggplot(
  data = JA_Ove_agg.1,
  aes(x = Final_jobtype, y = Count_ID, fill = Country_Name)
) +
  geom_bar(stat = "identity") +
  labs(
    x = "Job Type",
    y = "Number of Jobs",
    title = "Job Analysis (Top 10 Jobs) by Country"
  ) +
  coord_cartesian(ylim = c(0, 300)) +
  scale_y_continuous(breaks = seq(0, 300, 30)) +
  scale_fill_discrete(name = "Country Name")
JA_C
ggsave("Job_Ana_Country.png", plot = JA_C, width = 9, height = 4, dpi = 100)
## 3 Top 10 States (Excluding Blanks)
# Frequency table of State_Name over all of data.2, most frequent first.
# NOTE(review): the header says blanks are excluded, but no filter on empty
# state names is applied in this section - confirm they are removed upstream.
top_10_state <- as.data.frame(table(data.2$State_Name))
top_10_state <- arrange(top_10_state, desc(Freq))
# First column holds the state names. head() returns at most 10 of them, so
# fewer than 10 distinct states no longer produces NA entries.
top_10_state <- head(top_10_state[[1]], 10)
data.top_stat <- data.to10[data.to10$State_Name %in% top_10_state, ]

# One row per (job type, state) pair with job and distinct-string counts.
JA_Ove_agg.3 <- plyr::ddply(
  data.top_stat, .(Final_jobtype, State_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)

# Job count per job type, bars coloured by state.
JA_ST <- ggplot(
  data = JA_Ove_agg.3,
  aes(x = Final_jobtype, y = Count_ID, fill = State_Name)
) +
  geom_bar(stat = "identity")
JA_ST <- JA_ST + xlab("Job Type") + ylab("Number of Jobs") +
  # Title spelling corrected ("accross" -> "across").
  ggtitle("Job Analysis by Top 10 States across Country") +
  coord_cartesian(ylim = c(0, 250)) +
  scale_y_continuous(breaks = seq(0, 250, 25)) +
  scale_fill_discrete(name = "State Names")
JA_ST
ggsave("Job_Ana_States.png", plot = JA_ST, width = 9, height = 4, dpi = 100)
## 5 Offshore Well
# One row per (job type, Offshore_Well value) pair with job and
# distinct-string counts, computed on the top_jt subset data.to10.
JA_Ove_agg.4 <- plyr::ddply(
  data.to10, .(Final_jobtype, Offshore_Well), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
# Legend label: "Yes" -> offshore, "No" -> onshore, anything else -> not
# applicable. The labels are kept on one line each so that no line break ends
# up inside the legend text.
JA_Ove_agg.4$Offshore_Well_1 <- ifelse(
  JA_Ove_agg.4$Offshore_Well == "Yes",
  "Offshore Well",
  ifelse(
    JA_Ove_agg.4$Offshore_Well == "No",
    "Onshore Well",
    "Not Applicable"
  )
)

# Job count per job type, bars coloured by well label.
JA_OW <- ggplot(
  data = JA_Ove_agg.4,
  aes(x = Final_jobtype, y = Count_ID, fill = Offshore_Well_1)
) +
  geom_bar(stat = "identity")
JA_OW <- JA_OW + xlab("Job Type") + ylab("Number of Jobs") +
  ggtitle("Job Analysis by Offshore Well") +
  coord_cartesian(ylim = c(0, 300)) +
  scale_y_continuous(breaks = seq(0, 300, 30)) +
  scale_fill_discrete(name = "Well type")
JA_OW
ggsave("Job_Ana_Off_well.png", plot = JA_OW, width = 9, height = 4, dpi = 100)
# Corrosion Analysis -------------------------------------------------------
# Per material, sum the ct_co2_jobs, ct_h2s_jobs and ct_acid_jobs columns.
# The ddply() call starts on its own line: when it shared a line with the
# section header it was part of the comment and never executed.
CA_Ove_agg <- plyr::ddply(
  data.1, .(ct_material_id), summarize,
  # CT_corrosive = sum(ct_corrosive),
  Co2_Jobs = sum(ct_co2_jobs),
  H2s_Jobs = sum(ct_h2s_jobs),
  Acid_Jobs = sum(ct_acid_jobs)
)
library(reshape2)
# Long format: one row per (material, corrosion column) with its sum in
# `value`, ordered by descending `variable`.
CA_Ove_agg_t <- melt(CA_Ove_agg, id.vars = "ct_material_id")
CA_Ove_agg_t <- arrange(CA_Ove_agg_t, desc(variable))

# Summed jobs per corrosion column, bars coloured by material.
CA_O <- ggplot(
  data = CA_Ove_agg_t,
  aes(x = variable, y = value, fill = ct_material_id)
) +
  geom_bar(stat = "identity")
CA_O <- CA_O + xlab("corrosion-related") +
  ylab("Count of the number of Jobs") +
  ggtitle("Corrosion Analysis by Overall") +
  scale_fill_discrete(name = "String Type") +
  coord_cartesian(ylim = c(0, 4000)) +
  scale_y_continuous(breaks = seq(0, 4000, 200))
CA_O
ggsave("Cor_Ana_Overal.png", plot = CA_O, width = 9, height = 4, dpi = 100)
## By country
# Subset of data.1 with the corrosion columns, renamed to space-free names.
# Every column name sits on an unbroken line: a line break inside the quotes
# becomes part of the name and the column lookup then fails.
data.corr <- data.1[, c(
  "job_id", "ct_string_id", "ct_material_id", "Country Name",
  "Final_jobtype", "State Name", "Offshore Well",
  "ct_corrosive", "ct_acid_jobs", "ct_co2_jobs", "ct_h2s_jobs"
)]
cnames <- c(
  "job_id", "ct_string_id", "ct_material_id", "Country_Name",
  "Final_jobtype", "State_Name", "Offshore_Well",
  "ct_corrosive", "ct_acid_jobs", "ct_co2_jobs", "ct_h2s_jobs"
)
colnames(data.corr) <- cnames

# Per country, sum the ct_co2_jobs, ct_h2s_jobs and ct_acid_jobs columns.
CA_Ove_agg.county <- plyr::ddply(
  data.corr, .(Country_Name), summarize,
  # CT_corrosive = sum(ct_corrosive),
  Co2_Jobs = sum(ct_co2_jobs),
  H2s_Jobs = sum(ct_h2s_jobs),
  Acid_Jobs = sum(ct_acid_jobs)
)
library(reshape2)
# Long format: one row per (country, corrosion column).
CA_Ove_agg.county_t <- melt(CA_Ove_agg.county, id.vars = "Country_Name")

# Summed jobs per country, bars coloured by corrosion column.
CA_C <- ggplot(
  data = CA_Ove_agg.county_t,
  aes(x = Country_Name, y = value, fill = variable)
) +
  geom_bar(stat = "identity")
# The x axis carries countries and the fill carries the corrosion columns, so
# the two titles are assigned accordingly (they were previously swapped:
# "corrosion-related" on the country axis, "Country Type" on the legend).
CA_C <- CA_C + xlab("Country Name") + ylab("Count of the number of Jobs") +
  ggtitle("Corrosion Analysis by Country") +
  scale_fill_discrete(name = "corrosion-related") +
  coord_cartesian(ylim = c(0, 4000)) +
  scale_y_continuous(breaks = seq(0, 4000, 250))
CA_C
ggsave("Cor_Ana_Country.png", plot = CA_C, width = 9, height = 4, dpi = 100)
# Scatter plot -------------------------------------------------------------
### CTR Depth
# CT_OD is not defined in this section; it must already exist.
CTR_Depth <- CT_OD[, c("job_id", "Country_Name", "CT_Ctr_Depth", "ct_string_len")]
## Remove NA
# Drop the rows whose depth is the text marker "N.A". %in% never returns NA,
# so rows with a missing depth are not turned into all-NA rows at this step.
CTR_Depth <- CTR_Depth[!(CTR_Depth$CT_Ctr_Depth %in% "N.A"), ]
# Convert via character so that a factor column is converted by its labels
# rather than by its internal level codes.
CTR_Depth$CT_Ctr_Depth <- as.numeric(as.character(CTR_Depth$CT_Ctr_Depth))
# Rows whose depth could not be converted carry no plottable value.
CTR_Depth <- CTR_Depth[!is.na(CTR_Depth$CT_Ctr_Depth), ]
str(CTR_Depth)
max(CTR_Depth$CT_Ctr_Depth)
## Removing Outlier
# The former `CTR_Depth[!(max(CTR_Depth)), ]` has been removed: max() over the
# whole data frame fails when non-numeric columns are present, and a numeric
# result negated with `!` selects no rows at all. The largest depth is already
# discarded by the trimming below.
CTR_Depth <- arrange(CTR_Depth, desc(CT_Ctr_Depth))
# Discard the 6 largest depths. tail(x, -6) keeps everything after row 6,
# replacing the hard-coded range 7:1941 that only fitted one data size.
CTR_Depth <- tail(CTR_Depth, -6)

# String length against depth, all points in one colour.
QQ <- ggplot(CTR_Depth, aes(x = ct_string_len, y = CT_Ctr_Depth)) +
  geom_point(shape = 1)
QQ <- QQ + xlab("CT String Len") + ylab("CT CTR Depth") +
  ggtitle("Scatter Plot by String Len vs CT CTR Depth") +
  theme_bw()
QQ
ggsave("Scatter_Ana.png", plot = QQ, width = 9, height = 4, dpi = 100)

# Same scatter, points coloured by country.
QQ1 <- ggplot(
  CTR_Depth,
  aes(x = ct_string_len, y = CT_Ctr_Depth, color = Country_Name)
) +
  geom_point(shape = 1)
QQ1 <- QQ1 + xlab("CT String Len") + ylab("CT CTR Depth") +
  ggtitle("Scatter Plot by String Len vs CT CTR Depth") +
  theme_bw()
QQ1
ggsave("Scatter_Ana.1.png", plot = QQ1, width = 9, height = 4, dpi = 100)
# Diagnostics on the unfiltered source table.
range(CT_OD$CT_Ctr_Depth)
dim(CT_OD)
# Neeraj Request ----------------------------------------------------------