Вы находитесь на странице: 1 из 7

# Denominators for the percentage columns computed throughout this script:
# N1 is the number of distinct job_id values in data.1, N2 the number of
# distinct ct_string_id values.
N1 <- as.numeric(length(unique(data.1$job_id)))
N2 <- as.numeric(length(unique(data.1$ct_string_id)))
library(plyr)
library(dplyr)
library(ggplot2)
# String Analysis by Overall ----------------------------------------------
## 1
# Per material: number of job rows, that count divided by N1, number of
# distinct strings, and that count divided by N2.
SA_Ove_agg <- plyr::ddply(
  data.1, .(ct_material_id), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
SA_Ove_agg <- arrange(SA_Ove_agg, desc(Count_ID))
# Rank of each material after sorting by job count; used as the x position.
# seq_len() stays empty for an empty table, unlike 1:length().
SA_Ove_agg$String_type <- seq_len(nrow(SA_Ove_agg))
SA_O <- ggplot(
  data = SA_Ove_agg,
  aes(x = String_type, y = Percentage_ID, fill = ct_material_id)
) +
  geom_bar(stat = "identity")
SA_O <- SA_O +
  xlab("String Type") +
  ylab("Percentage of Jobs") +
  ggtitle("String Analysis by Job Materials") +
  # A plot holds one coordinate system: a second coord_cartesian() replaces
  # the first, so both limits are set in a single call.
  coord_cartesian(xlim = c(1, 9), ylim = c(0, 1)) +
  scale_x_continuous(breaks = seq(1, 9, 1)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  scale_fill_discrete(name = "String Type") +
  theme_bw()
SA_O
ggsave("String_Ana_Overal.png", plot = SA_O, width = 9, height = 4, dpi = 100)
## 2
# Working copy of data.1 restricted to the columns used by the breakdowns
# below, renamed so that no column name contains a space.
data.2 <- data.1[, c(
  "job_id", "ct_string_id", "ct_material_id", "Country Name",
  "Final_jobtype", "State Name", "Offshore Well", "ct_app_factor",
  "ct_avg_fatigue"
)]
cnames <- c(
  "job_id", "ct_string_id", "ct_material_id", "Country_Name",
  "Final_jobtype", "State_Name", "Offshore_Well", "ct_app_factor",
  "ct_avg_fatigue"
)
colnames(data.2) <- cnames
# Shorten the longest country label.
data.2$Country_Name[data.2$Country_Name == "United States of America"] <- "USA"
# Job and string counts per material and country.
SA_Ove_agg.1 <- plyr::ddply(
  data.2, .(ct_material_id, Country_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
SA_C <- ggplot(
  data = SA_Ove_agg.1,
  aes(x = ct_material_id, y = Count_ID, fill = Country_Name)
) +
  geom_bar(stat = "identity")
SA_C <- SA_C +
  xlab("String Type") +
  ylab("Number of Jobs") +
  ggtitle("String Analysis by Country") +
  coord_cartesian(ylim = c(0, 1600)) +
  scale_y_continuous(breaks = seq(0, 1600, 100)) +
  scale_fill_discrete(name = "Country Name") +
  theme_bw()
SA_C
ggsave("String_Ana_Country.png", plot = SA_C, width = 9, height = 4, dpi = 100)
## Adding pie chart QT-900
library(scales)
# One row per QT-900 record, keeping only the country column.
pie.string <- subset(
  data.2,
  ct_material_id == "QT-900",
  select = "Country_Name"
)
# Order the country levels by descending row count.
pie.string$Country_Name <- reorder(
  pie.string$Country_Name,
  X = pie.string$Country_Name,
  FUN = function(x) -length(x)
)
# Label positions along the stacked axis: total row count minus the midpoint
# of each slice, taken over the ascending-sorted count table.
at <- nrow(pie.string) -
  as.numeric(cumsum(sort(table(pie.string))) - 0.5 * sort(table(pie.string)))
# Share of each country, rounded to one decimal place of a percent.
label <- paste0(
  round(sort(table(pie.string)) / sum(table(pie.string)), 3) * 100,
  "%"
)
p <- ggplot(pie.string, aes(x = "", fill = Country_Name)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  annotate(geom = "text", y = at, x = 1, label = label) +
  theme_bw() +
  ggtitle("Percentage of Jobs by Country for QT-900")
ggsave("String_Ana_Country_Pie.png", plot = p, width = 9, height = 4, dpi = 100)
## 3 Top 10 Job Type (Excluding Blanks)
# NOTE(review): cnames is assigned here but not read again before it is
# reassigned further down in the visible script.
cnames <- c(
  "job_id", "ct_string_id", "ct_material_id", "Country_Name",
  "Final_jobtype", "State_Name"
)
# NOTE(review): t10_jo is not defined anywhere in the visible script; it is
# presumably the vector of top-10 job types -- confirm it exists before this
# line runs.
data.3 <- data.2[data.2$Final_jobtype %in% t10_jo, ]
# Job and string counts per material and job type.
SA_Ove_agg.2 <- plyr::ddply(
  data.3, .(ct_material_id, Final_jobtype), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
SA_JT <- ggplot(
  data = SA_Ove_agg.2,
  aes(x = ct_material_id, y = Count_ID, fill = Final_jobtype)
) +
  geom_bar(stat = "identity")
SA_JT <- SA_JT +
  xlab("String Type") +
  ylab("Number of Jobs") +
  ggtitle("String Analysis by Top 10 Job type") +
  coord_cartesian(ylim = c(0, 700)) +
  scale_y_continuous(breaks = seq(0, 700, 50)) +
  scale_fill_discrete(name = "Job Type")
SA_JT
ggsave("String_Ana_Job_type.png", plot = SA_JT, width = 9, height = 4, dpi = 100)
## 4 Top 10 States (Excluding Blanks)
# Frequency table of states, most frequent first.
top_10_state <- as.data.frame(table(data.2$State_Name))
top_10_state <- arrange(top_10_state, desc(Freq))
# head() returns at most 10 names; indexing with 1:10 would pad with NA when
# fewer than 10 states exist, and %in% below would then match NA states.
top_10_state <- head(top_10_state[, 1], 10)
data.4 <- data.2[data.2$State_Name %in% top_10_state, ]
# Job and string counts per material and state.
SA_Ove_agg.3 <- plyr::ddply(
  data.4, .(ct_material_id, State_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
SA_ST <- ggplot(
  data = SA_Ove_agg.3,
  aes(x = ct_material_id, y = Count_ID, fill = State_Name)
) +
  geom_bar(stat = "identity")
SA_ST <- SA_ST +
  xlab("String Type") +
  ylab("Number of Jobs") +
  ggtitle("String Analysis by States") +
  coord_cartesian(ylim = c(0, 1200)) +
  scale_y_continuous(breaks = seq(0, 1200, 100)) +
  scale_fill_discrete(name = "State Names") +
  theme_bw()
SA_ST

ggsave("String_Ana_States.png", plot = SA_ST, width = 9, height = 4, dpi = 100)


## Adding pie chart QT-900
library(scales)
# One row per QT-900 record among the top states, keeping only the state.
pie.string.1 <- subset(
  data.4,
  ct_material_id == "QT-900",
  select = "State_Name"
)
# Order the state levels by descending row count.
pie.string.1$State_Name <- reorder(
  pie.string.1$State_Name,
  X = pie.string.1$State_Name,
  FUN = function(x) -length(x)
)
# Label positions along the stacked axis: total row count minus the midpoint
# of each slice, taken over the ascending-sorted count table.
at <- nrow(pie.string.1) -
  as.numeric(
    cumsum(sort(table(pie.string.1))) - 0.5 * sort(table(pie.string.1))
  )
# Share of each state as a percentage; the "%" suffix must sit on the same
# line as the number, so the literal contains no line break.
label <- paste0(
  round(sort(table(pie.string.1)) / sum(table(pie.string.1)), 3) * 100,
  "%"
)
p.1 <- ggplot(pie.string.1, aes(x = "", fill = State_Name)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  annotate(geom = "text", y = at, x = 1, label = label) +
  theme_bw() +
  ggtitle("Percentage of Jobs by States for QT-900")
ggsave("String_Ana_States_Pie.png", plot = p.1, width = 9, height = 4, dpi = 100)
## 5 Offshore Well
# Job and string counts per material and offshore flag.
SA_Ove_agg.4 <- plyr::ddply(
  data.2, .(ct_material_id, Offshore_Well), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
# Legend label: "Yes" -> offshore, "No" -> onshore, anything else -> n/a.
SA_Ove_agg.4$Offshore_Well_1 <- ifelse(
  SA_Ove_agg.4$Offshore_Well == "Yes",
  "Offshore Well",
  ifelse(SA_Ove_agg.4$Offshore_Well == "No", "Onshore Well", "Not Applicable")
)
SA_OW <- ggplot(
  data = SA_Ove_agg.4,
  aes(x = ct_material_id, y = Count_ID, fill = Offshore_Well_1)
) +
  geom_bar(stat = "identity")
SA_OW <- SA_OW +
  xlab("String Type") +
  ylab("Number of Jobs") +
  ggtitle("String Analysis by Offshore Well") +
  coord_cartesian(ylim = c(0, 1300)) +
  scale_y_continuous(breaks = seq(0, 1300, 100)) +
  scale_fill_discrete(name = "Well type")
SA_OW
ggsave("String_Ana_Off_well.png", plot = SA_OW, width = 9, height = 4, dpi = 100)
# Job Analysis ------------------------------------------------------------
## Top 10 job type selected for the analysis and Exclude Blanks
# Fold the combined category into plain "Fracturing".
data.1$Final_jobtype[data.1$Final_jobtype == "Perforating/Fracturing"] <-
  "Fracturing"
table(data.1$Final_jobtype)
# Job and string counts per job type.
JA_Ove_agg <- plyr::ddply(
  data.1, .(Final_jobtype), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)

JA_Ove_agg <- arrange(JA_Ove_agg, desc(Count_ID))
# Rows 2 to 11 of the sorted table: row 1 is skipped.
# NOTE(review): presumably row 1 is the blank job type named in the header
# above -- verify that blanks really are the most frequent value.
top_jt <- JA_Ove_agg[2:11, 1]
JA_Ove_agg <- JA_Ove_agg[JA_Ove_agg$Final_jobtype %in% top_jt, ]
JA_O <- ggplot(
  data = JA_Ove_agg,
  aes(x = Final_jobtype, y = Count_ID, fill = Final_jobtype)
) +
  geom_bar(stat = "identity")
JA_O <- JA_O +
  xlab("Job Type") +
  ylab("Number of Jobs") +
  ggtitle("Job Analysis by Job types") +
  scale_fill_discrete(name = "Job Type") +
  coord_cartesian(ylim = c(0, 275)) +
  scale_y_continuous(breaks = seq(0, 275, 25)) +
  theme(panel.background = element_blank())
JA_O
ggsave("Job_Ana_Overal.png", plot = JA_O, width = 9, height = 4, dpi = 100)
## 2
# data.2 was copied from data.1 before "Perforating/Fracturing" was folded
# into "Fracturing", while top_jt was computed after that recode. Apply the
# same recode to the working copy so those jobs are not dropped by the filter.
data.to10 <- data.2
data.to10$Final_jobtype[
  data.to10$Final_jobtype %in% "Perforating/Fracturing"
] <- "Fracturing"
data.to10 <- data.to10[data.to10$Final_jobtype %in% top_jt, ]
# Job and string counts per job type and country.
JA_Ove_agg.1 <- plyr::ddply(
  data.to10, .(Final_jobtype, Country_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
JA_C <- ggplot(
  data = JA_Ove_agg.1,
  aes(x = Final_jobtype, y = Count_ID, fill = Country_Name)
) +
  geom_bar(stat = "identity")
JA_C <- JA_C +
  xlab("Job Type") +
  ylab("Number of Jobs") +
  ggtitle("Job Analysis (Top 10 Jobs) by Country") +
  coord_cartesian(ylim = c(0, 300)) +
  scale_y_continuous(breaks = seq(0, 300, 30)) +
  scale_fill_discrete(name = "Country Name")
JA_C
ggsave("Job_Ana_Country.png", plot = JA_C, width = 9, height = 4, dpi = 100)
## 3 Top 10 States (Excluding Blanks)
# Frequency table of states, most frequent first.
top_10_state <- as.data.frame(table(data.2$State_Name))
top_10_state <- arrange(top_10_state, desc(Freq))
# head() returns at most 10 names; indexing with 1:10 would pad with NA when
# fewer than 10 states exist, and %in% below would then match NA states.
top_10_state <- head(top_10_state[, 1], 10)
data.top_stat <- data.to10[data.to10$State_Name %in% top_10_state, ]
# Job and string counts per job type and state.
JA_Ove_agg.3 <- plyr::ddply(
  data.top_stat, .(Final_jobtype, State_Name), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
JA_ST <- ggplot(
  data = JA_Ove_agg.3,
  aes(x = Final_jobtype, y = Count_ID, fill = State_Name)
) +
  geom_bar(stat = "identity")
JA_ST <- JA_ST +
  xlab("Job Type") +
  ylab("Number of Jobs") +
  ggtitle("Job Analysis by Top 10 States across Country") +
  coord_cartesian(ylim = c(0, 250)) +
  scale_y_continuous(breaks = seq(0, 250, 25)) +
  scale_fill_discrete(name = "State Names")
JA_ST
ggsave("Job_Ana_States.png", plot = JA_ST, width = 9, height = 4, dpi = 100)

## 5 Offshore Well
# Job and string counts per job type and offshore flag.
JA_Ove_agg.4 <- plyr::ddply(
  data.to10, .(Final_jobtype, Offshore_Well), summarize,
  Count_ID = length(job_id),
  Percentage_ID = length(job_id) / N1,
  Count_String = length(unique(ct_string_id)),
  Percentage_String = length(unique(ct_string_id)) / N2
)
# Legend label: "Yes" -> offshore, "No" -> onshore, anything else -> n/a.
JA_Ove_agg.4$Offshore_Well_1 <- ifelse(
  JA_Ove_agg.4$Offshore_Well == "Yes",
  "Offshore Well",
  ifelse(JA_Ove_agg.4$Offshore_Well == "No", "Onshore Well", "Not Applicable")
)
JA_OW <- ggplot(
  data = JA_Ove_agg.4,
  aes(x = Final_jobtype, y = Count_ID, fill = Offshore_Well_1)
) +
  geom_bar(stat = "identity")
JA_OW <- JA_OW +
  xlab("Job Type") +
  ylab("Number of Jobs") +
  ggtitle("Job Analysis by Offshore Well") +
  coord_cartesian(ylim = c(0, 300)) +
  scale_y_continuous(breaks = seq(0, 300, 30)) +
  scale_fill_discrete(name = "Well type")
JA_OW
ggsave("Job_Ana_Off_well.png", plot = JA_OW, width = 9, height = 4, dpi = 100)
# Corrosion Analysis ------------------------------------------------------
# Per material: sums of the CO2, H2S and acid job columns.
CA_Ove_agg <- plyr::ddply(
  data.1, .(ct_material_id), summarize,
  # CT_corrosive = sum(ct_corrosive),
  Co2_Jobs = sum(ct_co2_jobs),
  H2s_Jobs = sum(ct_h2s_jobs),
  Acid_Jobs = sum(ct_acid_jobs)
)
library(reshape2)
# Long format: one row per (material, corrosion variable) pair.
CA_Ove_agg_t <- melt(CA_Ove_agg, id.vars = "ct_material_id")
CA_Ove_agg_t <- arrange(CA_Ove_agg_t, desc(variable))
CA_O <- ggplot(
  data = CA_Ove_agg_t,
  aes(x = variable, y = value, fill = ct_material_id)
) +
  geom_bar(stat = "identity")
CA_O <- CA_O +
  xlab("corrosion-related") +
  ylab("Count of the number of Jobs") +
  ggtitle("Corrosion Analysis by Overall") +
  scale_fill_discrete(name = "String Type") +
  coord_cartesian(ylim = c(0, 4000)) +
  scale_y_continuous(breaks = seq(0, 4000, 200))
CA_O
ggsave("Cor_Ana_Overal.png", plot = CA_O, width = 9, height = 4, dpi = 100)
## By country
# Corrosion-related columns of data.1, renamed so that no column name
# contains a space.
data.corr <- data.1[, c(
  "job_id", "ct_string_id", "ct_material_id", "Country Name",
  "Final_jobtype", "State Name", "Offshore Well", "ct_corrosive",
  "ct_acid_jobs", "ct_co2_jobs", "ct_h2s_jobs"
)]
cnames <- c(
  "job_id", "ct_string_id", "ct_material_id", "Country_Name",
  "Final_jobtype", "State_Name", "Offshore_Well", "ct_corrosive",
  "ct_acid_jobs", "ct_co2_jobs", "ct_h2s_jobs"
)
colnames(data.corr) <- cnames
# Per country: sums of the CO2, H2S and acid job columns.
CA_Ove_agg.county <- plyr::ddply(
  data.corr, .(Country_Name), summarize,
  # CT_corrosive = sum(ct_corrosive),
  Co2_Jobs = sum(ct_co2_jobs),
  H2s_Jobs = sum(ct_h2s_jobs),
  Acid_Jobs = sum(ct_acid_jobs)
)

library(reshape2)
# Long format: one row per (country, corrosion variable) pair.
CA_Ove_agg.county_t <- melt(CA_Ove_agg.county, id.vars = "Country_Name")
CA_C <- ggplot(
  data = CA_Ove_agg.county_t,
  aes(x = Country_Name, y = value, fill = variable)
) +
  geom_bar(stat = "identity")
# The x axis carries the country and the fill carries the corrosion variable,
# so the axis and legend titles are named accordingly.
CA_C <- CA_C +
  xlab("Country Name") +
  ylab("Count of the number of Jobs") +
  ggtitle("Corrosion Analysis by Country") +
  scale_fill_discrete(name = "Corrosion Type") +
  coord_cartesian(ylim = c(0, 4000)) +
  scale_y_continuous(breaks = seq(0, 4000, 250))
CA_C
ggsave("Cor_Ana_Country.png", plot = CA_C, width = 9, height = 4, dpi = 100)
# Scatter plot ------------------------------------------------------------
### CTR Depth
CTR_Depth <- CT_OD[, c(
  "job_id", "Country_Name", "CT_Ctr_Depth", "ct_string_len"
)]
## Remove NA
# %in% never returns NA, so a missing depth cannot turn into an all-NA row
# the way a logical index built with == would.
CTR_Depth <- CTR_Depth[!(CTR_Depth$CT_Ctr_Depth %in% "N.A"), ]
# Go through as.character() so that a factor column is converted by its
# labels rather than by its internal level codes.
CTR_Depth$CT_Ctr_Depth <- as.numeric(as.character(CTR_Depth$CT_Ctr_Depth))
str(CTR_Depth)
max(CTR_Depth$CT_Ctr_Depth)
## Removing Outlier
# Outliers are removed by sorting on depth (largest first) and skipping the
# first six rows. Indexing rows with !(max(...)) is not used: a negated
# number is a single FALSE, which selects no rows at all.
CTR_Depth <- arrange(CTR_Depth, desc(CT_Ctr_Depth))
# NOTE(review): the upper bound 1941 is hard-coded; presumably it was the row
# count of the original data set -- confirm against nrow(CTR_Depth).
CTR_Depth <- CTR_Depth[c(7:1941), ]
QQ <- ggplot(CTR_Depth, aes(x = ct_string_len, y = CT_Ctr_Depth)) +
  geom_point(shape = 1)
QQ <- QQ +
  xlab("CT String Len") +
  ylab("CT CTR Depth") +
  ggtitle("Scatter Plot by String Len vs CT CTR Depth") +
  theme_bw()
QQ
ggsave("Scatter_Ana.png", plot = QQ, width = 9, height = 4, dpi = 100)
# Same scatter, with points coloured by country.
QQ1 <- ggplot(
  CTR_Depth,
  aes(x = ct_string_len, y = CT_Ctr_Depth, color = Country_Name)
) +
  geom_point(shape = 1)
QQ1 <- QQ1 +
  xlab("CT String Len") +
  ylab("CT CTR Depth") +
  ggtitle("Scatter Plot by String Len vs CT CTR Depth") +
  theme_bw()
QQ1
ggsave("Scatter_Ana.1.png", plot = QQ1, width = 9, height = 4, dpi = 100)
range(CT_OD$CT_Ctr_Depth)
dim(CT_OD)
# Neeraj Request ----------------------------------------------------------
# Reel geometry columns of data.1, renamed so that no column name contains
# a space.
nee_data <- data.1[, c(
  "job_id", "ct_material_id", "CT Depth", "CT Reel Diam", "CT Reel Width"
)]
nam <- c("job_id", "ct_material_id", "CT_Depth", "CT_Reel_Diam", "CT_Reel_Width")
colnames(nee_data) <- nam
# Drop rows whose reel diameter is the "N.A" marker.
nee_data.1 <- nee_data[!(nee_data$CT_Reel_Diam %in% c("N.A")), ]
# A column holding "N.A" markers is text (or a factor); a boxplot needs a
# numeric y, so convert by label.
nee_data.1$CT_Reel_Diam <- as.numeric(as.character(nee_data.1$CT_Reel_Diam))
NE <- ggplot(data = nee_data.1, aes(x = ct_material_id, y = CT_Reel_Diam))
NE <- NE + geom_boxplot(aes(fill = ct_material_id))
# Axis and title name the plotted column (CT_Reel_Diam).
NE <- NE +
  xlab("String Type") +
  ylab("CT Reel Diam") +
  ggtitle("CT Reel Diam by String Type") +
  # coord_cartesian(ylim = c(0, 4)) +
  # scale_y_continuous(breaks = seq(0, 10, 0.5)) +
  scale_fill_discrete(name = "String Type")
NE
# Save this plot under its own file name so that the image written for the
# overall string analysis ("String_Ana_Overal.png") is not overwritten.
ggsave("Reel_Diam_Ana.png", plot = NE, width = 9, height = 4, dpi = 100)

Вам также может понравиться