Вы находитесь на странице: 1из 5

Common STAT 101 Commands for RStudio

All the custom functions we have used since the beginning of the semester can be loaded into
RStudio using the following command:
source("http://stat.duke.edu/courses/Fall11/sta101.02/labs/custom101.R")

One Categorical Variable


table(x)
barplot(table(x))
simCI(x, conf.level, cat) # custom
#gives a bootstrap distribution and CI for proportion
#conf.level = confidence level (default: .95)
#cat = category of interest
simHT(x, null, cat) # custom
#simulation test for a proportion
#alternative = "two.sided","less", or "greater" (default: "two.sided")
#cat = category of interest
pnorm(z)
#gives probability in tail of N(0,1) below z
#lower.tail=FALSE for upper tail above z
qnorm(0.975)
#replace 0.975 with desired percentile of N(0,1)
prop.test(count,n, p=null.value)
#normal based inference for a single proportion
#conf.level = confidence level (default: .95)
#alternative = "two.sided","less", or "greater" (default: "two.sided")

One Quantitative Variable


hist(x)
#xlab = label for x-axis
#main = title of plot
mean(x)
#na.rm=TRUE to get rid of NA values
median(x)
#na.rm=TRUE to get rid of NA values
sd(x)
#na.rm=TRUE to get rid of NA values
summary(x)
simCI(x, conf.level, est) # custom
#gives a bootstrap distribution and CI for mean or median
#conf.level = confidence level (default: 0.95)

#est = "mean" or "median" (default:mean)


simHT(x, est, null, alternative) # custom
#gives a randomization distribution and HT for mean or median
#est = "mean" or "median" (default:mean)
#alternative = "two.sided", "less", or "greater" (default: "two.sided")
z.test(x, mu) # custom
#z-based inference for a single mean
#conf.level = confidence level (default: 0.95)
#alternative = "two.sided", "less", or "greater" (default: "two.sided")
t.test(x, mu)
#t-based inference for a single mean
#conf.level = confidence level (default: 0.95)
#alternative = "two.sided", "less", or "greater" (default: "two.sided")

Two Categorical Variables


table(x,y)
barplot(table(x,y))
#beside = TRUE for side-by-side barplot
#legend = TRUE to include a color legend
mosaicplot(table(x,y))
resample(y,group) # custom
#gives a bootstrap distribution and CI for difference in proportions
#conf.level = confidence level (default: 0.95)
#order = order for difference (for group variable)
#outcome = outcome for difference (for outcome variable)
reallocate(y,group) # custom
#randomization test for difference in proportions
#alternative = "two.sided","less", or "greater" (default: "two.sided")
#order = order for difference (for group variable)
#outcome = outcome for difference (for outcome variable)
pnorm(z)
#gives probability in tail of N(0,1) below z
#lower.tail=FALSE for upper tail above z
qnorm(0.975)
#replace 0.975 with desired percentile of N(0,1)
prop.test(c(count1, count2), c(n1,n2))
#normal based inference for a difference in proportions
#conf.level = confidence level (default: .95)
#alternative = "two.sided", "less", or "greater" (default: "two.sided")

Two Quantitative Variables


plot(x,y)
#xlab = label for x-axis, ylab = label for y-axis
#main = title for plot

cor(x,y)
#use = "complete.obs" to get rid of NA values
m = lm(y ~ x)
#linear regression with one predictor
#y = response, x = explanatory (predictor)
summary(m)
#gives the summary output for the linear regression of y vs. x
plot_ss(x,y)
#custom interactive function for playing around with
#minimizing sum of squared residuals

One Categorical and One Quantitative Variable

y = quantitative
x = categorical
by(y, x, mean)
#na.rm=TRUE to get rid of NA values
by(y, x, sd)
boxplot(y~x)
resample(y,group) # custom
#gives a bootstrap distribution and CI for difference in means
#y is a numerical variable, group is a categorical variable
#conf.level = confidence level (default: 0.95)
#order = order for difference (for group variable)
reallocate(y,group) # custom
#randomization test for difference in means
#y is a numerical variable, group is a categorical variable
#alternative = "two.sided","less", or "greater" (default: "two.sided")
#order = order for difference (for group variable)
z.test(x, y, mu) # custom
#z-based inference for difference in means
#x and y are both numerical variables
#conf.level = confidence level (default: 0.95)
#alternative = "two.sided", "less", or "greater" (default: "two.sided")
t.test(x, y, mu)
#t-based inference for difference in means
#x and y are both numerical variables
#conf.level = confidence level (default: 0.95)
#alternative = "two.sided", "less", or "greater" (default: "two.sided")

Multiple Linear Regression


m = lm(y ~ x1 + x2 + ...)
#linear regression with many predictors
#y = response, x1, x2, ... = explanatory variables (predictors)
#data = dataframe
summary(m)
#gives summary output for the linear regression of y vs. x1, x2, ...
relevel(variable, ref = "ref.level")
#defines the reference level of a categorical variable
#see Lab 12 handout for usage

Subsetting
subset(dataname, !is.na(x))
#data "dataname", but only cases for which x is not NA
subset(dataname, x=="levelA")
#data "dataname", but only cases for which x is equal to "levelA"
x[!is.na(x)]
#the variable x, but only cases for which x is not NA
y[!is.na(x)]
#the variable y, but only cases for which x is not NA
x[x < 30]
#the variable x, but only cases for which x is less than 30
x[x != "levelA"]
#the variable x, but only cases for which x does not equal "levelA"
droplevels(x)
#drops empty levels if you have removed all the cases from one level

Miscellaneous
abline(h = value)
#add a horizontal line to an existing plot
abline(v = value)
#add a vertical line to an existing plot
abline(lm(y~x))
#overlays linear regression line on the scatterplot of y vs. x
#only works if plot(x,y) was run first

Plotting parameters

These arguments can be passed to plot(), hist(), or other similar functions. To learn
more about all plotting parameters, type ?par.
main = "main title"
#title of plot, to be placed in the top center


xlab = "x-axis label"
#x-axis label
ylab = "y-axis label"
#y-axis label
xlim = c(min,max)
#x-axis limits
ylim = c(min,max)
#y-axis limits

Вам также может понравиться