Вы находитесь на странице: 1 из 13

title: Practice #2

Author: Salmina Olga, 141 group


library("psych")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
##
filter, lag
## The following objects are masked from 'package:base':
##
##
intersect, setdiff, setequal, union
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
##
%+%, alpha
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
##
nasa
library("stats")
library("nortest")
library("sm")
## Package 'sm', version 2.2-5.4: type help(sm) for summary information
library("moments")
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2. http://CRAN.R-project.org/package=stargazer

library(car)

##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
##
recode
## The following object is masked from 'package:psych':
##
##
logit
library(outliers)
##
## Attaching package: 'outliers'
## The following object is masked from 'package:psych':
##
##
outlier
# Work on a local copy of the built-in `cars` data set; the analysis below
# uses its `speed` and `dist` columns.
d <- datasets::cars
# Store `dist` as a numeric vector for the tests and plots that follow.
d[["dist"]] <- as.numeric(d[["dist"]])

1.1. : dist (
) speed ( , ). a.
, ?
.b. ,
, ?
R, .
,
, a.
# Visual normality checks for both variables.

# Stopping distance: base normal Q-Q plot, then qqPlot() with custom styling.
qqnorm(d$dist)

qqPlot(
  d$dist,
  col = palette()[1],
  pch = 19,
  xlab = " ",
  ylab = " ",
  main = " "
)

# Speed: the same pair of Q-Q plots.
qqnorm(d$speed)

qqPlot(
  d$speed,
  col = palette()[1],
  pch = 19,
  xlab = " ",
  ylab = " ",
  main = " "
)

# Density estimates drawn with sm.density(), requesting model = "Normal".
sm.density(
  d$speed,
  model = "Normal",
  xlab = " ",
  ylab = " "
)

sm.density(
  d$dist,
  model = "Normal",
  xlab = " ",
  ylab = " "
)

# Formal normality tests for both variables; the lines starting with `##`
# are the console output recorded when the report was knitted.

# Lilliefors (Kolmogorov-Smirnov) test for the stopping distance.
lillie.test(d$dist)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: d$dist
## D = 0.12675, p-value = 0.04335
# Shapiro-Wilk test for the stopping distance.
shapiro.test(d$dist)
##
## Shapiro-Wilk normality test
##
## data: d$dist
## W = 0.95144, p-value = 0.0391
# Lilliefors (Kolmogorov-Smirnov) test for speed.
lillie.test(d$speed)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: d$speed
## D = 0.068539, p-value = 0.8068
# Shapiro-Wilk test for speed.
shapiro.test(d$speed)
##
## Shapiro-Wilk normality test
##
## data: d$speed
## W = 0.97765, p-value = 0.4576

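Вывод: распределения переменных различаются. Для переменной speed гипотеза о
нормальности распределения не отвергается на уровне значимости 0.05
(p-value 0.4576 и 0.8068). Для переменной dist гипотеза о нормальности
отвергается: p-value 0.0391 и 0.04335.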
1.2. a. ,
b. ,
(). , ,
?
c. ().
?
# Histograms of stopping distance and speed.
# qplot() is deprecated as of ggplot2 3.4.0, so each histogram is built
# explicitly with ggplot() + geom_histogram(). The bin count is left at the
# default, which is what triggers the `stat_bin()` message recorded below.
ggplot(d, aes(x = dist)) +
  geom_histogram() +
  labs(x = " ()", y = " ", title = " 1920")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(d, aes(x = speed)) +
  geom_histogram() +
  labs(x = " ()", y = " ", title = " 1920")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Box plots of each variable, used to look for outlying observations.
boxplot(d$dist)

boxplot(d$speed)

# Grubbs test (type = 10) for a single outlier in the stopping distance; the
# `##` lines are the recorded console output.
grubbs.test(d$dist, type = 10)

##
## Grubbs test for one outlier
##
## data: d$dist
## G = 2.98880, U = 0.81397, p-value = 0.04413
## alternative hypothesis: highest value 120 is an outlier
# Keep only the distances below 119 (this drops the flagged value 120) and
# re-run the Shapiro-Wilk test on the remaining observations.
dist_whithout <- d$dist[which(d$dist < 119)]
shapiro.test(dist_whithout)
##
## Shapiro-Wilk normality test
##
## data: dist_whithout
## W = 0.95909, p-value = 0.08692

, 120
. , ,
.
p-value = 0.08692. 1.3. d.
- ,
? (),
. e. ,
?
# Intercept-only model for the shifted response (dist + 1), passed to boxCox()
# to profile candidate Box-Cox powers.
m.null <- lm(d$dist + 1 ~ 1)
bc.null <- boxCox(m.null)

# Take the grid value in `x` at which the profile in `y` is largest.
bc.null.opt <- with(bc.null, x[which.max(y)])


# Report the selected power.
paste(" -:", bc.null.opt)
## [1] " -: 0.464646464646465"
# Box-Cox power transform of the stopping distance: (x^lambda - 1) / lambda,
# with lambda = 0.464 (the rounded optimum printed by the search step).
# The numerator must be parenthesised as a whole: the earlier form
# `x^0.464 - 1/0.464` only divided the constant 1 by lambda, because `/` binds
# tighter than `-`, so it produced x^lambda shifted by a constant.
# The corrected values are a positive linear rescaling of the old ones, so the
# Shapiro-Wilk statistic and the correlation reported later are unchanged.
# NOTE(review): lambda was estimated on dist + 1 but is applied to dist here;
# confirm whether the shift should also be applied in the transform.
dist_new <- (cars$dist^0.464 - 1) / 0.464
# Box plot of the transformed distances.
boxplot(dist_new)

# Shapiro-Wilk normality test on the transformed distances.
shapiro.test(dist_new)
##
## Shapiro-Wilk normality test
##
## data: dist_new
## W = 0.993, p-value = 0.9911

d.

e.

,
,
-, ,
. e. p-value - 0.9911
0.0391,
. 1.4.
,
() . ,
, ,
( ).

# Copy of the data with `dist` replaced by the Box-Cox-transformed values.
# `speed` is carried over untouched, so the former no-op `speed = speed`
# argument is dropped.
d2 <- mutate(d, dist = dist_new)


# Scatter plot of transformed distance against speed with a fitted linear
# trend. qplot() is deprecated as of ggplot2 3.4.0, so the points layer is
# added explicitly with geom_point().
ggplot(d2, aes(x = speed, y = dist)) +
  geom_point() +
  stat_smooth(method = "lm")

# Correlation between the transformed distance and speed.
cor(d2$dist, d2$speed)
## [1] 0.8433388

f.
g.
h.
i.
j.

(
0.8433388). 1.5.
() .
.
,
, ( )
? ?
99%- .
.
.
?

# Linear regression of stopping distance on speed, fitted to the untransformed
# `cars` data.
model <- lm(formula = dist ~ speed, data = cars)


# Residual quantiles, coefficient table and fit statistics.
summary(model)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -29.069  -9.525  -2.272   9.215  43.201
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *
## speed         3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12

# 99% confidence intervals for the regression coefficients.
confint(object = model, level = 0.99)
##                  0.5 %    99.5 %
## (Intercept) -35.706610 0.5484205
## speed         2.817919 5.0468988
# Fitted values of the regression, followed by their summary statistics.
y_hat <- fitted.values(model)
summary(y_hat)
##    Min. 1st Qu.  Median
##  -1.849  29.610  41.410

# Fitted values plotted against speed.
plot(y_hat ~ cars$speed)

# R-squared by hand: total sum of squares of the response, residual sum of
# squares of the model, and R2 = 1 - RSS / TSS.
y <- cars$dist
TSS <- sum((y - mean(y))^2)
RSS <- deviance(model)
R2 <- 1 - RSS / TSS
# Squared correlation between observed and fitted values (the printed value).
cor(y, y_hat)^2
## [1] 0.6510794

##    Mean 3rd Qu.    Max.
##  42.980  57.140  80.730

# Design matrix of the fitted model.
X <- model.matrix(model)
# Predicted stopping distance at speeds 8, 7 and 18.
nd <- data.frame(speed = c(8, 7, 18))
predict(model, newdata = nd)
##         1         2         3
## 13.880175  9.947766 53.204263

# Scatter plot of `d2` (speed vs. dist) with a fitted linear trend.
# qplot() is deprecated as of ggplot2 3.4.0, so the plot is built with
# ggplot() + geom_point() instead.
ggplot(d2, aes(x = speed, y = dist)) +
  geom_point() +
  stat_smooth(method = "lm")


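При увеличении скорости на 1 тормозной путь в среднем увеличивается на 3.93.
При скорости, равной 0, оценка свободного члена beta_1 = -17.58.
Тормозной путь не может быть меньше 0, поэтому эта оценка не имеет содержательного смысла. По модели
тормозной путь равен 0, когда скорость равна примерно 4.47
(17.58 / 3.93). C вероятностью 99% beta_1 лежит в интервале от -35.71 до 0.55,
beta_2 - от 2.82 до 5.05. R^2 равен 0.65
(ESS/TSS, где ESS - объяснённая сумма квадратов, TSS - общая сумма квадратов). Таким образом, модель объясняет 65% разброса тормозного пути.
Прогноз тормозного пути при скорости 8 - 13.880175,
при скорости 7 - 9.947766, при скорости 18
- 53.204263.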