You are viewing page 1 of 22

Practice of Introductory Time Series with R

YIK LUN, KEI

Reference:

Cowpertwait, Paul SP, and Andrew V. Metcalfe. Introductory time series with R. Springer Science & Business Media, 2009.

Onlinecourses.science.psu.edu. "1.3 R Code For Two Examples In Lessons 1.1 And 1.2 | STAT 510". N.p., 2015. Web. 10 Aug. 2015.

This paper is a practice from the book called Introductory Time Series with R by Paul Cowpertwait and Andrew Metcalfe, and from the website: onlinecourses.science.psu.edu/stat510/node/61. All R code and some comments below belong to the book and the website. Datasets can be found via Google.

# One-line recursive factorial. Guarding with n <= 1 (rather than n == 1)
# also terminates for n = 0, returning 1 (= 0!).
Fact <- function(n) if (n <= 1) 1 else n * Fact(n - 1)
Fact(6) # 6*5*4*3*2*1 = 720

##

 720

# Load the classic Box & Jenkins airline data and aggregate to annual means.
data(AirPassengers)
AP <- AirPassengers
aggregate(AP, FUN = mean) # annual means remove any seasonal effects

## Time Series:

## Start = 1949

##

## Frequency = 1

End = 1960

 ##  126.6667 139.6667 170.1667 197.0000 225.0000 238.9167 284.0000 ##  328.2500 368.4167 381.0000 428.3333 476.1667

# Time plot of the monthly series; shows trend and seasonal variation.
plot(AP, ylab = "Passengers (1000's)")

1

Passengers (1000's)

500

300

100

# Aggregate the monthly series to annual totals (sum within each year).
aggregate(AP)

4000

2000   1950

1952

1954

1956

1958

1960

Time

plot(aggregate(AP)) # plot the annual sum for each year

1952

1954

1956

1958

1960

Time

2

# Boxplot of values grouped by position within the yearly cycle (month 1-12).
boxplot(AP ~ cycle(AP))

500

300

100 1
2
3
4
5
6
7
8
9
10
11
12

# Read US unemployment (UNRATE) and inflation (CPIAUCSL) monthly series,
# then build monthly and mean-annual time series objects.
data <- "http://rci.rutgers.edu/~rwomack/UNRATE.csv"
data2 <- "http://rci.rutgers.edu/~rwomack/CPIAUCSL.csv"
unemployment <- read.csv(data, row.names = 1) # first column becomes row names
inflation <- read.csv(data2, row.names = 1) # first column becomes row names
un.month.ts <- ts(unemployment$VALUE, start = c(1948, 1), freq = 12)
un.annual.ts <- aggregate(un.month.ts) / 12 # mean annual rate
in.month.ts <- ts(inflation$VALUE, start = c(1948, 1), freq = 12)
in.annual.ts <- aggregate(in.month.ts) / 12 # mean annual rate
plot(un.month.ts, ylab = "unemployed (%)")

3

10

unemployed (%)

8

6

4    1950 1960 1970 1980 1990 2000 2010 Time

# Annual mean unemployment rate (stray "1950" axis label removed from the line).
plot(un.annual.ts, ylab = "unemployed (%)")
1960
1970
1980
1990
2000
2010
unemployed (%)
3
4
5
6
7
8
9

Time

4

# Monthly inflation series.
plot(in.month.ts, ylab = "inflation (%)")

15

10

inflation (%)

5

0

10

inflation (%)

5

0    1950 1960 1970 1980 1990 2000 2010 Time

# Annual mean inflation rate (stray "1950" axis label removed from the line).
plot(in.annual.ts, ylab = "inflation (%)")

1960

1970

1980

1990

2000

2010

Time

5

# Extract February-June 1996 from the monthly unemployment series.
fivemonths <- window(un.month.ts, start = c(1996, 2), end = c(1996, 6), freq = 12)
fivemonths

 ## Feb Mar Apr May Jun ## 1996 5.5 5.5 5.6 5.6 5.3

# NOTE(review): this uses column `unemploy` while the earlier read used `VALUE`
# — confirm against the actual CSV headers.
un.month.ts <- ts(unemployment$unemploy, start = c(1996, 1), freq = 12)

un.Feb <- window(un.month.ts, start = c(1996, 2), freq = TRUE) # capture only Feb
un.Aug <- window(un.month.ts, start = c(1996, 8), freq = TRUE) # capture only Aug
Feb.ratio <- mean(un.Feb) / mean(un.month.ts) # mean of all Februaries / overall mean
Aug.ratio <- mean(un.Aug) / mean(un.month.ts)
Feb.ratio; Aug.ratio # on average, unemployment is ~22% higher in February and ~18% lower in August

##  1.222529

##  0.8163732

# Monthly Australian chocolate, beer and electricity production series.
CBE <- read.csv("/Users/air/Desktop/Econ 144/Chapter01cbe.csv")
Elec.ts <- ts(CBE[, 3], start = 1958, freq = 12)
Beer.ts <- ts(CBE[, 2], start = 1958, freq = 12)
Choc.ts <- ts(CBE[, 1], start = 1958, freq = 12)
plot(cbind(Elec.ts, Beer.ts, Choc.ts))

cbind(Elec.ts, Beer.ts, Choc.ts) 1960
1965
1970
1975
1980
1985
1990
Choc.ts
Beer.ts
Elec.ts
2000
6000
100
200 2000
8000

Time

6

# Restrict both series to the time frame where they overlap.
AP.elec <- ts.intersect(AP, Elec.ts)
AP <- AP.elec[, 1]
Elec <- AP.elec[, 2]
plot(AP, ylab = "Air passengers / 1000's")
1958.5
1959.0
1959.5
1960.0
1960.5
1961.0
Time
Air passengers / 1000's
300
400
500
600

# Electricity production over the overlapping time frame.
plot(Elec, ylab = "Electricity production / MkWh")

7

Electricity production / MkWh

2000

1600    1958.0 1958.5 1959.0 1959.5 1960.0 1960.5 1961.0 Time

# Scatterplot of the two series with a fitted regression line.
plot(as.vector(AP), as.vector(Elec),
     xlab = "Air passengers / 1000's",
     ylab = "Electricity production / MWh")
abline(reg = lm(Elec ~ AP), col = "red") # correlation, not causation
350
400
450
500
550
600
Electricity production / MWh
1600
2000

Air passengers / 1000's

8

0.5

Global.ts

0.0

−0.5

−1.0

# Monthly global temperature anomaly series, 1856-2005.
Global <- scan("http://elena.aut.ac.nz/~pcowpert/ts/global.dat")
Global.ts <- ts(Global, st = c(1856, 1), end = c(2005, 12), freq = 12)
Global.annual <- aggregate(Global.ts, FUN = mean) # annual means
plot(Global.ts)
1950
2000

Time

# Annual mean series smooths out the monthly noise.
plot(Global.annual)

9

0.4

Global.annual

0.0

−0.4

0.8

0.4

New.series

0.0

−0.4 1900
1950
2000

Time

# Recent sub-series with a fitted linear trend as a function of time t.
New.series <- window(Global.ts, start = c(1970, 1), end = c(2005, 12))
plot(New.series)
abline(reg = lm(New.series ~ time(New.series)), col = "red")

1975

1980

1985

1990

1995

2000

2005

Time

10

# Additive decomposition: observed, trend, seasonal and random components.
plot(decompose(Elec.ts))
1965
1970
1975
1980
1985
1990
random seasonal
trend
observed
−600
0
600 −500
500 2000
10000 2000
12000

Time

# Multiplicative decomposition (seasonal effect scales with the trend).
Elec.decom <- decompose(Elec.ts, type = "mult")
plot(Elec.decom)

11

Decomposition of multiplicative time series 1960
1965
1970
1975
1980
1985
1990
random seasonal
trend
observed
0.94
1.02
0.90
1.05
2000
10000 2000
12000

Time

# Overlay the trend with the trend multiplied by the seasonal effect.
Trend <- Elec.decom$trend
Seasonal <- Elec.decom$seasonal
ts.plot(cbind(Trend, Trend * Seasonal), lty = 1:2)
1965
1970
1975
1980
1985
1990
2000
6000
10000
14000

Time

12

8

6

y

4

2

# Scatter of CO against Benzoa with mean reference lines.
# NOTE(review): attach() is discouraged — with(Herald.dat, ...) would be safer,
# but x, y and n are reused by the covariance examples below.
attach(Herald.dat)
x <- CO
y <- Benzoa
n <- length(x)
plot(x, y)
abline(h = mean(y), col = "red") # horizontal line at the mean of y
abline(v = mean(x), col = "blue") # vertical line at the mean of x

10

15

20

x

# Three ways to compute the covariance of x and y, then the correlation.
sum((x - mean(x))*(y - mean(y))) / (n - 1) # covariance, denominator n - 1 (matches cov())

##  5.511042

mean((x - mean(x)) * (y - mean(y))) # covariance, denominator n (biased estimate)

##  5.166602

cov(x, y) # covariance

##  5.511042

cov(x,y) / (sd(x)*sd(y)) # correlation (covariance scaled by both standard deviations)

##  0.3550973

13

cor(x,y) # correlation (same value as the manual computation above)

500

ts(waveht)

0

−500

##  0.3550973 0
100
200
300
400
Time

# First 60 observations of the wave-height series.
plot(ts(waveht[1:60]))

14 0
10
20
30
40
50
60
ts(waveht[1:60])
−600
−200
200
600

Time

# Plot the correlogram and also return the autocorrelation values
# ("\$" was LaTeX-escape residue for "$").
acf(waveht)$acf

Series waveht 0
5
10
15
20
25
ACF
−0.5
0.0
0.5
1.0

##  0.4702564

Lag

15

# Scatterplot of the series against itself shifted by one step (lag-1).
plot(waveht[1:396],waveht[2:397])

500

waveht[2:397]

0

−500 −500
0
500

waveht[1:396]

# Autocovariance function; a plain string is the idiomatic form for `type`.
acf(waveht, type = "covariance")$acf

Series waveht 0
5
10
15
20
25
ACF (cov)
−20000
20000
60000

Lag

16

1.0

0.6

ACF

0.2

−0.2

##  33328.39

# Correlogram of the raw airline series.
data(AirPassengers)
AP <- AirPassengers
acf(AP)

Series AP            0.0

0.5

1.0

1.5

Lag

# Multiplicative decomposition of the airline series.
AP.decom <- decompose(AP, "multiplicative")

# Random component; indices 7:138 drop the NA ends left by the moving average.
plot(ts(AP.decom$random[7:138]))

17 0
20
40
60
80
100
120
ts(AP.decom\$random[7:138])
0.90
0.95
1.00
1.05
1.10

Time

# Correlogram of the random component after decomposition.
acf(AP.decom$random[7:138])

Series AP.decom\$random[7:138] 0
5
10
15
20
ACF
−0.2
0.2
0.6
1.0

Lag

#The figure suggests either a cosine shape that is characteristic of an autoregressive model of order 2, or an ineffective seasonal adjustment.

18

The reduction in the standard deviation indicates that the seasonal adjustment has been very effective.

sd(AP[7:138]) # standard deviation of the original series

##  109.4187

sd(AP[7:138] - AP.decom$trend[7:138]) # original series minus the trend

##  41.11491

sd(AP.decom$random[7:138]) # standard deviation after seasonal adjustment

##  0.0333884

# Weekly cardiovascular mortality series (Shumway & Stoffer data).
mort <- scan("http://anson.ucdavis.edu/~shumway/cmort.dat")
plot(mort, type = "o") # plot of the mortality rate
100
200
300
400
500
mort
70
90
110
130

Index

mort <- ts(mort)
mortdiff <- diff(mort, 1) # first difference: x(t) - x(t-1)
plot(mortdiff, type = "o") # plot of the first differences

19

20

10

mortdiff

0

−10

−20 0
100
200
300
400
500

Time

acf(mortdiff,xlim=c(1,24)) # ACF of the first differences, for 24 lags

Series mortdiff 5
10
15
20
ACF
−0.5
0.0
0.5
1.0

Lag

20

# Lag the differenced series by one step (stats::lag with k = -1 shifts forward).
mortdifflag1 <- lag(mortdiff, -1)

plot(mortdifflag1, type = "o")

20

10

mortdifflag1

0

−10

−20 0
100
200
300
400
500

Time

# Bind the first differences with their one-step lag and fit an AR(1)
# by regressing the series on its lag.
y <- cbind(mortdiff, mortdifflag1)
mortdiffar1 <- lm(y[, 1] ~ y[, 2]) # AR(1) regression for first differences
summary(mortdiffar1) # regression results

 ## ## Call: ## lm(formula = y[, 1] ~ y[, 2]) ## ## Residuals:
 ## Min 1Q Median 3Q Max ## -19.2758 -3.8753 -0.0953 3.5725 20.8169

## ## Coefficients:

##

## (Intercept) -0.04627

##

-0.50636

## --- ## Signif. codes:

##

## Residual standard error: 5.826 on 504 degrees of freedom

0.1

Estimate Std. Error t value Pr(>|t|)

0.25900

-0.179

0.858

0.03838 -13.195

0

'***'

0.001 '**'

0.01

y[,

2]

<2e-16 ***

'*' 0.05

'.'

##

## Multiple R-squared:

## F-statistic: 174.1 on 1 and 504 DF,

(2 observations deleted due to missingness)

0.2553

p-value: < 2.2e-16

21

'

'

1

acf(mortdiffar1$residuals, xlim = c(1,24)) # ACF of residuals for 24 lags

Series mortdiffar1\$residuals 5
10
15
20
ACF
0.0
0.4
0.8

Lag

22