You are on page 1 of 44

Statistical Arbitrage

Li, Zhi
Q261821669

March 10, 2017


2
Contents

Preface i


1 FOMC 1
1.1 FOMC . . . . . . . . . . . . . . . . . . . . . . . . . . 1
1.2 FOMC . . . . . . . . . . . . . . . . . . . . . . . . . . 2
1.3 FOMC . . . . . . . . . . . . . . . . . . . . . . . . . . 3

2 7
2.1 HMM . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7
2.2 . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
2.3 features . . . . . . . . . . . . . . . . . . . . . . . 11
2.4 HMM . . . . . . . . . . . . . . . . . . . . . . . . . . 13

3 15
3.1 Tabu Search . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15
3.2 Constraints . . . . . . . . . . . . . . . . . . . . . . . . . . . . 15
3.3 . . . . . . . . . . . . . . . . . . . . . . . . 16

4 21

3
4 CONTENTS

4.1 87 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 21
4.2 Joint Probability Table . . . . . . . . . . . . . . . . . . . . . . 22
4.3 . . . . . . . . . . . . . . . . . . . . . . . . 24

5 27
5.1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 27
5.2 . . . . . . . . . . . . . . . . . . . . . . . . 28

6 31
6.1 DCA . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 31
6.2 Modified DCA . . . . . . . . . . . . . . . . . . . . . . . . . . . 32
6.3 DCA . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34
6.4 DCA . . . . . . . . . . . . . . . . . . . . . . . . . . . 35
List of Figures

1.1 FOMC . . . . . . . . . . . . . . . . . . . . . . 3
1.2 SP500 . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
1.3 FOMC . . . . . . . . . . . . . . . . . . . . 5

2.1 . . . . . . . . . . . . . . . 14

3.1 . . . . . . . . . . . . . . . . . . . 20

4.1 . . . . . . . . . . . . . . . . . 26

5.1 . . . . . . . . . . . . . . . . . . 29

6.1 DCA . . . . . . . . . . . . . . . . . . . . 35

5
6 LIST OF FIGURES
Preface

R
2016
2010
long only2014R
QQ
R3mirrors
ZipinstallRHmm
mirrors

install.packages("RHmm", repos="http://R-Forge.R-project.org")

i
ii PREFACE
Chapter 1


FOMC

1.1
FOMC
(The Federal Open Market Committee )FOMC
FOMC arbitrage event arbitrage
quantmodtseriesforecast
performanceanalyticsfomcdatesFOMC

http://www.federalreserve.gov/monetarypolicy/fomccalendars.htm

quantmodSP500fomcdates
fomcdatesROC()
daily.returns

library(quantmod)
library(tseries)
library(forecast)

1
2 CHAPTER 1. FOMC

library(PerformanceAnalytics)
# Event calendar: FOMC dates as "YYYY-MM-DD" strings, listed in ascending order.
fomcdates <-
c("2011-01-26", "2011-03-15", "2011-04-27", "2011-06-22", "2011-08-09",
"2011-09-21", "2011-11-02", "2011-12-13", "2012-01-25", "2012-03-13",
"2012-04-25", "2012-06-20", "2012-08-01", "2012-09-13", "2012-10-24",
"2012-12-12", "2013-01-30", "2013-03-20", "2013-05-01", "2013-06-19",
"2013-07-31", "2013-09-18", "2013-10-30", "2013-12-18", "2014-01-29",
"2014-03-19", "2014-04-30", "2014-06-18", "2014-07-30", "2014-09-17",
"2014-10-29", "2014-12-17", "2015-01-28", "2015-03-18", "2015-04-29",
"2015-06-17", "2015-07-29", "2015-09-17", "2015-10-28", "2015-12-16",
"2016-01-27", "2016-03-16", "2016-04-27", "2016-06-15", "2016-07-27",
"2016-09-21")
# Ticker requested from Yahoo (the caret is part of the ticker string).
symbol <- c("^GSPC")
# Dedicated environment so getSymbols() does not write into the workspace.
symbolData <- new.env()
# Download prices from the first to the last date in fomcdates.
getSymbols(symbol, env = symbolData, src = "yahoo",
from=fomcdates[1], to=last(fomcdates))
# Apply Cl() to every object in symbolData and bind the results column-wise.
sp500 <- do.call(cbind,eapply(symbolData, Cl))
# ROC() with default arguments on the close series; rows containing NA are dropped.
daily.returns <- na.omit(ROC(sp500))

1.2
FOMC

510
FOMC
Figure(1.1)
1.3. FOMC 3

Figure 1.1: FOMC

# Fit an ets() model to the daily returns with the damped option switched off.
fit <- ets(daily.returns,damped=FALSE)


# Periodogram of the model's fitted values: no taper, log="yes".
spot.per <- spec.pgram(fit$fitted, taper=0, log="yes")
# Dotted vertical marker at frequency 10/1424.
# NOTE(review): 1424 is presumably the number of observations in the sample; confirm.
abline(v=10/1424, lty="dotted")

ets()fit
noisespec.pgram()fit$fitted

10SP500142410/1424
,
Figure(1.2)

1.3
FOMC
FOMCFOMC
FOMCindicator
temp 1 0
4 CHAPTER 1. FOMC

Figure 1.2: SP500


1.3. FOMC 5

Figure 1.3: FOMC

1temptemp
0,1
Rspline
time.intervalsFOMCtrading.days
FOMCFOMC
tempsignals
charts.PerformanceSummary()Figure(1.3)

# Ten-day 0/1 template tiled 6 times (60 entries). Multiplying by it keeps a
# day's return where the entry is 1 and zeroes it where the entry is 0.
temp <- rep(c(1, 1, 1, 0, 0, 0, 0, 0, 1, 1), 6)

# One row per pair of consecutive FOMC dates. Dates are kept as character so
# they can be handed to window() unchanged on any R version.
time.intervals <- data.frame(
  start = head(fomcdates, -1),
  end = tail(fomcdates, -1),
  stringsAsFactors = FALSE
)

# Number of sp500 rows inside each interval, minus one.
trading.days <- mapply(
  function(from, to) {
    length(as.numeric(window(sp500, start = from, end = to))) - 1
  },
  time.intervals$start,
  time.intervals$end,
  USE.NAMES = FALSE
)

# Restart the template at every interval. rep_len() returns exactly x entries:
# none for x == 0, and it cycles instead of producing NA when x exceeds
# length(temp).
signals <- unlist(lapply(trading.days, function(x) rep_len(temp, x)))

charts.PerformanceSummary(signals * daily.returns)

70%5
0.7/5 = 0.14 2015 FOMC
0.0855

Chapter 2

2.1 HMM
Hidden Markov Model Baum-Welch reestimation method MCMC Gibbs sampling Metropolis algorithm

RSP500
2000-01-012015-01-012010-01-01
2000-01-012010-01-012010-01-022015-01-
01cross validation
in sampleout of sample
dailyRetinRetoutRet

7
8 CHAPTER 2.

library("quantmod")
library("PerformanceAnalytics")
library("RHmm")
library("zoo")

# Sample boundaries: data up to trainingEnd is in-sample, the rest out-of-sample.
start <- as.Date("2000-01-01")
end <- as.Date("2015-01-01")
trainingEnd <- as.Date("2010-01-01")

# forward: look-ahead length used when labelling trends in the in-sample data.
# days: width of the rolling window used for the out-of-sample likelihoods.
forward <- 10
days <- 5

symbol <- c("^GSPC")

symbolData <- new.env()
getSymbols(symbol, env = symbolData, src = "yahoo", from = start, to = end)
# Look the series up under the ticker with the leading caret removed; a direct
# environment lookup replaces the former eval(parse(text = ...)).
mktdata <- symbolData[[sub("^", "", symbol, fixed = TRUE)]]
inData <- window(mktdata, start = start, end = trainingEnd)
outData <- window(mktdata, start = trainingEnd + 1)
dailyRet <- ROC(Cl(mktdata)) # Daily returns
# Missing returns are replaced by a small positive constant instead of NA.
dailyRet[is.na(dailyRet)] <- 0.00001
inRet <- window(dailyRet, start = start, end = trainingEnd)
outRet <- window(dailyRet, start = trainingEnd + 1)
2.2. 9

2.2
in sample
1010
10forward < 10
LongTrendSignalShortTrendSignal
10 CHAPTER 2.

#' Expand entry/exit markers into a held-position indicator.
#'
#' Walks through `signal` once. A 1 opens a position, a -1 closes it, and any
#' other value keeps the current state.
#'
#' @param signal Vector whose entries are compared against 1 (enter) and
#'   -1 (exit).
#' @return Numeric vector of the same length as `signal`: 1 on every position
#'   from an entry up to, but not including, the next exit, and 0 elsewhere.
#'   An empty `signal` gives an empty result.
ConvertTofullSignal <- function(signal) {
  results <- rep(0, length(signal))
  intrade <- FALSE
  # seq_along() yields no iterations for empty input, whereas seq(1, 0)
  # would visit the non-existent positions 1 and 0.
  for (i in seq_along(signal)) {
    # The exit is evaluated first, so a -1 always ends the position.
    if (signal[i] == -1) {
      intrade <- FALSE
    }
    if (signal[i] == 1 || intrade) {
      results[i] <- 1
      intrade <- TRUE
    }
  }
  results
}

# Generate long trend signal
# Day i is marked 1 when its close is the lowest close of rows i..i+forward
# and -1 when it is the highest. The -1 test runs last, so it wins when both
# hold. The last `forward` rows have no complete look-ahead block and stay 0.
# Closes are extracted once as a plain numeric vector rather than subsetting
# the xts object on every iteration.
inClose <- as.numeric(coredata(Cl(inData)))
LongTrendSignal <- rep(0, nrow(inData))
# seq_len(max(., 0)) yields no iterations when the sample is not longer than
# `forward`; seq(1, nrow - forward) would count backwards in that case.
for (i in seq_len(max(nrow(inData) - forward, 0))) {
  dataBlock <- inClose[i:(i + forward)]
  # Exact comparison is safe here: inClose[i] is itself a member of dataBlock.
  if (inClose[i] == min(dataBlock)) {
    LongTrendSignal[i] <- 1
  }
  if (inClose[i] == max(dataBlock)) {
    LongTrendSignal[i] <- -1
  }
}
LongTrendSignal <- ConvertTofullSignal(LongTrendSignal)
2.3. FEATURES 11

# Generate short trend signal
# Mirror image of the long labelling: day i is marked 1 when its close is the
# highest close of rows i..i+forward and -1 when it is the lowest. The -1
# test runs last, so it wins when both hold.
# Closes are extracted once as a plain numeric vector rather than subsetting
# the xts object on every iteration.
inClose <- as.numeric(coredata(Cl(inData)))
ShortTrendSignal <- rep(0, nrow(inData))
# seq_len(max(., 0)) yields no iterations when the sample is not longer than
# `forward`; seq(1, nrow - forward) would count backwards in that case.
for (i in seq_len(max(nrow(inData) - forward, 0))) {
  dataBlock <- inClose[i:(i + forward)]
  # Exact comparison is safe here: inClose[i] is itself a member of dataBlock.
  if (inClose[i] == max(dataBlock)) {
    ShortTrendSignal[i] <- 1
  }
  if (inClose[i] == min(dataBlock)) {
    ShortTrendSignal[i] <- -1
  }
}
ShortTrendSignal <- ConvertTofullSignal(ShortTrendSignal)

2.3 features

features
high/low, high/open, high/close, open/close, low/close,
low/open FeatureMatrix()
longListfeaturesshortListfeatures

#' Cut a feature table into one matrix per run of active signal.
#'
#' A run starts at the first position where `signal` is 1 and ends at the
#' position before the next 0. Each closed run is returned as
#' `as.matrix(features[start:end, ])`. A run that is still open when `signal`
#' ends is not emitted.
#'
#' @param features Row-indexable table (matrix, data frame or xts) whose rows
#'   line up with `signal`.
#' @param signal Vector of 0/1 flags, one per row of `features`.
#' @return List of matrices, one per closed run, in order of appearance.
#'   The list is empty when there is no closed run or `signal` is empty.
FeatureMatrix <- function(features, signal) {
  results <- list()
  extract <- FALSE
  startIndex <- NA_integer_
  # seq_along() keeps the loop from running on empty input.
  for (i in seq_along(signal)) {
    if (signal[i] == 1 && !extract) {
      startIndex <- i
      extract <- TRUE
    }
    if (signal[i] == 0 && extract) {
      endIndex <- i - 1
      # drop = FALSE keeps a one-row run as a 1 x p matrix instead of letting
      # it collapse to a vector (which as.matrix would turn into p x 1).
      dataBlock <- features[startIndex:endIndex, , drop = FALSE]
      extract <- FALSE
      results[[length(results) + 1]] <- as.matrix(dataBlock)
    }
  }
  results
}
# Generate features
# Seven columns per day: the daily return followed by the price ratios
# high/low, high/open, high/close, open/close, low/close and low/open.
features <- cbind(dailyRet,Hi(mktdata)/Lo(mktdata),
Hi(mktdata)/Op(mktdata),Hi(mktdata)/Cl(mktdata),
Op(mktdata)/Cl(mktdata),Lo(mktdata)/Cl(mktdata),
Lo(mktdata)/Op(mktdata))
# Split at trainingEnd: in-sample up to that date, out-of-sample from the next day.
inFeatures <- window(features,start=start, end=trainingEnd)
outFeatures <- window(features,start=trainingEnd+1)

# Long / short features: one feature matrix per run of the respective
# in-sample trend signal.


longList <- FeatureMatrix(inFeatures,LongTrendSignal)
shortList <- FeatureMatrix(inFeatures,ShortTrendSignal)
2.4. HMM 13

2.4
HMM

features HMMFit()
LongModelFitShortModelFitout of
samplelong likelihoodshort likelihoodlikelihood
forwardBackward()
likelihood likelihood likelihood
longshort
shortingLag()

# Train the HMM models
# One 4-state model is fitted on the long-trend blocks and one on the
# short-trend blocks. The seed is fixed before fitting.
set.seed(999)
LongModelFit <- HMMFit(longList, nStates = 4)
ShortModelFit <- HMMFit(shortList, nStates = 4)

# For every out-of-sample day, take the LLH element of forwardBackward() for
# the trailing `days` rows of features under each model. align = "right"
# anchors each window on its last row, and fill = NA pads positions that lack
# a full window.
longLikelihood <- rollapply(
  outFeatures, days,
  FUN = function(x) {
    forwardBackward(LongModelFit, as.matrix(x))$LLH
  },
  align = "right", fill = NA, by.column = FALSE
)
shortLikelihood <- rollapply(
  outFeatures, days,
  FUN = function(x) {
    forwardBackward(ShortModelFit, as.matrix(x))$LLH
  },
  align = "right", fill = NA, by.column = FALSE
)

#Calculate Out of Sample Returns


14 CHAPTER 2.

Figure 2.1:

# Keep a day's return only when the lagged comparison says the long model was
# the more likely one; Lag() shifts the comparison before it is multiplied in.
longReturns <- Lag(longLikelihood > shortLikelihood)* outRet


# Days without a signal (NA from the window padding or the lag) earn nothing.
longReturns[is.na(longReturns)] <- 0
charts.PerformanceSummary(longReturns)

Figure2.1
Chapter 3

3.1 Tabu Search


8

40320
8! = 40320

10

3.2 Constraints
local searchtabu list

15
16 CHAPTER 3.

constraints
2

# Constraint penalty: when more than 2 entries of x are selected, 5 is
# subtracted from the fitness value v.
if (sum(x) > 2) v <- v - 5

v fitness 5 2
fitness5

3.3
port1005r
7

rm(list=ls(all=TRUE))
library(PerformanceAnalytics)
library(quantmod)

# Universe of seven Yahoo tickers used for the search.
stocks <- c("AAPL","ARMH","JPM","^GSPC", "VLO", "VAR","XEL")


nStocks <- length(stocks)
# Dedicated environment so getSymbols() does not write into the workspace.
symbolData <- new.env()
getSymbols(stocks, env = symbolData, src = "yahoo",
from = "2012-01-01", to = "2016-01-01")
# Apply Cl() to every downloaded object and bind the results column-wise.
# NOTE(review): eapply() does not promise the order of `stocks`; confirm
# before relying on column positions.
ClosePrices <- do.call(cbind,eapply(symbolData, Cl))

# ROC() with default arguments per column; rows containing any NA are dropped.
r <- na.omit(ROC(ClosePrices))
3.3. 17

nRows <- nrow(r)


# Replace missing returns and returns larger than 1 in absolute value by a
# small constant.
r[is.na(r)] <- 0.00001
r[abs(r)>1] <- 0.00001

# port: one row per day, one column per stock; row t receives the best
# selection vector during the search loop.
port <- matrix(rep(0,nRows*nStocks),nRows,nStocks)


# w: zero matrix of the same shape (overwritten after the search).
w <- matrix(rep(0,nRows*nStocks),nRows,nStocks)
t <- 1                      # current row of r being processed
vstar <- 0                  # best fitness value found so far
lstar <- min(20,nStocks/4)  # value written into l for the stock just moved
tstar <- nrow(r)            # the search loop runs while t < tstar
x <- rep(1,nStocks)         # current 0/1 selection vector
xbest <- rep(0,nStocks)     # best selection vector found so far
l <- rep(0,nStocks)         # per-stock tabu counter; 0 means the stock may be flipped

port

headtaillocal searchwhile

#' Greedy single-flip improvement of a 0/1 selection vector.
#'
#' Flips one entry of `y` at a time and keeps the flip only when it raises the
#' fitness above the best value seen so far. Passes over `y` are repeated
#' until a full pass brings no improvement. The fitness is `r[t, ] %*% y`,
#' reduced by 5 when more than 2 entries are selected. `r` is read from the
#' enclosing environment.
#'
#' @param y Numeric 0/1 selection vector to start from.
#' @param vst Fitness value that has to be beaten.
#' @param t Row of `r` used to evaluate the fitness.
#' @return Numeric vector `c(best_fitness, y)`: the best fitness reached,
#'   followed by the selection vector after all accepted flips.
local_search <- function(y, vst, t) {
  improved <- TRUE
  while (improved) {
    improved <- FALSE
    # seq_along() skips the pass for an empty y; 1:length(y) would try to
    # flip the non-existent entries 1 and 0.
    for (i in seq_along(y)) {
      y[i] <- 1 - y[i]
      v <- r[t, ] %*% y
      if (sum(y) > 2) {
        v <- v - 5
      }
      if (v > vst) {
        vst <- v
        improved <- TRUE
      } else {
        # Not an improvement: undo the flip.
        y[i] <- 1 - y[i]
      }
    }
  }
  c(vst, y)
}

# Tabu search, one pass per row t of r.
# For every stock that is not tabu (l[k] == 0), flip its entry in x and score
# the result. The first flip that beats vstar triggers local_search() and
# ends the scan, leaving that flip in place. Otherwise each flip is undone
# and the best-scoring one is remembered in big_k and applied after the scan.
while (t < tstar) {
  vstst <- -1000000 # best fitness among the flips tried in this pass
  done <- 0         # set to 1 once a flip beats vstar
  big_k <- 0        # stock to move; 0 means no stock was eligible
  for (k in seq_len(nStocks)) {
    if (l[k] == 0) {
      x[k] <- 1 - x[k]
      v <- r[t, ] %*% x
      # Same penalty as in local_search(): more than 2 selected costs 5.
      if (sum(x) > 2) {
        v <- v - 5
      }
      if (v > vstar) {
        big_k <- k
        found <- local_search(x, vstar, t)
        vstar <- found[1]
        xbest <- found[2:(nStocks + 1)]
        done <- 1
        break
      }
      x[k] <- 1 - x[k]
      if (v > vstst) {
        big_k <- k
        vstst <- v
      }
    }
  }
  # Indexing with big_k == 0 selects nothing, so both assignments that use it
  # are no-ops when every stock was tabu.
  if (done == 0) x[big_k] <- 1 - x[big_k]
  # Age all tabu counters by one step, then mark the moved stock as tabu.
  l <- pmax(0, l - 1)
  l[big_k] <- lstar
  port[t, ] <- xbest
  t <- t + 1
}

portwnext.r
rowSums()

# Pair the selection of each row with the return of the following row:
# drop the last row of port and the first row of r.
w <- head(port,-1)
next.r <- tail(r,-1)
# Number of selected stocks per row; 0 is replaced by 1 so the later division
# leaves an all-zero row unchanged.
den <- rowSums(w)
den[which(den==0)] <- 1
20 CHAPTER 3.

Figure 3.1:

# Divide each row by its count: den has one entry per row and is recycled
# down the columns.
w <- w/den
# Portfolio return per row: sum of weight times next-row return across stocks.
ret <- rowSums(data.frame(w*next.r))
charts.PerformanceSummary(ret)
# 8! = 40320
factorial(8)

Figure3.14
100%
Chapter 4

4.1 87

Vince's book, The Leverage Space Trading Model,
87

library(LSPM)
# Multiple strategy example (data found on pp. 84-87)
# Three columns of outcomes, 12 rows each, bound column-wise.
trades <- cbind(
c(-150,-45.33,-45.33,rep(13,5),rep(79.67,3),136),
c(253,-1000,rep(-64.43,3),253,253,448,rep(-64.43,3),253),
c(533,220.14,220.14,-500,533,220.14,799,220.14,-325,220.14,533,220.14))
# One probability per row of trades (12 values; the third is twice the others).
probs <- c(rep(0.076923077,2),0.153846154,rep(0.076923077,9))
# Create a Leverage Space Portfolio object
port <- lsp(trades,probs)
# Set the f element, one value per column of trades.
port$f <- c(.1,.4,.25)

21
22 CHAPTER 4.

# Evaluate GHPR() for three different settings of port$f; the trailing
# comments record the values given in the text.
GHPR(port)#GHPR(.1,.4,.25) = 1.00491443
port$f <- c(.307, 0.0, .693)
GHPR(port)#GHPR(.307,0.0,.693) =1.249
port$f <- c(.304, 0.0, .696)
GHPR(port)#GHPR(.304, 0.0, .696) = 1.339

jointProbTable
LSPM jointProbTable

# Three series of 14 observations each.
mktA<-c(617,664,673,751,887,849,781,851,942,834,804,789,791,813)
mktB<-c(2812,3260,3560,3360,3681,2946,2873,2899,2947,3069,2994,
2787,2817,3086)
mktC<-c(6189,6570,7369,7916,8199,8256,8573,8713,8388,8817,8938,
8545,9168,9410)
mydata<-cbind(mktA,mktB,mktC)

# Build the joint probability table from the first differences of the three
# series, using 12 bins per series.
port2 <- jointProbTable(diff(mydata),n=12)


# Set the f element, one value per series, and evaluate GHPR().
port2$f <- c(.631, 0.0, .767)
GHPR(port2)

4.2 Joint Probability Table


LSPM jointProbTable()

# Define JPT function
#' Build a joint probability table and wrap it in an lsp object.
#'
#' Every column of `x` is binned with `cut()`. Each observation is replaced by
#' `FUN` of all observations in its bin. Rows that share the same combination
#' of binned values are merged, and each merged row gets probability
#' (number of rows merged) / NROW(x).
#'
#' @param x Matrix-like object with one column per system and one row per
#'   period.
#' @param n Number of bins: a single value used for all columns, or one value
#'   per column of `x`.
#' @param FUN Function, or name of a function, that summarises the
#'   observations in a bin. Must return a single number.
#' @param ... Further arguments passed on to `FUN`.
#' @return The result of `lsp()` called with the binned outcomes and their
#'   probabilities.
jointProbTable <- function(x, n = 3, FUN = median, ...) {
  # Accepts both a function and a function name.
  FUN <- match.fun(FUN)

  # Function to bin data: replace each value by FUN over its bin.
  quantize <- function(x, n, FUN = median, ...) {
    bins <- cut(x, n, labels = FALSE)
    vapply(
      seq_len(NROW(x)),
      function(i) FUN(x[bins == bins[i]], ...),
      numeric(1)
    )
  }

  nSystems <- NCOL(x)

  # Allow for different values of n for each system in x
  if (NROW(n) == 1) {
    n <- rep(n, nSystems)
  } else if (NROW(n) != nSystems) {
    stop("invalid n")
  }

  # Bin data in x: one vector of binned values per system
  qd <- lapply(
    seq_len(nSystems),
    function(i) quantize(x[, i], n = n[i], FUN = FUN, ...)
  )

  # Aggregate probabilities: each row starts with weight 1 / NROW(x)
  probs <- rep(1 / NROW(x), NROW(x))
  res <- aggregate(probs, by = qd, sum)

  # Clean up output, return lsp object. Only the outcome columns are renamed;
  # the probability column is the last one and keeps its name.
  if (!is.null(colnames(x))) {
    colnames(res)[seq_len(nSystems)] <- colnames(x)
  }
  lsp(res[, seq_len(nSystems)], res[, NCOL(res)])
}
24 CHAPTER 4.

4.3
7
rGHPR (geometric holding
period return)

require(PerformanceAnalytics)
require(quantmod)
require(LSPM)

# Universe of seven Yahoo tickers.
stocks <- c("AAPL","ARMH","JPM","^GSPC","VFC", "VLO", "VAR")


# n: number of tickers (lspm.f uses it as the length of the f vector).
n <- length(stocks)
# Dedicated environment so getSymbols() does not write into the workspace.
symbolData <- new.env()
getSymbols(stocks, env = symbolData, src = "yahoo",
from = "2015-01-01", to = "2016-01-01")
# Apply Cl() to every downloaded object and bind the results column-wise.
ClosePrices <- do.call(cbind,eapply(symbolData, Cl))
# ROC() with default arguments per column; rows containing any NA are dropped.
r <- na.omit(ROC(ClosePrices))

lspm.f()leverage
joint probability table
optim()

#' Leverage vector that maximises GHPR on one rolling window.
#'
#' Builds a joint probability table from the first differences of `rolling`,
#' then searches the f vector within [0, 1] per column with L-BFGS-B,
#' minimising the negative GHPR.
#'
#' @param rolling Matrix-like window of price levels, one column per asset.
#' @return Numeric vector with one optimised f value per column of `rolling`.
lspm.f <- function(rolling) {
  port <- jointProbTable(diff(rolling))
  # The number of assets comes from the window itself, not from a global `n`
  # that other parts of the script reuse for a different purpose.
  nSystems <- NCOL(rolling)
  res <- optim(
    rep(.01, nSystems),
    function(x) {
      port$f <- x
      -GHPR(port)
    },
    method = "L-BFGS-B", lower = 0, upper = 1
  )
  res$par
}
4.3. 25

rollapply
100
levlagw
r

# Width of the rolling window passed to rollapply().
days <- 100


# Run lspm.f on every window of `days` rows of prices, all columns at once,
# with each window anchored on its last row.
lev <- rollapply(ClosePrices, days, lspm.f, by.column=FALSE,
align="right")
# Shift the leverage series by 15 periods before using it as weights.
# NOTE(review): the direction of the shift depends on the class of lev; confirm
# that this delays the weights and does not look ahead.
w <- lag(lev,15)
# Normalise each row to sum to 1; rows that summed to 0 produce NaN and are
# reset to 0.
w <- w/rowSums(w)
w[is.nan(w)] <- 0
# Portfolio return per day: sum of return times weight across assets.
port.r <- rowSums(data.frame(r*w))
charts.PerformanceSummary(port.r)

portfolio returnport.rFigure4.1
15%
26 CHAPTER 4.

Figure 4.1:
Chapter 5

5.1
linear regression
neural network

ARMA

high/lowhigh/openhigh/closeopen/closelow/closelow/open
yx1x2x3x4x5x6

library(quantmod)
library(PerformanceAnalytics)
library(neuralnet)

27
28 CHAPTER 5.

symbol <- c("^GSPC")

symbolData <- new.env()
getSymbols(symbol, env = symbolData, src = "yahoo",
  from = "2010-01-01", to = "2011-01-01")
# Look the series up under the ticker with the leading caret removed; a direct
# environment lookup replaces the former eval(parse(text = ...)).
mktdata <- symbolData[[sub("^", "", symbol, fixed = TRUE)]]
r <- ROC(Cl(mktdata)) # Daily returns
# Missing returns are replaced by a small positive constant instead of NA.
r[is.na(r)] <- 0.00001

# Model table: y is the daily return; x1..x6 are the price ratios high/low,
# high/open, high/close, open/close, low/close and low/open.
mydata <- cbind(
  r,
  Hi(mktdata) / Lo(mktdata),
  Hi(mktdata) / Op(mktdata),
  Hi(mktdata) / Cl(mktdata),
  Op(mktdata) / Cl(mktdata),
  Lo(mktdata) / Cl(mktdata),
  Lo(mktdata) / Op(mktdata)
)
names(mydata) <- c("y", "x1", "x2", "x3", "x4", "x5", "x6")

5.2

signal()20neuralnet
x1-x6
010rollapply()signal
lag()signalFigure(5.1)
30%

#' Trading signal from a neural network fitted on one rolling window.
#'
#' Fits y ~ x1 + ... + x6 with one hidden layer of 10 units on `rolling`, then
#' evaluates the fitted network on the predictor columns (2 to 7) of the
#' window's last row.
#'
#' @param rolling Window of the model table: column 1 is `y`, columns 2 to 7
#'   are `x1`..`x6`.
#' @return `results$net.result > 0` for the last row of the window, or `NA`
#'   when the fit produced no weights.
signal <- function(rolling) {
  model <- neuralnet(y ~ x1 + x2 + x3 + x4 + x5 + x6, rolling,
    hidden = 10, threshold = 0.001)
  # Without weights there is nothing to evaluate: report "no signal" for this
  # window instead of aborting the whole rolling run inside compute().
  # NOTE(review): assumes a failed fit leaves `weights` unset; confirm for the
  # installed neuralnet version.
  if (is.null(model$weights)) {
    return(NA)
  }
  results <- compute(model, tail(rolling[, 2:7], 1))
  results$net.result > 0
}

# Fix the random seed before the networks are fitted.
set.seed(9)
# Width of the rolling window handed to signal().
days <-20
# One signal per window of `days` rows, each window anchored on its last row.
sig <- rollapply(mydata,days,signal,by.column=FALSE,align="right")
# Multiply the returns by the signal shifted by 2 periods and chart the result.
charts.PerformanceSummary(r*lag(sig,2))
30 CHAPTER 5.
Chapter 6

6.1 DCA

(Dollar Cost Averaging)


(Mean Reversion)
DCA
DCA

DCAself
financing

31
32 CHAPTER 6.

AB
50%A50%BAB
A50%B
50%A
B50%50%
(rebalance)AB
AB

DCA
Modified DCA

6.2 Modified DCA

DCA
hedge.weights()b
x(gross returns)e
tuning parameter+1
hedge.weights{b,x,e}

$$b_{i+1} = b_i - \lambda\,(x_i - \bar{x}_i), \tag{6.1}$$

gearing

$$\lambda = \frac{\max(b_i \cdot x_i - e,\ 0)}{\operatorname{var}(x_i)}, \qquad 0 \le e \le 1, \tag{6.2}$$
6.2. MODIFIED DCA 33

(6.2)e 1(6.1)
15
55
1(6.1)
simplex()R

#Modified Dollar Cost Average Weights


#' Project a vector onto the simplex of non-negative entries summing to z.
#'
#' Sorts `v` in decreasing order and finds the largest count `rho` of leading
#' entries for which `k * u[k] >= cumsum(u)[k] - z` holds. The common offset
#' `theta` is subtracted from those `rho` largest entries, and all other
#' entries are set to 0.
#'
#' @param v Numeric vector to project.
#' @param z Target sum of the projected vector.
#' @return Numeric vector of the same length as `v`. An empty `v` gives an
#'   empty result.
simplex <- function(v, z) {
  w <- rep(0, length(v))
  if (length(v) == 0) {
    return(w)
  }
  idx <- order(v, decreasing = TRUE)
  u <- v[idx]
  # Plain positions 1..length(u); no time-series index is involved.
  k <- seq_along(u)
  rho <- max(k[k * u >= cumsum(u) - z])
  if (rho > 0) {
    keep <- idx[seq_len(rho)]
    theta <- (sum(u[seq_len(rho)]) - z) / rho
    w[keep] <- v[keep] - theta
  }
  w
}

#' One step of the modified dollar-cost-averaging weight update.
#'
#' Moves the weights against the deviation of each gross return from the
#' cross-sectional mean, scaled by the gearing
#' `max(b . x - e, 0) / var(x)`, and projects the result back onto the
#' simplex of weights summing to 1.
#'
#' @param b Current portfolio weights.
#' @param x Gross returns of the period, one per asset.
#' @param e Threshold: no adjustment is made when `b . x` does not exceed it.
#' @return Numeric vector of updated weights, as returned by
#'   `simplex(b_new, 1)`.
hedge.weights <- function(b, x, e) {
  b <- as.vector(b)
  x <- as.vector(x)
  spread <- var(x)
  # With zero or undefined variance the deviations x - mean(x) carry no
  # information and the ratio is not finite, so the weights are left
  # unadjusted.
  gearing <- if (is.na(spread) || spread == 0) {
    0
  } else {
    max(b %*% x - e, 0) / spread
  }
  b_new <- b - gearing * (x - mean(x))
  simplex(b_new, 1)
}
34 CHAPTER 6.

6.3
DCA

self financed
wwi ii
r
ri
port.r

$$\text{port.r}_i = w_i \cdot r_i, \tag{6.3}$$

i
(6.3)wi w
for loophedge.weights()
n
for loop1n 1ri
nr
for loopR

# Roll the weights forward: the weights of row i + 1 come from the weights
# and gross returns (r + 1) of row i. seq_len() yields no iterations when
# there is only one row, whereas 1:(n - 1) would run backwards.
for (i in seq_len(n - 1)) {
  b_new <- hedge.weights(w[i, ], r[i, ] + 1, L0)
  w[i + 1, ] <- b_new
}

dim(dimension)
L0etuning parameter

6.4. DCA 35

Figure 6.1: DCA

6.4
DCA

quantmod 5 5
TRV,XOM,CVX,PG,JPMClose2012-
01-012012-12-31 250 250
ROC()rL0
0.9
5DCAFigure(6.1)
22%DCA80100
36 CHAPTER 6.

5DCA
(insured portfolio)
mean reversionDCA

require(quantmod)
require(PerformanceAnalytics)

symbols <- c("TRV", "XOM", "CVX", "PG", "JPM") # portfolio

symbolData <- new.env()
getSymbols(symbols, env = symbolData, src = "yahoo",
  from = "2012-01-01", to = "2012-12-31")
# Apply Cl() to every downloaded object and bind the results column-wise.
ClosePrices <- do.call(cbind, eapply(symbolData, Cl))

r <- na.omit(ROC(ClosePrices))
n <- nrow(r)                               # number of return rows
dim <- length(symbols)                     # number of assets
w <- matrix(1 / dim, ncol = dim, nrow = n) # equal weights: 20% each
L0 <- .9                                   # threshold e passed to hedge.weights()

# Roll the weights forward: the weights of row i + 1 come from the weights
# and gross returns (r + 1) of row i. seq_len() yields no iterations when
# there is only one row, whereas 1:(n - 1) would run backwards.
for (i in seq_len(n - 1)) {
  b_new <- hedge.weights(w[i, ], (r[i, ] + 1), L0)
  w[i + 1, ] <- b_new
}

# Portfolio return per day: sum of weight times return across assets.
port.r <- rowSums(data.frame(w * r))

charts.PerformanceSummary(port.r)

You might also like