Professional Documents
Culture Documents
Ramon Saccilotto
Universitätsspital Basel
Hebelstrasse 10
www.ceb-institute.org
Author
ggplot2 was developed by Hadley Wickham, assistant
professor of statistics at Rice University, Houston. In July
2010 the latest stable release (Version 0.8.8) was published.
2008
Ph.D. (Statistics), Iowa State University, Ames, IA. Practical tools for exploring data and
models.
2004 M.Sc. (Statistics), First Class Honours, The University of Auckland, Auckland, New Zealand.
2002 B.Sc. (Statistics, Computer Science), First Class Honours, The University of Auckland, Auckland, New Zealand.
1999 Bachelor of Human Biology, First Class Honours, The University of Auckland, Auckland, New Zealand.
Tutorial
#### Sample code for the illustration of ggplot2
#### Ramon Saccilotto, 2010-12-08
### install & load ggplot library
# the installer function is install.packages() (plural)
install.packages("ggplot2")
library("ggplot2")
### show info about the data
# print the first rows of the two datasets used in the examples
head(diamonds)
head(mtcars)
12000
10000
cut
Fair
8000
count
#qplot histogram
qplot(clarity, data=diamonds, fill=cut, geom="bar")
Good
6000
Very Good
Premium
4000
Ideal
2000
I1
SI2
SI1
VS2
clarity
IF
aesthetic
"green"
"red"
"blue"
30
25
25
4
5
mpg
mpg
levels(mtcars$cyl)
30
cyl
20
factor(cyl)
4
6
20
7
15
15
wt
wt
30
6
25
mpg
8
qsec
20
16
18
20
15
22
wt
# bar-plot
flips the plot after calculation of
14
14
12
12
10
10
factor(cyl)
4
6
0
4
# fill by variable
factor(cyl)
count
count
factor(cyl)
factor(cyl)
head(diamonds)
0.8
count
cut
Fair
0.6
Good
Very Good
0.4
Premium
Ideal
0.2
0.0
I1
5000
4000
clarity
cut
Fair
count
3000
Good
Very Good
2000
Premium
Ideal
1000
I1
clarity
head(t.table)
12000
10000
carat
8000
6000
4000
2000
0
Fair
Good
Very Good
cut
Premium
Ideal
z = sum(x) / sum(x+y)
return(z)
}
ddply(diamonds, "cut", summarise, custom = t.function(depth, price))
ddply(diamonds, "cut", summarise, custom = sum(depth) / sum(depth + price))
### back to ggplot
10000
# change binwidth
2500
8000
2000
6000
1500
count
count
# histogram
4000
1000
2000
500
0
1
carat
carat
25
20
15
wt
35
30
30
25
mpg
mpg
25
20
20
15
15
10
10
wt
wt
# save plot in variable (hint: data is saved in plot, changes in data do not change plot-data)
20
15
30
25
mpg
p.tmp
20
15
30
25
8
4
4
20
15
wt
5
4
wt
30
3
25
# change mtcars
20
15
10
30
4
8
25
mpg
20
15
15
20
mpg
25
30
10
30
p.tmp
25
8
20
15
10
2
wt
>
summary(p.tmp)
data:
mpg,
cyl,
disp,
hp,
drat,
wt,
qsec,
vs,
am,
rm(t.mtcars)
mapping:
x
=
factor(cyl),
y
=
wt
faceting:
facet_grid(.
~
.,
FALSE)
-----------------------------------
geom_boxplot:
stat_boxplot:
position_dodge:
(width
=
NULL,
height
=
NULL)
factor(cyl)
wt
p.tmp
6
8
3
p.tmp + layer(geom="point")
p.tmp + layer(geom="point") + layer(geom="line")
15
20
mpg
25
30
400
300
factor(cyl)
wt
6
8
200
15
20
mpg
25
30
p.tmp + geom_point(aes(color="darkblue"))
p.tmp + geom_point(color="darkblue")
-1
-1
-2
-2
-3
-3
-3
-2
-1
p.norm + geom_point()
p.norm + geom_point(shape=".")
p.norm + geom_point(colour=alpha("blue", 1/10))
p.norm + geom_point(shape=1)
-1
-1
-2
-2
-3
-2
-1
-3
-2
-1
-3
-3
-3
-2
-1
5
4
3
3
2
5
4
4
wt
3
2
4, 3
#facet_wrap / facet_grid
4, 5
15
6, 3
6, 4
8, 3
8, 5
20
25
30
15
20
25
mpg
30
15
20
25
30
6, 5
5
4
wt
p.tmp + facet_wrap(~cyl)
4, 4
p.tmp + facet_grid(gear~cyl)
p.tmp + facet_wrap(~cyl+gear)
2
15
20
25
30
15
20
25
30
15
20
25
30
mpg
3.4
3.0
4.5
3.2
2.5
4.0
3.0
3.5
2.0
3.0
2.8
5.0
wt
24
26
28
8
30
32
wt
2.5
22
3.0
3.5
3.0
4.0
2.5
3.5
2.0
4.5
2.5
5.0
2.0
12
14
16
18
mpg
22
24
26
28
30
32
18.0
18.5
19.0
19.5
20.0
20.5
21.0
mpg
12
14
YlOrRd
YlOrBr
YlGnBu
YlGn
Reds
RdPu
Purples
PuRd
PuBuGn
PuBu
OrRd
Oranges
Greys
Greens
GnBu
BuPu
BuGn
Blues
Set3
Set2
Set1
Pastel2
Pastel1
Paired
Dark2
Accent
Spectral
RdYlGn
RdYlBu
RdGy
RdBu
PuOr
PRGn
PiYG
BrBG
16
18
wt
p.tmp
p.tmp + scale_x_continuous(limits=c(15,30))
1
15
p.tmp + coord_cartesian(xlim=c(15,30))
20
p.tmp
30
wt
wt
25
mpg
3
2
2
1
# using transformation
16
18
20
22
mpg
24
26
28
30
15
20
mpg
25
30
wt
wt
1
15
### themes
18
mpg
27
low middle
mpg
high
factor(gear)
12
15
10
12
14
count
0
30
14
30
12
25
5
10
20
15
factor(cyl)
6
4
factor(cyl)
count
count
factor(cyl)
count
25
10
10
20
15
factor(1)
factor(1)
factor(cyl)
10
if(length(names(f.survfit$strata)) == 0){
rm(f.start)
else {
for(f.i in 1:length(f.survfit$strata)){
# create data.frame with data from survfit (create column for strata)
rm(f.strata)
for(f.i in 1:length(f.survfit$strata)){
11
rm(f.start, f.subset)
# reorder data
#rename row.names
# return frame
return(f.frame)
}
# define custom function to draw kaplan-meier curve with ggplot
qplot_survival <- function(f.frame, f.CI="default", f.shape=3){
# use different plotting commands depending on whether or not strata are given
if(f.CI=="default" | f.CI==TRUE ){
# create plot with 4 layers (first 3 layers only events, last layer only censored)
# hint: censoring data for multiple censoring events at timepoint are overplotted
else {
else {
if(f.CI=="default" | f.CI==FALSE){
# without CI
ggplot(data=f.frame, aes(group=strata, colour=strata)) + geom_step(aes(x=time, y=surv),
direction="hv") + geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape)
else {
12
1.0
0.8
0.6
0.4
0.2
200
400
time
600
800
1000
1.0
1.0
0.8
plot(t.survfit)
0.8
0.6
sex=2
sex=1
0.4
0.6
surv
# two strata
surv
strata
0.4
0.2
qplot_survival(t.survframe)
# with CI
0.2
200
400
600
800
1000
time
qplot_survival(t.survframe, TRUE)
200
400
600
800
1000
time
1.0
0.8
qplot_survival(t.survframe)
strata
ph.karno=90
surv
ph.karno=100
0.6
ph.karno=80
ph.karno=70
0.4
ph.karno=60
ph.karno=50
0.2
0.0
0
200
400
600
800
1000
time
13
grid.newpage()
pushViewport(viewport(layout = grid.layout(x,y)))
wt
15
20
25
30
mpg
viewport(layout.pos.row=x, layout.pos.col=y)
wt
4
6
wt
# define graphics
theme_bw()
15
20
mpg
25
30
15
20
mpg
25
30
14