choropleth maps in R

below is the source code for making the choropleths for the infographic about newborns. you can run the following code in R. By the way, I'm using version 3.0.0; some library packages may have a different syntax and output in older versions of R.


  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
getwd()
# this is your working directory on your computer

# DOWNLOAD THE FOLLOWING DATA FROM THE INTERNET
provinces <- "http://dl.dropboxusercontent.com/u/7911075/csv%20newborn/provinces.csv"
download.file(provinces, destfile="./provinces.csv")

phattended <- "http://dl.dropboxusercontent.com/u/7911075/csv%20newborn/dataPH2003-2008attended.csv"
download.file(phattended, destfile="./dataPH2003-2008attended.csv")

phbirthshome <- "http://dl.dropboxusercontent.com/u/7911075/csv%20newborn/dataPH2003-2008facility.csv"
download.file(phbirthshome, destfile="./dataPH2003-2008facility.csv")

countries <- "http://dl.dropboxusercontent.com/u/7911075/csv%20newborn/datacountries2005-2011attended.csv"
download.file(countries, destfile="./datacountries2005-2011attended.csv")

# this is a large file from the gadm website, might take a while to finish downloading
phil1 <- "http://gadm.org/data/rda/PHL_adm1.RData"
download.file(phil1, destfile="./phil1.RData")

list.files(".")
dateDownloaded <-date()
dateDownloaded

#  ------------------------------------------------------------------------------------------------
# # LOAD GEO MAPS

library(sp)

con <- "./phil1.RData"
print(load(con))
close(con)
str(gadm, max.level=2)
# phil1.RData has 82 observations of 16 variables

names(gadm)
#  [1] "ID_0"       "ISO"        "NAME_0"     "ID_1"       "NAME_1"    
# [6] "VARNAME_1"  "NL_NAME_1"  "HASC_1"     "CC_1"       "TYPE_1"    
# [11] "ENGTYPE_1"  "VALIDFR_1"  "VALIDTO_1"  "REMARKS_1"  "Shape_Leng"
# [16] "Shape_Area"

#  ------------------------------------------------------------------------------------------------
# # LOAD PROVINCES DATASET
provinces <- read.csv("./provinces.csv")
str(provinces)

#  ------------------------------------------------------------------------------------------------
# # LOAD DATA BIRTHS ATTENDED BY SKILLED PERSONNEL
phattended <- read.csv("./dataPH2003-2008attended.csv", header=TRUE)
str(phattended)

#calculate percent births with skilled attendants, i.e., doctors, nurses, and midwives only
phattended$skilled2008percent <- (phattended$X2008doctorspercent + phattended$X2008nursespercent + phattended$X2008midwivespercent)
phattended$skilled2007percent <- (phattended$X2007doctorspercent + phattended$X2007nursespercent + phattended$X2007midwivespercent)
phattended$skilled2006percent <- (phattended$X2006doctorspercent + phattended$X2006nursespercent + phattended$X2006midwivespercent)
phattended$skilled2005percent <- (phattended$X2005doctorspercent + phattended$X2005nursespercent + phattended$X2005midwivespercent)
phattended$skilled2004percent <- (phattended$X2004doctorspercent + phattended$X2004nursespercent + phattended$X2004midwivespercent)
phattended$skilled2003percent <- (phattended$X2003doctorspercent + phattended$X2003nursespercent + phattended$X2003midwivespercent)
#example: barplot(phattended$X2008midwivespercent, col=unique(phattended$REGION), xlab="", ylab="", cex.axis=0.8)

#multiply by 100
phattended$skilled2008percent <- phattended$skilled2008percent*100
phattended$skilled2007percent <- phattended$skilled2007percent*100
phattended$skilled2006percent <- phattended$skilled2006percent*100
phattended$skilled2005percent <- phattended$skilled2005percent*100
phattended$skilled2004percent <- phattended$skilled2004percent*100
phattended$skilled2003percent <- phattended$skilled2003percent*100

#  ------------------------------------------------------------------------------------------------
# # LOAD DATA PHL HOMEBIRTHS, NORMAL SPONTANEOUS DELIVERY
phbirthshome <- read.csv("./dataPH2003-2008facility.csv", header=TRUE)
str(phbirthshome)
names(phbirthshome)

#  ------------------------------------------------------------------------------------------------
# # FIRST MERGE: INNER JOIN FOR DATASETS OF % HOME BIRTHS & % ATTENDED BY SKILLED PERSONNEL
phbirthdata <- merge(phattended, phbirthshome, by="REGION")
str(phbirthdata)
 
#  ------------------------------------------------------------------------------------------------
# # SECOND MERGE: INNER JOIN TO PROVINCE DATASET
provattended1 <- merge(provinces, phbirthdata, by="REGION")
str(provattended1)
# write.csv(provattended1, file="provattended1.csv")

# SORT MERGED DATASET TO ARRANGE ACCDG TO GADM MAP's ORDER
provattended1.sorted <- provattended1[order(provattended1$PROVINCEORDER) , ]
# write.csv(provattended1.sorted, file="provattended1.sorted.csv")

# ------------------------------------------------------------------------------------------------------------
# # NOW WE CAN MERGE THE COMBINED DATASET TO THE GADM MAP

gadm$region <- as.factor(provattended1.sorted$REGION)
gadm$capital <- as.factor(provattended1.sorted$CAPITAL)
gadm$popln2010 <- as.numeric(provattended1.sorted$POPLN2010)
gadm$landarea <- as.numeric(provattended1.sorted$LANDAREA.km2)
gadm$popdense <- as.numeric(provattended1.sorted$POPDENS2010)
gadm$livebirths2008 <- as.numeric(provattended1.sorted$X2008total)
gadm$skilled2008 <- as.numeric(provattended1.sorted$skilled2008percent)
gadm$homebirths2008 <- as.numeric(provattended1.sorted$X2008HOMEpercent)
gadm$hospbirths2008 <- as.numeric(provattended1.sorted$X2008HOSPpercent)

library(Hmisc)
library(RColorBrewer)

cols4v1 <- brewer.pal(5, "YlGnBu")
pal4v1 <- colorRampPalette(cols4v1)  # <- dark blue cyan white gradient

cols4v2 <- brewer.pal(5, "YlOrBr")
pal4v2 <- colorRampPalette(cols4v2)  # <- brown orange white gradient

cols4v3 <- brewer.pal(5, "PuRd")
pal4v3 <- colorRampPalette(cols4v3)  # <- purple pink white gradient

cols8 <- brewer.pal(8, "Set3")
pal8 <- colorRampPalette(cols8)

cols12 <- brewer.pal(12, "Set3")
pal12 <- colorRampPalette(cols12)

str(gadm$region)
str(gadm$capital)
str(gadm$popln2010)
str(gadm$landarea)
str(gadm$popdense)
str(gadm$livebirths2008)
str(gadm$skilled2008)
str(gadm$homebirths2008)

# --------------------------------------------------------------------
#  PARSE REGIONS INTO LOW-AVERAGE-HIGH

gadm$g4area <- cut2(gadm$landarea, g = 4)
table(gadm$g4area)

gadm$g4popln <- cut2(gadm$popln2010, g = 4)
table(gadm$g4popln)

gadm$g4popdense <- cut2(gadm$popdense, g = 4)
table(gadm$g4popdense)

gadm$g4livebirths2008 <-cut2(gadm$livebirths2008, g = 4)
table(gadm$g4livebirths2008)

gadm$g4skilled2008 <-cut2(gadm$skilled2008, g=4)
table(gadm$g4skilled2008)

gadm$g4homebirths2008 <- cut2(gadm$homebirths2008, g=4)
table(gadm$g4homebirths2008)

gadm$g4hospbirths2008 <- cut2(gadm$hospbirths2008, g=4)
table(gadm$g4hospbirths2008)

gadm$g8popln <- cut2(gadm$popln2010, g = 8)
table(gadm$g8popln)

gadm$g8popdense <- cut2(gadm$popdense, g = 8)
table(gadm$g8popdense)

colg4skilled4v1 = pal4v1(length(levels(gadm$g4skilled2008)))
spplot(gadm, "g4skilled2008", col.regions=colg4skilled4v1, main="% Births with Skilled Attendants, 2008 by Region")
# dev.copy2pdf(file="PH percent births attended 2008.pdf", height =11, width = 8)

colg4home4v2 = pal4v2(length(levels(gadm$g4homebirths2008)))
spplot(gadm, "g4homebirths2008", col.regions=colg4home4v2, main="% Births at Home, Normal Spontaneous Delivery, 2008 by Region")
# dev.copy2pdf(file="PH percent births at home 2008.pdf", height =11, width = 8)

colg4hosp4v3 = pal4v3(length(levels(gadm$g4hospbirths2008)))
spplot(gadm, "g4hospbirths2008", col.regions=colg4hosp4v3, main="% Births at Hospital, Normal Spontaneous Delivery, 2008 by Region")
# dev.copy2pdf(file="PH percent births at hospital 2008.pdf", height =11, width = 8)

colpopln4v1 = pal4v1(length(levels(gadm$g4popln))) # <- dark blue cyan white gradient
colpopln4v2 = pal4v2(length(levels(gadm$g4popln))) # <- brown orange white gradient 
colpopln4v3 = pal4v3(length(levels(gadm$g4popln))) # <- purple pink white gradient
# spplot(gadm, "g4popln", col.regions=colpopln4v1, main="Population by Province")

colregions = pal12(length(levels(gadm$region)))
# spplot(gadm, "region", col.regions=colregions, main="Regions in the Philippines")
# dev.copy2pdf(file="PH regions.pdf", height =11, width = 8)

colpopln8 = pal8(length(levels(gadm$g8popln)))
# spplot(gadm, "g8popln", col.regions=colpopln8, main="Population by Province")
# dev.copy2pdf(file="PH population per province.pdf", height =11, width = 8)

colpopdense8 = pal8(length(levels(gadm$g8popdense)))
# spplot(gadm, "g8popdense", col.regions=colpopdense8, main="Population Density by Province")
# dev.copy2pdf(file="PH population density per province.pdf", height =11, width = 8)

collivebirth4v2 = pal4v2(length(levels(gadm$g4livebirths2008)))
# spplot(gadm, "g4livebirths2008", col.regions=collivebirth4v2, main="Live Births 2008")
# dev.copy2pdf(file="live births 2008.pdf", height =11, width = 8)

# ------------------------------------------------------------------------------------------
# # MAKE WORLD MAP

countries <- read.csv("./datacountries2005-2011attended.csv", header=TRUE)
str(countries)
countries$skilled100 <- countries$birthsattendedpercent*100
countries$g8skilled100 <- cut2(countries$skilled100, g = 8)
table(countries$g8skilled100)


library(Hmisc)
library(RColorBrewer)

cols4v1 <- brewer.pal(5, "YlGnBu")
pal4v1 <- colorRampPalette(cols4v1)  # <- dark blue cyan white gradient

cols4v2 <- brewer.pal(5, "YlOrBr")
pal4v2 <- colorRampPalette(cols4v2)  # <- brown orange white gradient

cols4v3 <- brewer.pal(5, "PuRd")
pal4v3 <- colorRampPalette(cols4v3)  # <- purple pink white gradient

cols8 <- brewer.pal(8, "Set3")
pal8 <- colorRampPalette(cols8)

cols8v2 <- brewer.pal(8, "YlOrRd")
pal8v2 <- colorRampPalette(cols8v2)

cols8v3 <- brewer.pal(8, "YlGnBu")
pal8v3 <- colorRampPalette(cols8v3)

cols8v4 <- brewer.pal(8, "RdPu")
pal8v4 <- colorRampPalette(cols8v4)

cols12 <- brewer.pal(12, "Set3")
pal12 <- colorRampPalette(cols12)

library(rworldmap)

mapattended <- joinCountryData2Map(countries, joinCode = "ISO3", nameJoinColumn = "ISO3V10")
str(mapattended, max.level=2)

par(mai=c(0,0,0.2,0),xaxs="i",yaxs="i")
mapParams <- mapCountryData(mapattended, nameColumnToPlot="skilled100", catMethod = "pretty", numCats = 8, colourPalette = cols8v3, mapTitle="", addLegend=FALSE)
do.call( addMapLegend, c(mapParams, legendWidth=0.5, legendMar = 2, legendLabels="all", legendIntervals="data"))
# dev.copy2pdf(file="countries births skilled attendants 2005-2011 v2.pdf", height =8, width = 11)