Start libraries:
library(sf)
library(tidyverse)
library(rnaturalearth)
library(ggthemes)
library(knitr)
library(ggspatial)
library(tmap)
library(tmaptools)
library(lubridate)
library(raster)
library(rgeos)
library(ggmap)
options(scipen = 999)
In real life the spatial distribution of any phenomena is usually not regular. Irregular is also the distribution of administrative units. This means, if we want to compare different municipalities with each other, absolute values may not be the best option (at least we are then unable to describe the full picture). Next to the absolute values very often density is used. This means we present value per spatial unit (e.g. km2). Let us try to calculate areas of municipalities and then calculate in which municipality is the highest density of beef cattle:
# calculate area for each polygon:
municip$area <- st_area(municip)
# Show areas of first 5 municipalities:
municip$area[1:5]
## Units: [m^2]
## [1] 11896170 207920558 73264580 2717872552 1032406180
We see that, polygon areas are calculated in same units as the reference system of geolayer (currently meters) and the variable type is Units. In current case it is reasonable to express it in square kilometers. We can convert units from square meter to square kilometer and store it as numeric:
municip <- municip %>%
mutate(area = as.numeric(area) / 1000000)
glimpse(municip)
## Rows: 79
## Columns: 7
## $ ONIMI <chr> "Ruhnu vald", "Muhu vald", "Viimsi vald", "Saaremaa vald", "H…
## $ OKOOD <chr> "0689", "0478", "0890", "0714", "0205", "0303", "0907", "0897…
## $ MNIMI <chr> "Saare maakond", "Saare maakond", "Harju maakond", "Saare maa…
## $ MKOOD <chr> "0074", "0074", "0037", "0074", "0039", "0068", "0056", "0084…
## $ TYYP <chr> "1", "1", "1", "1", "1", "1", "1", "4", "1", "4", "1", "4", "…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((455191.3 64..., MULTIPOLYGON (((…
## $ area <dbl> 11.89617, 207.92056, 73.26458, 2717.87255, 1032.40618, 17.330…
Where is the biggest / smallest municipality (by area) of Estonia?
# filter biggest and smallest
municip_extreme <- municip %>%
filter(area == min(area) | area == max(area)) #"|" corresponds to 'OR'
Create the map (with ggplot2):
ggplot()+
theme_minimal()+
geom_sf(data = municip, fill="grey96", colour = "grey80", size = 0.25)+
geom_sf(data = municip_extreme, aes(fill = as.factor(area), colour = as.factor(area)), size=1)+
geom_sf_text(data = municip_extreme, aes(label=ONIMI))+
guides(fill=F, colour = F)+
scale_fill_manual(values =c("red", "dodgerblue"))+
scale_colour_manual(values =c("red", "dodgerblue"))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
What is the area of biggest municipality? Smallest? What is average area of municipalities?
Let’s turn back to the geolayer of livestock in Estonia. First calculate the area of municipalities and convert it to square kilometers!
# Calculate the area by your self!!!
Hopefully it was easy! :)
In next step calculate the spatial density of beef cattle in
municipalities:
# it can be done in several ways
# for example you can add calculation directly to the plotting syntax::
ggplot()+
theme_minimal()+
geom_sf(data = agrAnimal_2_sf_municip_aggr, aes(fill = beef_cattle / area), colour = "grey60", size = 0.25)+
scale_fill_viridis_c(na.value = "red")
# Or you can calculate the density as separate column and after that draw the map:
agrAnimal_2_sf_municip_aggr <- agrAnimal_2_sf_municip_aggr %>%
mutate(beef_dens = beef_cattle / area)
ggplot()+
theme_minimal()+
geom_sf(data = agrAnimal_2_sf_municip_aggr, aes(fill = beef_dens), colour = "grey60", size=0.25)+
scale_fill_viridis_c(na.value = "red")+
labs(fill = "animals\nper\nkm2")
In which municipality is the beef cattle density highest?
# geom column slows down the query if we want to get quick overview.
#Copy the layer and delete the geometry column:
temp0 <- agrAnimal_2_sf_municip_aggr %>%
st_drop_geometry()
# municipality with highest density of beef cattle:
temp0 %>%
filter(beef_dens == max(beef_dens)) %>% # filter max value
kable() # format the output
| ONIMI | OKOOD | MNIMI | MKOOD | TYYP | municip_key | beef_cattle | estonian_holstein_cattle | estonian_native_cattle | estonian_red_cattle | goats | pigs | sheeps | area | beef_dens |
|---|
No results? First you have to remove NA values!
(NA - Not Available)
To remove all NA’s from table you can use
na.omit():
temp1 <- na.omit(temp0)
Previous command deletes all rows where value is
NA.
But if you want to filter NA by column then it’s reasonable
to use is.na() operator:
temp2 <- temp0 %>%
filter(!is.na(beef_dens))
Now we can try to query again in which municipality the beef livestock density is the highest:
temp2 %>%
filter(beef_dens == max(beef_dens)) %>%
kable()
| ONIMI | OKOOD | MNIMI | MKOOD | TYYP | municip_key | beef_cattle | estonian_holstein_cattle | estonian_native_cattle | estonian_red_cattle | goats | pigs | sheeps | area | beef_dens |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Kihnu vald | 0303 | Pärnu maakond | 0068 | 1 | kihnu vald | 147 | 6 | 2 | NA | NA | NA | 457 | 17.32593 | 8.484393 |
Is it higher than population density? :)
In some cases we don’t want to remove NA-values, but
replace them with other value, e.g. zero:
# replace NA with 0:
temp3 <- temp0 %>%
mutate(beef_dens = ifelse(is.na(beef_dens), 0, beef_dens))
# or:
temp4 <- temp0 # make copy
temp4$beef_dens[is.na(temp4$beef_dens)] <- 0 # replace NA with 0
How can we select top 10 municipalities with highest / lowest density of cattle?
# give me lowest density in top10 municipalities (arrange() does the thing!):
temp3 %>%
select(ONIMI, beef_cattle, beef_dens) %>%
arrange(beef_dens) %>%
head(10) %>%
kable()
# highest density in top10 municipalities:
temp3 %>%
select(ONIMI, beef_cattle, beef_dens) %>%
arrange(-beef_dens) %>% # sort from biggest to smallest
head(10) %>%
kable()
Not working?! It’s because there is a conflict between
raster and dplyr package. R doesn’t know from
which library the functionality of select() should be
applied (actually R is using the functionality from the package which
was switched on last). Currently R tries it from package raster…
To avoid it:
# give me lowest density in top10 municipalities (arrange() does the thing!):
temp3 %>%
dplyr::select(ONIMI, beef_cattle, beef_dens) %>%
arrange(beef_dens) %>%
head(10) %>%
kable()
| ONIMI | beef_cattle | beef_dens |
|---|---|---|
| Keila linn | NA | 0 |
| Maardu linn | NA | 0 |
| Loksa linn | NA | 0 |
| Viljandi linn | NA | 0 |
| Tallinn | NA | 0 |
| Rakvere linn | NA | 0 |
| Võru linn | NA | 0 |
| Sillamäe linn | NA | 0 |
| Narva linn | NA | 0 |
| Kohtla-Järve linn | NA | 0 |
# highest density in top10 municipalities:
temp3 %>%
dplyr::select(ONIMI, beef_cattle, beef_dens) %>%
arrange(-beef_dens) %>% # sort from biggest to smallest
head(10) %>%
kable()
| ONIMI | beef_cattle | beef_dens |
|---|---|---|
| Kihnu vald | 147 | 8.484393 |
| Antsla vald | 2357 | 5.741432 |
| Haapsalu linn | 1546 | 5.687526 |
| Muhu vald | 1053 | 5.064602 |
| Ruhnu vald | 57 | 4.791629 |
| Jõhvi vald | 564 | 4.551057 |
| Vormsi vald | 427 | 4.495625 |
| Hiiumaa vald | 4046 | 3.918872 |
| Valga vald | 2862 | 3.817733 |
| Rapla vald | 3277 | 3.812873 |
Sometimes we are facing not only with the fact that thematic layer
doesn’t fit well with base map, but even bigger problem is that you
don’t have a proper base map.
One nice possibility is to use geolayers from Natural Earth. And if you
are R-user you can to it directly from R vignette.
In next example we have some dataset about genetic variance collected in different locations. Your task is to map it to reveal potential spatial patterns. Dataset contains geographical coordinates, sample ID and variance (variable1) of some gene parameter (content is not important right now).
Import the data:
genes <- read.csv2("http://aasa.ut.ee/Rspatial/data/geneticVariance.csv")
glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude <chr> "-5.67338", "-5.67338", "-5.67338", "-5.67338", "-5.67338", …
## $ longitude <chr> "145.17883", "145.17883", "145.17883", "145.17883", "145.178…
Everything is okay?
Look again! Latitude and longitude are stored as factors. Convert them
to numeric (it can’t be done directly! First you have to convert it to
character):
genes <- genes %>%
mutate(longitude = as.numeric(as.character(longitude)),
latitude = as.numeric(as.character(latitude)))
glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …
Visualize it:
ggplot()+
geom_point(data = genes, aes(x = longitude, y = latitude, colour = variable1))
The number of dots on plot is 12, but the number of rows in dataset
is 70. Why there is such a big difference?
It is because in every location several samples were collected. Can we
avoid this overlapping and bring out all the samples??
As usual there are several options:
Principles of this operation are well described in here link. But we are not doing it just on plot but jitter the locations in data table.
In GIS the most common data format is probably SHP. It works quite well, but has several disadvantages (one layer consists of several files [geometry, attributes, reference system etc]). Today on raising standard for the spatial objects is simple features (sf)
For point data this conversion is very simple:
glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …
genes_sf <- st_as_sf(genes, coords = c("longitude", "latitude"), crs = 4326)
class(genes_sf)
## [1] "sf" "data.frame"
And now we can jitter the points:
genes_sf_jitt <- st_jitter(genes_sf, factor = 0.02) # read from Help (?st_jitter), how the 'factor' parameter works!
ggplot()+
geom_sf(data = genes_sf_jitt, aes(colour = variable1))+
scale_colour_viridis_c()
Better! But still two major problems: jittering doesn’t clarify the picture (maybe we should try calculate average for every location) and secondly although we can see the coordinate values on axes, it is not enough. For better spatial context we need a proper base map!
First we calculate average values for every location:
glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …
genes_aggr <- genes %>%
group_by(longitude, latitude) %>%
summarise(var1_mean = mean(variable1)) %>%
ungroup()
## `summarise()` has grouped output by 'longitude'. You can override using the
## `.groups` argument.
# see the result:
genes_aggr
## # A tibble: 12 × 3
## longitude latitude var1_mean
## <dbl> <dbl> <dbl>
## 1 140. -5.48 3959071.
## 2 143. -5.85 3633741.
## 3 144. -4.19 4430432.
## 4 144. -6.15 3913224.
## 5 145. -6.02 3067700.
## 6 145. -5.67 3318188.
## 7 146. -6.54 3404230.
## 8 146. -6.97 3936487.
## 9 147. -6.68 1610366
## 10 148. -9.99 3262092
## 11 152. -4.64 2663686.
## 12 152. -4.22 2069084.
The result is just a plain table. We have to convert it to spatial sf-object:
genes_aggr_sf <- st_as_sf(genes_aggr, coords = c("longitude", "latitude"), crs = 4326) # 4326 refers to WGS84!
Plot it with tmap in interactive mode! look
session 1
There are many different geolayers available at different scale level. Currently we download just country polygons:
countries50 <- ne_download(scale = 50, type = 'countries', category = 'cultural', returnclass = "sf")
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\Anto Aasa\AppData\Local\Temp\RtmpsPDrc7", layer: "ne_50m_admin_0_countries"
## with 242 features
## It has 168 fields
## Integer64 fields read as strings: NE_ID
Plot the base map and sample points
ggplot()+
geom_sf(data = countries50, fill = "white", colour = "grey", size = 0.25)+
geom_sf(data = genes_aggr_sf, colour = "red")
Experienced geographer can say that gene samples are collected from Papua New Guinea and Indonesia. Select those countries from base map layer:
countries50_ii <- countries50 %>%
filter(ADMIN == "Papua New Guinea" | ADMIN == "Indonesia") # '|' means 'OR'!!
#plot:
ggplot()+
geom_sf(data = countries50_ii, fill = "white", colour = "grey", size=0.25)+
geom_sf(data=genes_aggr_sf, colour = "red")
Much better! But we should trim the base map more (crop the empty western part):
# Crop by coordinates:
countries50_iii <- st_crop(countries50_ii, c(xmin = 138, xmax = 155, ymin = -12, ymax = 0))
#plot:
ggplot()+
geom_sf(data = countries50_iii, fill = "white", colour = "grey", size=0.25)+
geom_sf(data = genes_aggr_sf, colour = "red")
This is already pretty good. And now the spatial pattern:
ggplot()+
geom_sf(data = countries50_iii, fill = "white", colour = "grey", size=0.25)+
geom_sf(data = genes_aggr_sf, aes(colour = var1_mean), size=3)+
scale_colour_viridis_c()
When you want to add good spatial context to your thematic map, the
best option is usually to create my your own. There are many good data
sources available (Land
Board Estonia, OSM, Natural Earth
etc).
The only problem is, that it may take a lot of time to create good,
relevant and informative map. For quicker solution we can use base maps
from other service providers (Google, OSM, Stamen etc).
Next we can try possibilities offered by ggmap. Syntax
is slightly different, but the principles are the same:
# calculate & define bounding box:
# calculate bounding box of map extent:
gene_base_bbox <- st_bbox(countries50_iii)
gene_bbox <- st_bbox(gene_base_bbox) %>% as.numeric()
# it can be defined as an independednt object:
bbox <- c(left = gene_bbox[1],
bottom = gene_bbox[2],
right = gene_bbox[3],
top = gene_bbox[4])
# download the map:
gene_stamen_map <- get_stamenmap(bbox, maptype = "terrain-background", zoom = 7)
#plot it:
ggmap(gene_stamen_map, extent = "device")
Convert sf to data frame and plot it with
geom_point:
# coordinates as table:
genes_aggr_crd <- genes_aggr_sf %>%
st_coordinates() %>% # extract coordinates
as_tibble() # store them as table
# extract attributes from sf-object:
genes_aggr_df <- genes_aggr_sf %>%
st_drop_geometry()
# bind columns together:
genes_aggr_df <- bind_cols(genes_aggr_df, genes_aggr_crd)
ggmap(gene_stamen_map, extent = "device")+
theme_minimal()+
theme(axis.title = element_blank(),
axis.text = element_blank())+
geom_point(data = genes_aggr_df, aes(x = X, y = Y, fill = var1_mean), size=3, shape = 22, colour = "grey30")+ # you have to define coordinates columns as well!
scale_fill_gradientn(colours = c("navyblue", "grey", "red"))+ # define your own palette
labs(title = "Gene map",
caption = "A. Aasa",
fill = "value")
This was an example how to put spatial points on base map. Next, we look example of line maps.
For that we use data about flights of Estonian national airline Nordica. Dataset covers all the flights operated by Nordica from December 2, 2018 to December 3, 2018. And our aim is to create typical destinations map:
Read the data and check file structure:
flights <- read_delim("http://aasa.ut.ee/Rspatial/data/nordicaFlights.csv",
delim = ";", escape_double = FALSE, locale = locale(encoding = "ISO-8859-1"),
trim_ws = TRUE)
## Rows: 94 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (6): date, from, to, fligjt, airplane, status
## time (2): departure, arrival
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(flights)
## Rows: 94
## Columns: 8
## $ date <chr> "02.12.2018", "02.12.2018", "02.12.2018", "02.12.2018", "02.…
## $ departure <time> 07:00:00, 07:00:00, 07:20:00, 09:10:00, 09:20:00, 09:25:00,…
## $ arrival <time> 08:25:00, 08:20:00, 07:20:00, 12:30:00, 09:55:00, 12:35:00,…
## $ from <chr> "Tallinn (TLL)", "Tallinn (TLL)", "Tallinn (TLL)", "München …
## $ to <chr> "München (MUC)", "Viin (VIE)", "Stockholm (ARN)", "Tallinn (…
## $ fligjt <chr> "LO8165", "LO8193", "LO8121", "LO8166", "LO790", "LO8194", "…
## $ airplane <chr> "CR7 i", "CR9 i", "CR9 i", "CR7 i", "CR9 i", "CR9 i", …
## $ status <chr> "Graafikujärgne.", "Graafikujärgne.", "Graafikujärgne.", "Gr…
flights %>%
head() %>%
kable()
| date | departure | arrival | from | to | fligjt | airplane | status |
|---|---|---|---|---|---|---|---|
| 02.12.2018 | 07:00:00 | 08:25:00 | Tallinn (TLL) | München (MUC) | LO8165 | CR7 i | Graafikujärgne. |
| 02.12.2018 | 07:00:00 | 08:20:00 | Tallinn (TLL) | Viin (VIE) | LO8193 | CR9 i | Graafikujärgne. |
| 02.12.2018 | 07:20:00 | 07:20:00 | Tallinn (TLL) | Stockholm (ARN) | LO8121 | CR9 i | Graafikujärgne. |
| 02.12.2018 | 09:10:00 | 12:30:00 | München (MUC) | Tallinn (TLL) | LO8166 | CR7 i | Graafikujärgne. |
| 02.12.2018 | 09:20:00 | 09:55:00 | Tallinn (TLL) | Varssavi (WAW) | LO790 | CR9 i | Graafikujärgne. |
| 02.12.2018 | 09:25:00 | 12:35:00 | Viin (VIE) | Tallinn (TLL) | LO8194 | CR9 i | Graafikujärgne. |
All the data is nicely imported. All columns are stored as factors.
But this is currently not important. Question is how can we add to the
table the geographical coordinates of origin and destination?
As you probably know every airport has unique code (IATA
code). Currently those codes are not in separate column. With help
of tidyr
and stringr
package we can separate column with airport name and code to two
columns:
# separate airport name and airport code:
flights <- flights %>%
separate(from, c("from_name" ,"from_code"), ' \\(')
flights <- flights %>%
separate(to, c("to_name" ,"to_code"), '\\(')
#remove ')' from string:
flights <- flights %>%
mutate(from_code = str_replace(from_code, "\\)", ""))
flights <- flights %>%
mutate(to_code = str_replace(to_code, "\\)", ""))
flights %>%
head() %>%
kable()
| date | departure | arrival | from_name | from_code | to_name | to_code | fligjt | airplane | status |
|---|---|---|---|---|---|---|---|---|---|
| 02.12.2018 | 07:00:00 | 08:25:00 | Tallinn | TLL | München | MUC | LO8165 | CR7 i | Graafikujärgne. |
| 02.12.2018 | 07:00:00 | 08:20:00 | Tallinn | TLL | Viin | VIE | LO8193 | CR9 i | Graafikujärgne. |
| 02.12.2018 | 07:20:00 | 07:20:00 | Tallinn | TLL | Stockholm | ARN | LO8121 | CR9 i | Graafikujärgne. |
| 02.12.2018 | 09:10:00 | 12:30:00 | München | MUC | Tallinn | TLL | LO8166 | CR7 i | Graafikujärgne. |
| 02.12.2018 | 09:20:00 | 09:55:00 | Tallinn | TLL | Varssavi | WAW | LO790 | CR9 i | Graafikujärgne. |
| 02.12.2018 | 09:25:00 | 12:35:00 | Viin | VIE | Tallinn | TLL | LO8194 | CR9 i | Graafikujärgne. |
Airport codes are now nicely in separate columns. Next step is to find additional dataset where we have next to the airport codes also the geographical coordinates of airports.
airports <- read.csv("https://datahub.io/core/airport-codes/r/airport-codes.csv")
airports2 <- airports %>%
filter(iata_code != "")
airports2 %>%
head() %>%
kable()
| ident | type | name | elevation_ft | continent | iso_country | iso_region | municipality | gps_code | iata_code | local_code | coordinates |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 03N | small_airport | Utirik Airport | 4 | OC | MH | MH-UTI | Utirik Island | K03N | UTK | 03N | 169.852005, 11.222 |
| 07FA | small_airport | Ocean Reef Club Airport | 8 | NA | US | US-FL | Key Largo | 07FA | OCA | 07FA | -80.274803161621, 25.325399398804 |
| 0AK | small_airport | Pilot Station Airport | 305 | NA | US | US-AK | Pilot Station | PQS | 0AK | -162.899994, 61.934601 | |
| 0CO2 | small_airport | Crested Butte Airpark | 8980 | NA | US | US-CO | Crested Butte | 0CO2 | CSE | 0CO2 | -106.928341, 38.851918 |
| 0TE7 | small_airport | LBJ Ranch Airport | 1515 | NA | US | US-TX | Johnson City | 0TE7 | JCY | 0TE7 | -98.62249755859999, 30.251800537100003 |
| 13MA | small_airport | Metropolitan Airport | 418 | NA | US | US-MA | Palmer | 13MA | PMX | 13MA | -72.31140136719999, 42.223300933800004 |
# Is airport of Tallinn (TLL) available:
airports2 %>%
filter(iata_code == "TLL") %>%
kable()
| ident | type | name | elevation_ft | continent | iso_country | iso_region | municipality | gps_code | iata_code | local_code | coordinates |
|---|---|---|---|---|---|---|---|---|---|---|---|
| EETN | large_airport | Lennart Meri Tallinn Airport | 131 | EU | EE | EE-37 | Tallinn | EETN | TLL | 24.832799911499997, 59.41329956049999 |
As the airport coordinates are stored in one column we have to use
separate() function once again:
airp_crd <- airports2 %>%
separate(coordinates, c("lng" ,"lat"), ", ")
# convert both columns to numeric and select relevant columns:
airp_crd <- airp_crd %>%
mutate(lng = as.numeric(lng), lat = as.numeric(lat)) %>%
dplyr::select(iata_code, lng, lat, municipality)
glimpse(airp_crd)
## Rows: 9,225
## Columns: 4
## $ iata_code <chr> "UTK", "OCA", "PQS", "CSE", "JCY", "PMX", "WLR", "NUP", "…
## $ lng <dbl> 169.85200, -80.27480, -162.89999, -106.92834, -98.62250, …
## $ lat <dbl> 11.22200, 25.32540, 61.93460, 38.85192, 30.25180, 42.2233…
## $ municipality <chr> "Utirik Island", "Key Largo", "Pilot Station", "Crested B…
Next we have to join tables (see Practical session 1)
# join flights and airport table by origin:
flights2 <- left_join(flights, airp_crd, by = c("from_code" = "iata_code"))
#rename coordinates columns:
flights2 <- flights2 %>%
rename(from_x = lng, from_y = lat, from_mun = municipality)
#join flights and airport table by destination:
flights2 <- left_join(flights2, airp_crd, by = c("to_code" = "iata_code"))
#rename coordinates columns:
flights2 <- flights2 %>%
rename(to_x = lng, to_y = lat, to_mun = municipality)
glimpse(flights2)
## Rows: 104
## Columns: 16
## $ date <chr> "02.12.2018", "02.12.2018", "02.12.2018", "02.12.2018", "02.…
## $ departure <time> 07:00:00, 07:00:00, 07:00:00, 07:20:00, 09:10:00, 09:10:00,…
## $ arrival <time> 08:25:00, 08:25:00, 08:20:00, 07:20:00, 12:30:00, 12:30:00,…
## $ from_name <chr> "Tallinn", "Tallinn", "Tallinn", "Tallinn", "München", "Münc…
## $ from_code <chr> "TLL", "TLL", "TLL", "TLL", "MUC", "MUC", "TLL", "VIE", "TLL…
## $ to_name <chr> "München ", "München ", "Viin ", "Stockholm ", "Tallinn ", "…
## $ to_code <chr> "MUC", "MUC", "VIE", "ARN", "TLL", "TLL", "WAW", "TLL", "AMS…
## $ fligjt <chr> "LO8165", "LO8165", "LO8193", "LO8121", "LO8166", "LO8166", …
## $ airplane <chr> "CR7 i", "CR7 i", "CR9 i", "CR9 i", "CR7 i", "CR7 i", …
## $ status <chr> "Graafikujärgne.", "Graafikujärgne.", "Graafikujärgne.", "Gr…
## $ from_x <dbl> 24.83280, 24.83280, 24.83280, 24.83280, 11.78610, 11.69030, …
## $ from_y <dbl> 59.4133, 59.4133, 59.4133, 59.4133, 48.3538, 48.1378, 59.413…
## $ from_mun <chr> "Tallinn", "Tallinn", "Tallinn", "Tallinn", "Munich", "Munic…
## $ to_x <dbl> 11.78610, 11.69030, 16.56970, 17.91860, 24.83280, 24.83280, …
## $ to_y <dbl> 48.3538, 48.1378, 48.1103, 59.6519, 59.4133, 59.4133, 52.165…
## $ to_mun <chr> "Munich", "Munich", "Vienna", "Stockholm", "Tallinn", "Talli…
Plot the flight on base map:
ggplot()+
geom_sf(data = countries50, fill = "gray80", col = "gray40", size = 0.5) +
geom_segment(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red")
We focus on Europe. For that we can use st_crop()
function or we can filter out only European countries:
europe <- countries50 %>%
filter(REGION_UN == "Europe")
# plot:
ggplot()+
geom_sf(data = europe)+
geom_segment(data = flights2, aes(x=from_x, xend= to_x, y=from_y, yend = to_y), col = "red")
Base map needs some additional croping
europe2 <- st_crop(europe, c(xmin = -10,
xmax = 41,
ymin = 45,
ymax= 70))
# plot:
ggplot()+
theme_minimal()+
geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
geom_segment(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red")
Use curves instead of straight lines:
ggplot()+
theme_minimal()+
geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))
# png:
png("flights.png", res=300, units = "cm", height = 15, width = 15)
ggplot()+
theme_minimal()+
geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))
dev.off() # It is crucial to close the writing session!
# pdf:
pdf("flights.pdf", height = 15, width = 15)
ggplot()+
theme_minimal()+
geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))
dev.off() # It is crucial to close the writing session!
The last version looks already like a thematic map. But still there are some shortcomings:
We can try to fix it. Add the north arrow and map scale:
Plot it on top stamen map! In case of
ggmap you have to use geom_segment()
instead of geom_curve()!
To create the map we need geolayers:
You can find this kind of datasets from many different places. For
example you can download data directly from raster
library:
Download country contour (level = 0) and convert it to
sf:
gadm_0_estonia <- raster::getData('GADM', country = 'EST', level = 0)
gadm_0_estonia <- st_as_sf(gadm_0_estonia)
If direct download doesn’t work you can download it from here: http://aasa.ut.ee/Rspatial/data/gadm36_EST_0_sf.rds
Use download.file() & read_rds().
Actually it is better to obtain all the data layers from the same
source. Let’s get more familiar with Natural Earth. You can
download data from there manually. But we can do it directly from R as
well. For that you need to install package
rnaturalearth.
# install from CRAN:
install.packages("rnaturalearth")
Plot country contour:
tm_shape(gadm_0_estonia)+
tm_borders()
In next step we try to download layer of countries:
# download:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip", destfile="ne_10m_admin_0_countries_lakes.zip")
# unzip:
unzip("ne_10m_admin_0_countries_lakes.zip")
# import to R:
ne_countries_sf <- st_read("ne_10m_admin_0_countries_lakes.shp")
# plot to see the visual:
tm_shape(ne_countries_sf)+
tm_borders()
# Check the table structure (it's faster, when you drop the geometry column):
ne_countries_sf %>%
st_drop_geometry() %>%
glimpse()
# We need only contour of Estonia:
ne_countries_sf <- ne_countries_sf %>%
filter(NAME == "Estonia")
# plot the result:
tm_shape(ne_countries_sf)+
tm_borders("red")
There is a chance that direct download isn’t working (that’s the main problem with 3rd party data sources… Usually it appears during the live demo’s).
Next download rivers, lakes, roads & populated places (it may not work either):
# Rivers :
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_lake_centerlines.zip",
destfile = "ne_10m_rivers_lake_centerlines.zip")
unzip("ne_10m_rivers_lake_centerlines.zip")
# Rivers (additional data for Europe):
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_europe.zip",
destfile="ne_10m_rivers_europe.zip")
unzip("ne_10m_rivers_europe.zip")
# Lakes:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip",
destfile = "ne_10m_lakes.zip")
unzip("ne_10m_lakes.zip")
# Lakes (Europe supplement):
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes_europe.zip",
destfile = "ne_10m_lakes_europe.zip")
unzip("ne_10m_lakes_europe.zip")
# populated places:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip",
destfile = "ne_10m_populated_places.zip")
unzip("ne_10m_populated_places.zip")
# roads:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_roads.zip",
destfile = "ne_10m_roads.zip")
unzip("ne_10m_roads.zip")
If previous download wasn’t successful you should download all the layers from course web page and after you can import layers to R:
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_admin_0_countries_lakes.zip", destfile = "ne_10m_admin_0_countries_lakes.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_lakes.zip", destfile = "ne_10m_lakes.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_lakes_europe.zip", destfile = "ne_10m_lakes_europe.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_populated_places.zip", destfile = "ne_10m_populated_places.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_rivers_europe.zip", destfile = "ne_10m_rivers_europe.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_rivers_lake_centerlines.zip", destfile = "ne_10m_rivers_lake_centerlines.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_roads.zip", destfile = "ne_10m_roads.zip")
# unzip all the layers!
unzip("filename.zip")
Import downloaded data:
ne_countries_sf <- st_read("ne_10m_admin_0_countries_lakes.shp")
ne_rivers_sf <- st_read("ne_10m_rivers_lake_centerlines.shp")
ne_rivers_europe_sf <- st_read("ne_10m_rivers_europe.shp")
ne_lakes_europe_sf <- st_read("ne_10m_lakes_europe.shp")
ne_lakes_sf <- st_read("ne_10m_lakes.shp")
ne_populated_places_sf <- st_read("ne_10m_populated_places.shp")
ne_roads_sf <- st_read("ne_10m_roads.shp")
Test, how it looks:
tmap_mode("plot")
## tmap mode set to plotting
tm_shape(ne_rivers_sf)+
tm_lines("dodgerblue")+
tm_shape(ne_rivers_europe_sf)+
tm_lines("blue")
## Warning: The shape ne_rivers_sf contains empty units.
As you see, all the data is global. But we only need data relevant to
Estonia. For that we can use st_intersection().
Check, how the function works:
?st_intersection
# if sf-package is not active:
??st_intersection
Now clip all the layers with Estonian contour:
# Keep only the contour of Estonia:
ne_countries_sf <- ne_countries_sf %>%
filter(NAME == "Estonia")
ne_rivers_sf <- st_intersection(ne_rivers_sf, ne_countries_sf)
ne_rivers_europe_sf <- st_intersection(ne_rivers_europe_sf, ne_countries_sf)
ne_lakes_sf <- st_intersection(ne_lakes_sf, ne_countries_sf)
ne_lakes_europe_sf <- st_intersection(ne_lakes_europe_sf, ne_countries_sf) # No additional lakes for Estonia! We leave it out!
ne_roads_sf <- st_intersection(ne_roads_sf, ne_countries_sf)
ne_populated_places_sf <- st_intersection(ne_populated_places_sf, ne_countries_sf)
# Keep only the contour of Estonia:
ne_countries_sf <- ne_countries_sf %>%
filter(NAME == "Estonia")
ne_lakes_sf <- st_make_valid(ne_lakes_sf)
ne_lakes_europe_sf <- st_make_valid(ne_lakes_europe_sf)
ne_rivers_sf <- st_intersection(ne_rivers_sf, ne_countries_sf)
ne_rivers_europe_sf <- st_intersection(ne_rivers_europe_sf, ne_countries_sf)
ne_lakes_sf <- st_intersection(ne_lakes_sf, ne_countries_sf)
ne_lakes_europe_sf <- st_intersection(ne_lakes_europe_sf, ne_countries_sf) # No additional lakes for Estonia! We leave it out!
ne_roads_sf <- st_intersection(ne_roads_sf, ne_countries_sf)
ne_populated_places_sf <- st_intersection(ne_populated_places_sf, ne_countries_sf)
Success? Check the result:
tm_shape(ne_countries_sf)+
tm_borders("grey")+
tm_shape(ne_roads_sf)+
tm_lines("red")
Sometimes the geometry is not valid or invalid. In that case the
function st_make_valid()may help:
# for example:
ne_lakes_sf <- st_make_valid(ne_lakes_sf)
ne_lakes_europe_sf <- st_make_valid(ne_lakes_europe_sf)
You can read more about tidying the feature geometry from here: link.
Plot all the results:
tm_shape(ne_countries_sf)+
tm_borders()+
tm_shape(ne_rivers_sf)+
tm_lines("blue")+
tm_shape(ne_rivers_europe_sf)+
tm_lines("blue")+
tm_shape(ne_lakes_sf)+
tm_polygons("blue")+
tm_shape(ne_roads_sf)+
tm_lines("red")+
tm_shape(ne_populated_places_sf)+
tm_dots("red")+
tm_shape(ne_populated_places_sf)+
tm_text("NAME")
Number of places is not very big, but adds some additional spatial context to the map.
As it was already mentioned there are always different options to
achieve the goal (and quite often some of them don’t work when needed…).
For example you can download data from Natural Earth
with help of rnaturalearth::ne_download():
ne_roads <- ne_download(scale = "large", type = "roads", category = "cultural", returnclass = "sf")
class(ne_roads)
# you should remove unneeded objects from workspace:
rm()
It arrives as sf.
For elevation we can use already familiar getData
function from library called raster. Actually there are
several datasets available (administrative contours, elevation, climatic
data etc. Read more closely from here: link
From there we can use elevation data aggregated from SRTM 90 m
(Shuttle Radar Topography Mission) elevation data link.
Elevation raster for Estonia:
elev <- ?raster::getData('alt', country = "EST", mask = T)
But sometimes the download is not working… In that case you can download the elevation layer from link which appears in error message. For Estonia it’s https://biogeo.ucdavis.edu/data/diva/msk_alt/EST_msk_alt.zip
Finally you have the layer and you can plot it:
# check the type of downloaded object:
class(elev)
## [1] "RasterLayer"
## attr(,"package")
## [1] "raster"
# plot:
plot(elev)
Nice! But resolution is not very high. We can access also SRTM 90-meter resolution data! SRTM is downloaded by tiles. We have to define Estonian coordinates (24E 58N is quite safe bet!). Be patient raster is quite big and download may take some time:
srtm <- raster::getData('SRTM', lon = 24, lat = 58)
## Warning in raster::getData("SRTM", lon = 24, lat = 58): getData will be removed in a future version of raster
## . Please use the geodata package instead
Check the result:
plot(srtm)
It’s only western part of Estonia. Download east as well:
srtm2 <- raster::getData('SRTM', lon = 27, lat = 58)
## Warning in raster::getData("SRTM", lon = 27, lat = 58): getData will be removed in a future version of raster
## . Please use the geodata package instead
Downloaded rasters are stored as separate objects. We can bind them
together with mosaic():
srtmmosaic <- mosaic(srtm, srtm2, fun = mean)
plot(srtmmosaic)
Better! But if you want to keep it in borders of Estonia, we can
crop() and mask() raster outside of the
borders:
srtmmosaic <- crop(srtmmosaic, extent(ne_countries_sf))
srtmmosaic <- mask(srtmmosaic, ne_countries_sf)
plot(srtmmosaic)
Why we need crop()?
Looks like we have now all the layers we need. And we can put all the
layers in logical order together. We can avoid ggplot() and
tmap(). Use just plot():
library(maptools) # enables function 'pointLabel' to add text labels
#plot(st_geometry(ne_countries_sf), col = "grey")
plot(srtmmosaic, col = (terrain.colors(20)))
plot(ne_roads_sf, add = T, col = "red", lwd = 0.1)
plot(ne_lakes_sf, add = T, col = "dodgerblue1")
plot(ne_rivers_sf, add = T, col = "dodgerblue1")
plot(ne_rivers_europe_sf, add = T, col = "dodgerblue1")
plot(ne_populated_places_sf, col = "black", add = T)
# Add place names
pointLabel(st_coordinates(ne_populated_places_sf),labels = ne_populated_places_sf$NAME)
Some people prefer tmap (it’s more intuitive and
developed specially for maps):
tm_shape(srtmmosaic)+
tm_raster(palette = c("darkgreen", "yellow", "orange", "red"), style = "cont", title = "m.a.s.l.")+
tm_shape(ne_rivers_sf)+
tm_lines("blue")+
tm_shape(ne_rivers_europe_sf)+
tm_lines("blue")+
tm_shape(ne_lakes_sf)+
tm_polygons("dodgerblue1")+
tm_shape(ne_roads_sf)+
tm_lines("red")+
tm_shape(ne_populated_places_sf)+
tm_dots("red")+
tm_shape(ne_populated_places_sf)+
tm_text("NAME")+
tm_scale_bar(position = c("left", "bottom"))+
tm_layout("Physical map of Estonia")+
tm_credits("A. Aasa")
## stars object downsampled to 1717 by 583 cells. See tm_shape manual (argument raster.downsample)
Satistfactory?
There are quite often the cases where we want to combine several
spatial layers and derive new information from this combination.
Let us pretend the situation where we want to calculate average
elevation above sea level for every county in Estonia.
We need the layer of Estonian counties in format of SpatialPolygonsDataFrame. Download it again:
gadm_1 <- raster::getData('GADM', country = 'EST', level = 1)
## Warning in raster::getData("GADM", country = "EST", level = 1): getData will be removed in a future version of raster
## . Please use the geodata package instead
class(gadm_1)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# extract mean from raster:
library(maptools)
alt_EST<- raster::getData('alt', country='EST', mask=TRUE)
## Warning in raster::getData("alt", country = "EST", mask = TRUE): getData will be removed in a future version of raster
## . Please use the geodata package instead
plot(alt_EST)
region_Ave_Elev <- raster::extract(alt_EST, gadm_1, fun = mean, na.rm = TRUE, sp = T)
# To continue you have to know in which format is the output:
class(region_Ave_Elev)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
#Check the data:
region_Ave_Elev@data %>%
head()
## GID_0 NAME_0 GID_1 NAME_1 VARNAME_1 NL_NAME_1
## 1 EST Estonia EST.1_1 Harju Harjumaa|Harju maakond <NA>
## 9 EST Estonia EST.2_1 Hiiu Hiiumaa|Hiiu maakond|Dagö <NA>
## 10 EST Estonia EST.3_1 Ida-Viru Ida-Virumaa|Ida-Viru maakond <NA>
## 11 EST Estonia EST.4_1 Järva Jaerva|Järvamaa|Järva maakond <NA>
## 12 EST Estonia EST.5_1 Jõgeva Jogeva|Jõgevamaa|Jõgeva maakond <NA>
## 14 EST Estonia EST.7_1 Lääne Laeaene|Läänemaa|Lääne maakond <NA>
## TYPE_1 ENGTYPE_1 CC_1 HASC_1 EST_msk_alt
## 1 Maakond County <NA> EE.HA 49.80205
## 9 Maakond County <NA> EE.HI 17.16073
## 10 Maakond County <NA> EE.IV 53.54464
## 11 Maakond County <NA> EE.JR 79.18902
## 12 Maakond County <NA> EE.JN 69.83882
## 14 Maakond County <NA> EE.LN 18.03521
Plot the results
# Convert SpatialpolygonDataFrame to sf:
region_Ave_Elev_sf <- st_as_sf(region_Ave_Elev)
# round the mean value (for map labels!):
region_Ave_Elev_sf <- region_Ave_Elev_sf %>%
mutate(EST_msk_alt2 = round(EST_msk_alt, 0))
# create tmap as separate object:
est_elev_counties <- tm_shape(region_Ave_Elev_sf)+
tm_polygons("EST_msk_alt", style = "pretty", palette = "viridis", title = "m.a.s.l.")+
tm_text("EST_msk_alt2")
# check the result:
print(est_elev_counties)
We can save the previous map:
# first we need to know the aspect ratio of map:
asp <- tmaptools::get_asp_ratio(est_elev_counties)
tmap_save(est_elev_counties, "est_elev_counties.png", height = 5, with = 5 * asp, units = "in", dpi = 300)
# If you don't remember the working directory:
getwd()
# the exported png-map should be there!
# download climate means (can be slow!):
climate <- raster::getData('worldclim', var ='bio', res = 0.5, lon = 26, lat = 58)
## Warning in raster::getData("worldclim", var = "bio", res = 0.5, lon = 26, : getData will be removed in a future version of raster
## . Please use the geodata package instead
plot(climate)
# select Annual Mean Temperature only:
climate_2 <- climate$bio1_16
plot(climate_2) # Wrong units! Celsius degree * 10
# mask it with Estonian contour:
climate_2 <- crop(climate_2, extent(ne_countries_sf))
climate_masked <- mask(climate_2, ne_countries_sf)
plot(climate_masked)
# Annual modelled mean air temperature map:
climate_masked <- climate_masked / 10 # to get real values
plot(st_geometry(ne_countries_sf))
plot(climate_masked, add= T, col = topo.colors(20))
plot(st_geometry(ne_countries_sf), add = T)
# we have some populated places:
tm_shape(climate_masked)+
tm_raster("bio1_16", palette = c("darkgreen", "grey90", "orange"), style = "cont")+
tm_shape(ne_populated_places_sf)+
tm_text("NAME")
#sf to SpatialpointDataFrame:
ne_populated_places_sp <- as(ne_populated_places_sf, "Spatial")
# warmest place:
place_temp <- raster::extract(climate_masked, ne_populated_places_sp, fun = mean, na.rm=TRUE, sp = T)
library(knitr)
place_temp@data %>%
dplyr::select(NAME, bio1_16) %>%
arrange(-bio1_16) %>%
kable()
| NAME | bio1_16 |
|---|---|
| Haapsalu | 5.6 |
| Pärnu | 5.6 |
| Tallinn | 5.4 |
| Viljandi | 5.1 |
| Tartu | 5.1 |
| Narva | 4.8 |
| Kohtla-Järve | 4.6 |
Warmest place is Haapsalu, Pärnu (5.6, 5.6 Celsius degrees)!
Good additional reading about raster::getData: link
Author: Anto Aasa
Supervisors: Anto Aasa & Lika Zhvania eriti LTOM.02.041
Last update: 2022-11-06 12:49:44