Start libraries:

library(sf)
library(tidyverse)
library(rnaturalearth)
library(ggthemes)
library(knitr)
library(ggspatial)
library(tmap)
library(tmaptools)
library(lubridate)
library(raster)
library(rgeos)
library(ggmap)

options(scipen = 999)

1 Calculate area of polygon

In real life the spatial distribution of any phenomena is usually not regular. Irregular is also the distribution of administrative units. This means, if we want to compare different municipalities with each other, absolute values may not be the best option (at least we are then unable to describe the full picture). Next to the absolute values very often density is used. This means we present value per spatial unit (e.g. km2). Let us try to calculate areas of municipalities and then calculate in which municipality is the highest density of beef cattle:

# calculate area for each polygon:
municip$area <- st_area(municip)

# Show areas of first 5 municipalities:
municip$area[1:5]
## Units: [m^2]
## [1]   11896170  207920558   73264580 2717872552 1032406180

We see that, polygon areas are calculated in same units as the reference system of geolayer (currently meters) and the variable type is Units. In current case it is reasonable to express it in square kilometers. We can convert units from square meter to square kilometer and store it as numeric:

municip <- municip %>% 
  mutate(area = as.numeric(area) / 1000000)

glimpse(municip)
## Rows: 79
## Columns: 7
## $ ONIMI    <chr> "Ruhnu vald", "Muhu vald", "Viimsi vald", "Saaremaa vald", "H…
## $ OKOOD    <chr> "0689", "0478", "0890", "0714", "0205", "0303", "0907", "0897…
## $ MNIMI    <chr> "Saare maakond", "Saare maakond", "Harju maakond", "Saare maa…
## $ MKOOD    <chr> "0074", "0074", "0037", "0074", "0039", "0068", "0056", "0084…
## $ TYYP     <chr> "1", "1", "1", "1", "1", "1", "1", "4", "1", "4", "1", "4", "…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((455191.3 64..., MULTIPOLYGON (((…
## $ area     <dbl> 11.89617, 207.92056, 73.26458, 2717.87255, 1032.40618, 17.330…

Where is the biggest / smallest municipality (by area) of Estonia?

# filter biggest and smallest
municip_extreme <- municip %>% 
  filter(area == min(area) | area == max(area)) #"|" corresponds to 'OR'

Create the map (with ggplot2):

ggplot()+
  theme_minimal()+
  geom_sf(data = municip, fill="grey96", colour = "grey80", size = 0.25)+
  geom_sf(data = municip_extreme, aes(fill = as.factor(area), colour = as.factor(area)), size=1)+
  geom_sf_text(data = municip_extreme, aes(label=ONIMI))+
  guides(fill=F, colour = F)+
  scale_fill_manual(values =c("red", "dodgerblue"))+
  scale_colour_manual(values =c("red", "dodgerblue"))
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

What is the area of biggest municipality? Smallest? What is average area of municipalities?

2 Spatial density

Let’s turn back to the geolayer of livestock in Estonia. First calculate the area of municipalities and convert it to square kilometers!

# Calculate the area by your self!!!

Hopefully it was easy! :)
In next step calculate the spatial density of beef cattle in municipalities:

# it can be done in several ways
# for example you can add calculation directly to the plotting syntax::
ggplot()+
  theme_minimal()+
  geom_sf(data = agrAnimal_2_sf_municip_aggr, aes(fill = beef_cattle / area), colour = "grey60", size = 0.25)+
  scale_fill_viridis_c(na.value = "red")

# Or you can calculate the density as separate column and after that draw the map:
agrAnimal_2_sf_municip_aggr <- agrAnimal_2_sf_municip_aggr %>% 
  mutate(beef_dens = beef_cattle / area)

ggplot()+
  theme_minimal()+
  geom_sf(data = agrAnimal_2_sf_municip_aggr, aes(fill = beef_dens), colour = "grey60", size=0.25)+
  scale_fill_viridis_c(na.value = "red")+
  labs(fill = "animals\nper\nkm2")

In which municipality is the beef cattle density highest?

# geom column slows down the query if we want to get quick overview. 
#Copy the layer and delete the geometry column:
temp0 <- agrAnimal_2_sf_municip_aggr %>% 
  st_drop_geometry()

# municipality with highest density of beef cattle:
temp0 %>% 
  filter(beef_dens == max(beef_dens)) %>% # filter max value
  kable() # format the output
ONIMI OKOOD MNIMI MKOOD TYYP municip_key beef_cattle estonian_holstein_cattle estonian_native_cattle estonian_red_cattle goats pigs sheeps area beef_dens

No results? First you have to remove NA values! (NA - Not Available)

To remove all NA’s from table you can use na.omit():

temp1 <- na.omit(temp0)

Previous command deletes all rows where value is NA.
But if you want to filter NA by column then it’s reasonable to use is.na() operator:

temp2 <- temp0 %>% 
  filter(!is.na(beef_dens))

Now we can try to query again in which municipality the beef livestock density is the highest:

temp2 %>% 
  filter(beef_dens == max(beef_dens)) %>% 
  kable()
ONIMI OKOOD MNIMI MKOOD TYYP municip_key beef_cattle estonian_holstein_cattle estonian_native_cattle estonian_red_cattle goats pigs sheeps area beef_dens
Kihnu vald 0303 Pärnu maakond 0068 1 kihnu vald 147 6 2 NA NA NA 457 17.32593 8.484393

Is it higher than population density? :)

In some cases we don’t want to remove NA-values, but replace them with other value, e.g. zero:

# replace NA with 0:
temp3 <- temp0 %>% 
  mutate(beef_dens = ifelse(is.na(beef_dens), 0, beef_dens))

# or:
temp4 <- temp0 # make copy
temp4$beef_dens[is.na(temp4$beef_dens)] <- 0 # replace NA with 0

How can we select top 10 municipalities with highest / lowest density of cattle?

# give me lowest density in top10 municipalities (arrange() does the thing!):
temp3 %>% 
  select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(beef_dens) %>% 
  head(10) %>% 
  kable()

# highest density in top10 municipalities:
temp3 %>% 
  select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(-beef_dens) %>% # sort from biggest to smallest
  head(10) %>% 
  kable()

Not working?! It’s because there is a conflict between raster and dplyr package. R doesn’t know from which library the functionality of select() should be applied (actually R is using the functionality from the package which was switched on last). Currently R tries it from package raster…
To avoid it:

# give me lowest density in top10 municipalities (arrange() does the thing!):
temp3 %>% 
  dplyr::select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(beef_dens) %>% 
  head(10) %>% 
  kable()
ONIMI beef_cattle beef_dens
Keila linn NA 0
Maardu linn NA 0
Loksa linn NA 0
Viljandi linn NA 0
Tallinn NA 0
Rakvere linn NA 0
Võru linn NA 0
Sillamäe linn NA 0
Narva linn NA 0
Kohtla-Järve linn NA 0
# highest density in top10 municipalities:
temp3 %>% 
  dplyr::select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(-beef_dens) %>% # sort from biggest to smallest
  head(10) %>% 
  kable()
ONIMI beef_cattle beef_dens
Kihnu vald 147 8.484393
Antsla vald 2357 5.741432
Haapsalu linn 1546 5.687526
Muhu vald 1053 5.064602
Ruhnu vald 57 4.791629
Jõhvi vald 564 4.551057
Vormsi vald 427 4.495625
Hiiumaa vald 4046 3.918872
Valga vald 2862 3.817733
Rapla vald 3277 3.812873

3 Base map / spatial context

Sometimes we are facing not only with the fact that thematic layer doesn’t fit well with base map, but even bigger problem is that you don’t have a proper base map.
One nice possibility is to use geolayers from Natural Earth. And if you are R-user you can to it directly from R vignette.

3.1 Import spatial data

In next example we have some dataset about genetic variance collected in different locations. Your task is to map it to reveal potential spatial patterns. Dataset contains geographical coordinates, sample ID and variance (variable1) of some gene parameter (content is not important right now).

Import the data:

genes <- read.csv2("http://aasa.ut.ee/Rspatial/data/geneticVariance.csv")
glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <chr> "-5.67338", "-5.67338", "-5.67338", "-5.67338", "-5.67338", …
## $ longitude <chr> "145.17883", "145.17883", "145.17883", "145.17883", "145.178…

Everything is okay?
Look again! Latitude and longitude are stored as factors. Convert them to numeric (it can’t be done directly! First you have to convert it to character):

genes <- genes %>% 
  mutate(longitude = as.numeric(as.character(longitude)), 
         latitude = as.numeric(as.character(latitude)))

glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …

Visualize it:

ggplot()+
  geom_point(data = genes, aes(x = longitude, y = latitude, colour = variable1))

The number of dots on plot is 12, but the number of rows in dataset is 70. Why there is such a big difference?
It is because in every location several samples were collected. Can we avoid this overlapping and bring out all the samples??
As usual there are several options:

  • jitter the points randomly
  • calculate average (or median) value for every location

3.2 Jitter the points

Principles of this operation are well described in here link. But we are not doing it just on plot but jitter the locations in data table.

3.2.1 Convert data table to spatial (sf) object

In GIS the most common data format is probably SHP. It works quite well, but has several disadvantages (one layer consists of several files [geometry, attributes, reference system etc]). Today on raising standard for the spatial objects is simple features (sf)

For point data this conversion is very simple:

glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …
genes_sf <- st_as_sf(genes, coords = c("longitude", "latitude"), crs = 4326)

class(genes_sf)
## [1] "sf"         "data.frame"

And now we can jitter the points:

genes_sf_jitt <- st_jitter(genes_sf, factor = 0.02) # read from Help (?st_jitter), how the 'factor' parameter works!

ggplot()+
  geom_sf(data = genes_sf_jitt, aes(colour = variable1))+
  scale_colour_viridis_c()

Better! But still two major problems: jittering doesn’t clarify the picture (maybe we should try calculate average for every location) and secondly although we can see the coordinate values on axes, it is not enough. For better spatial context we need a proper base map!

First we calculate average values for every location:

glimpse(genes)
## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …
genes_aggr <- genes %>% 
  group_by(longitude, latitude) %>% 
  summarise(var1_mean = mean(variable1)) %>% 
  ungroup()
## `summarise()` has grouped output by 'longitude'. You can override using the
## `.groups` argument.
# see the result:
genes_aggr
## # A tibble: 12 × 3
##    longitude latitude var1_mean
##        <dbl>    <dbl>     <dbl>
##  1      140.    -5.48  3959071.
##  2      143.    -5.85  3633741.
##  3      144.    -4.19  4430432.
##  4      144.    -6.15  3913224.
##  5      145.    -6.02  3067700.
##  6      145.    -5.67  3318188.
##  7      146.    -6.54  3404230.
##  8      146.    -6.97  3936487.
##  9      147.    -6.68  1610366 
## 10      148.    -9.99  3262092 
## 11      152.    -4.64  2663686.
## 12      152.    -4.22  2069084.

The result is just a plain table. We have to convert it to spatial sf-object:

genes_aggr_sf <- st_as_sf(genes_aggr, coords = c("longitude", "latitude"), crs = 4326) # 4326 refers to WGS84!

Plot it with tmap in interactive mode! look session 1

3.3 Base map for spatial context

3.3.1 shp-layers as base maps

There are many different geolayers available at different scale level. Currently we download just country polygons:

countries50 <- ne_download(scale = 50, type = 'countries', category = 'cultural', returnclass = "sf")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Anto Aasa\AppData\Local\Temp\RtmpsPDrc7", layer: "ne_50m_admin_0_countries"
## with 242 features
## It has 168 fields
## Integer64 fields read as strings:  NE_ID

Plot the base map and sample points

ggplot()+
  geom_sf(data = countries50, fill = "white", colour = "grey", size = 0.25)+
  geom_sf(data = genes_aggr_sf, colour = "red")

Experienced geographer can say that gene samples are collected from Papua New Guinea and Indonesia. Select those countries from base map layer:

countries50_ii <- countries50 %>% 
  filter(ADMIN == "Papua New Guinea" | ADMIN == "Indonesia") # '|' means 'OR'!!

#plot:
ggplot()+
  geom_sf(data = countries50_ii, fill = "white", colour = "grey", size=0.25)+
  geom_sf(data=genes_aggr_sf, colour = "red")

Much better! But we should trim the base map more (crop the empty western part):

# Crop by coordinates:
countries50_iii <- st_crop(countries50_ii, c(xmin = 138, xmax = 155, ymin = -12, ymax = 0))

#plot:
ggplot()+
  geom_sf(data = countries50_iii, fill = "white", colour = "grey", size=0.25)+
  geom_sf(data = genes_aggr_sf, colour = "red")

This is already pretty good. And now the spatial pattern:

ggplot()+
  geom_sf(data = countries50_iii, fill = "white", colour = "grey", size=0.25)+
  geom_sf(data = genes_aggr_sf, aes(colour = var1_mean), size=3)+
  scale_colour_viridis_c()

3.3.2 3rd-party base maps

When you want to add good spatial context to your thematic map, the best option is usually to create my your own. There are many good data sources available (Land Board Estonia, OSM, Natural Earth etc).
The only problem is, that it may take a lot of time to create good, relevant and informative map. For quicker solution we can use base maps from other service providers (Google, OSM, Stamen etc).
Next we can try possibilities offered by ggmap. Syntax is slightly different, but the principles are the same:

# calculate & define bounding box: 
# calculate bounding box of map extent:
gene_base_bbox <- st_bbox(countries50_iii)
gene_bbox <- st_bbox(gene_base_bbox) %>% as.numeric()

# it can be defined as an independednt object:
bbox <- c(left = gene_bbox[1], 
          bottom = gene_bbox[2], 
          right = gene_bbox[3],
          top = gene_bbox[4])

# download the map:
gene_stamen_map <- get_stamenmap(bbox, maptype = "terrain-background", zoom = 7)

#plot it:
ggmap(gene_stamen_map, extent = "device")

Convert sf to data frame and plot it with geom_point:

# coordinates as table:
genes_aggr_crd <- genes_aggr_sf %>% 
  st_coordinates() %>% # extract coordinates
  as_tibble() # store them as table

# extract attributes from sf-object:
genes_aggr_df <- genes_aggr_sf %>% 
  st_drop_geometry()

# bind columns together:
genes_aggr_df <- bind_cols(genes_aggr_df, genes_aggr_crd)

ggmap(gene_stamen_map, extent = "device")+
  theme_minimal()+
  theme(axis.title = element_blank(),
        axis.text = element_blank())+
  geom_point(data = genes_aggr_df, aes(x = X, y = Y, fill = var1_mean), size=3, shape = 22, colour = "grey30")+ # you have to define coordinates columns as well!
  scale_fill_gradientn(colours = c("navyblue", "grey", "red"))+ # define your own palette
  labs(title = "Gene map", 
       caption = "A. Aasa",
       fill = "value")

This was an example how to put spatial points on base map. Next, we look example of line maps.

3.4 Line maps

For that we use data about flights of Estonian national airline Nordica. Dataset covers all the flights operated by Nordica from December 2, 2018 to December 3, 2018. And our aim is to create typical destinations map:

Read the data and check file structure:

flights <- read_delim("http://aasa.ut.ee/Rspatial/data/nordicaFlights.csv", 
    delim = ";", escape_double = FALSE, locale = locale(encoding = "ISO-8859-1"), 
    trim_ws = TRUE)
## Rows: 94 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr  (6): date, from, to, fligjt, airplane, status
## time (2): departure, arrival
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(flights) 
## Rows: 94
## Columns: 8
## $ date      <chr> "02.12.2018", "02.12.2018", "02.12.2018", "02.12.2018", "02.…
## $ departure <time> 07:00:00, 07:00:00, 07:20:00, 09:10:00, 09:20:00, 09:25:00,…
## $ arrival   <time> 08:25:00, 08:20:00, 07:20:00, 12:30:00, 09:55:00, 12:35:00,…
## $ from      <chr> "Tallinn (TLL)", "Tallinn (TLL)", "Tallinn (TLL)", "München …
## $ to        <chr> "München (MUC)", "Viin (VIE)", "Stockholm (ARN)", "Tallinn (…
## $ fligjt    <chr> "LO8165", "LO8193", "LO8121", "LO8166", "LO790", "LO8194", "…
## $ airplane  <chr> "CR7  i", "CR9  i", "CR9  i", "CR7  i", "CR9  i", "CR9  i", …
## $ status    <chr> "Graafikujärgne.", "Graafikujärgne.", "Graafikujärgne.", "Gr…
flights %>% 
  head() %>% 
  kable()
date departure arrival from to fligjt airplane status
02.12.2018 07:00:00 08:25:00 Tallinn (TLL) München (MUC) LO8165 CR7 i Graafikujärgne.
02.12.2018 07:00:00 08:20:00 Tallinn (TLL) Viin (VIE) LO8193 CR9 i Graafikujärgne.
02.12.2018 07:20:00 07:20:00 Tallinn (TLL) Stockholm (ARN) LO8121 CR9 i Graafikujärgne.
02.12.2018 09:10:00 12:30:00 München (MUC) Tallinn (TLL) LO8166 CR7 i Graafikujärgne.
02.12.2018 09:20:00 09:55:00 Tallinn (TLL) Varssavi (WAW) LO790 CR9 i Graafikujärgne.
02.12.2018 09:25:00 12:35:00 Viin (VIE) Tallinn (TLL) LO8194 CR9 i Graafikujärgne.

All the data is nicely imported. All columns are stored as factors. But this is currently not important. Question is how can we add to the table the geographical coordinates of origin and destination?
As you probably know every airport has unique code (IATA code). Currently those codes are not in separate column. With help of tidyr and stringr package we can separate column with airport name and code to two columns:

# separate airport name and airport code:
flights <- flights %>% 
  separate(from, c("from_name" ,"from_code"), ' \\(')

flights <- flights %>% 
  separate(to, c("to_name" ,"to_code"), '\\(')

#remove ')' from string:
flights <- flights %>% 
  mutate(from_code = str_replace(from_code, "\\)", ""))

flights <- flights %>% 
  mutate(to_code = str_replace(to_code, "\\)", ""))

flights %>% 
  head() %>% 
  kable()
date departure arrival from_name from_code to_name to_code fligjt airplane status
02.12.2018 07:00:00 08:25:00 Tallinn TLL München MUC LO8165 CR7 i Graafikujärgne.
02.12.2018 07:00:00 08:20:00 Tallinn TLL Viin VIE LO8193 CR9 i Graafikujärgne.
02.12.2018 07:20:00 07:20:00 Tallinn TLL Stockholm ARN LO8121 CR9 i Graafikujärgne.
02.12.2018 09:10:00 12:30:00 München MUC Tallinn TLL LO8166 CR7 i Graafikujärgne.
02.12.2018 09:20:00 09:55:00 Tallinn TLL Varssavi WAW LO790 CR9 i Graafikujärgne.
02.12.2018 09:25:00 12:35:00 Viin VIE Tallinn TLL LO8194 CR9 i Graafikujärgne.

Airport codes are now nicely in separate columns. Next step is to find additional dataset where we have next to the airport codes also the geographical coordinates of airports.

airports <- read.csv("https://datahub.io/core/airport-codes/r/airport-codes.csv")

airports2 <- airports %>% 
  filter(iata_code != "") 

airports2 %>% 
  head() %>% 
  kable()
ident type name elevation_ft continent iso_country iso_region municipality gps_code iata_code local_code coordinates
03N small_airport Utirik Airport 4 OC MH MH-UTI Utirik Island K03N UTK 03N 169.852005, 11.222
07FA small_airport Ocean Reef Club Airport 8 NA US US-FL Key Largo 07FA OCA 07FA -80.274803161621, 25.325399398804
0AK small_airport Pilot Station Airport 305 NA US US-AK Pilot Station PQS 0AK -162.899994, 61.934601
0CO2 small_airport Crested Butte Airpark 8980 NA US US-CO Crested Butte 0CO2 CSE 0CO2 -106.928341, 38.851918
0TE7 small_airport LBJ Ranch Airport 1515 NA US US-TX Johnson City 0TE7 JCY 0TE7 -98.62249755859999, 30.251800537100003
13MA small_airport Metropolitan Airport 418 NA US US-MA Palmer 13MA PMX 13MA -72.31140136719999, 42.223300933800004
# Is airport of Tallinn (TLL) available: 
airports2 %>% 
  filter(iata_code == "TLL") %>% 
  kable()
ident type name elevation_ft continent iso_country iso_region municipality gps_code iata_code local_code coordinates
EETN large_airport Lennart Meri Tallinn Airport 131 EU EE EE-37 Tallinn EETN TLL 24.832799911499997, 59.41329956049999

As the airport coordinates are stored in one column we have to use separate() function once again:

airp_crd <- airports2 %>% 
  separate(coordinates, c("lng" ,"lat"), ", ")

# convert both columns to numeric and select relevant columns:
airp_crd <- airp_crd %>% 
  mutate(lng = as.numeric(lng), lat = as.numeric(lat)) %>% 
  dplyr::select(iata_code, lng, lat, municipality)

glimpse(airp_crd)
## Rows: 9,225
## Columns: 4
## $ iata_code    <chr> "UTK", "OCA", "PQS", "CSE", "JCY", "PMX", "WLR", "NUP", "…
## $ lng          <dbl> 169.85200, -80.27480, -162.89999, -106.92834, -98.62250, …
## $ lat          <dbl> 11.22200, 25.32540, 61.93460, 38.85192, 30.25180, 42.2233…
## $ municipality <chr> "Utirik Island", "Key Largo", "Pilot Station", "Crested B…

Next we have to join tables (see Practical session 1)

# join flights and airport table by origin:
flights2 <- left_join(flights, airp_crd, by = c("from_code" = "iata_code"))

#rename coordinates columns:
flights2 <- flights2 %>% 
  rename(from_x = lng, from_y = lat, from_mun = municipality)

#join flights and airport table by destination:
flights2 <- left_join(flights2, airp_crd, by = c("to_code" = "iata_code"))

#rename coordinates columns:
flights2 <- flights2 %>% 
  rename(to_x = lng, to_y = lat, to_mun = municipality)

glimpse(flights2)
## Rows: 104
## Columns: 16
## $ date      <chr> "02.12.2018", "02.12.2018", "02.12.2018", "02.12.2018", "02.…
## $ departure <time> 07:00:00, 07:00:00, 07:00:00, 07:20:00, 09:10:00, 09:10:00,…
## $ arrival   <time> 08:25:00, 08:25:00, 08:20:00, 07:20:00, 12:30:00, 12:30:00,…
## $ from_name <chr> "Tallinn", "Tallinn", "Tallinn", "Tallinn", "München", "Münc…
## $ from_code <chr> "TLL", "TLL", "TLL", "TLL", "MUC", "MUC", "TLL", "VIE", "TLL…
## $ to_name   <chr> "München ", "München ", "Viin ", "Stockholm ", "Tallinn ", "…
## $ to_code   <chr> "MUC", "MUC", "VIE", "ARN", "TLL", "TLL", "WAW", "TLL", "AMS…
## $ fligjt    <chr> "LO8165", "LO8165", "LO8193", "LO8121", "LO8166", "LO8166", …
## $ airplane  <chr> "CR7  i", "CR7  i", "CR9  i", "CR9  i", "CR7  i", "CR7  i", …
## $ status    <chr> "Graafikujärgne.", "Graafikujärgne.", "Graafikujärgne.", "Gr…
## $ from_x    <dbl> 24.83280, 24.83280, 24.83280, 24.83280, 11.78610, 11.69030, …
## $ from_y    <dbl> 59.4133, 59.4133, 59.4133, 59.4133, 48.3538, 48.1378, 59.413…
## $ from_mun  <chr> "Tallinn", "Tallinn", "Tallinn", "Tallinn", "Munich", "Munic…
## $ to_x      <dbl> 11.78610, 11.69030, 16.56970, 17.91860, 24.83280, 24.83280, …
## $ to_y      <dbl> 48.3538, 48.1378, 48.1103, 59.6519, 59.4133, 59.4133, 52.165…
## $ to_mun    <chr> "Munich", "Munich", "Vienna", "Stockholm", "Tallinn", "Talli…

Plot the flight on base map:

ggplot()+
  geom_sf(data = countries50, fill = "gray80", col = "gray40", size = 0.5) + 
  geom_segment(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red")

We focus on Europe. For that we can use st_crop() function or we can filter out only European countries:

europe <- countries50 %>% 
  filter(REGION_UN == "Europe")

# plot:
ggplot()+
  geom_sf(data = europe)+
  geom_segment(data = flights2, aes(x=from_x, xend= to_x, y=from_y, yend = to_y), col = "red")

Base map needs some additional croping

europe2 <- st_crop(europe, c(xmin = -10, 
                             xmax = 41,
                             ymin = 45,
                             ymax= 70))


# plot:
ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_segment(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red")

Use curves instead of straight lines:

ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))

4 Save visualization (png, pdf)

# png:

png("flights.png", res=300, units = "cm", height = 15, width = 15)

ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))

dev.off() # It is crucial to close the writing session!



# pdf:
pdf("flights.pdf", height = 15, width = 15)

ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))

dev.off() # It is crucial to close the writing session!

The last version looks already like a thematic map. But still there are some shortcomings:

  • map scale and north arrow is missing;
  • map title is missing.

We can try to fix it. Add the north arrow and map scale:

Plot it on top stamen map! In case of ggmap you have to use geom_segment() instead of geom_curve()!

5 Physical map & spatial queries

5.1 Physical map of Estonia

To create the map we need geolayers:

  • elevation
  • rivers
  • lakes
  • roads
  • populated places

You can find this kind of datasets from many different places. For example you can download data directly from raster library:
Download country contour (level = 0) and convert it to sf:

gadm_0_estonia <- raster::getData('GADM', country = 'EST', level = 0)
gadm_0_estonia <- st_as_sf(gadm_0_estonia)

If direct download doesn’t work you can download it from here: http://aasa.ut.ee/Rspatial/data/gadm36_EST_0_sf.rds
Use download.file() & read_rds().

Actually it is better to obtain all the data layers from the same source. Let’s get more familiar with Natural Earth. You can download data from there manually. But we can do it directly from R as well. For that you need to install package rnaturalearth.

# install from CRAN:
install.packages("rnaturalearth")

Plot country contour:

tm_shape(gadm_0_estonia)+
  tm_borders()

In next step we try to download layer of countries:

# download:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip", destfile="ne_10m_admin_0_countries_lakes.zip")

# unzip:
unzip("ne_10m_admin_0_countries_lakes.zip")

# import to R:
ne_countries_sf <- st_read("ne_10m_admin_0_countries_lakes.shp")

# plot to see the visual:
tm_shape(ne_countries_sf)+
  tm_borders()

# Check the table structure (it's faster, when you drop the geometry column):
ne_countries_sf %>% 
  st_drop_geometry() %>% 
  glimpse()

# We need only contour of Estonia:
ne_countries_sf <- ne_countries_sf %>% 
  filter(NAME == "Estonia")

# plot the result:
tm_shape(ne_countries_sf)+
  tm_borders("red")

There is a chance that direct download isn’t working (that’s the main problem with 3rd party data sources… Usually it appears during the live demo’s).

Next download rivers, lakes, roads & populated places (it may not work either):

# Rivers :
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_lake_centerlines.zip",
              destfile = "ne_10m_rivers_lake_centerlines.zip")

unzip("ne_10m_rivers_lake_centerlines.zip")

# Rivers (additional data for Europe): 
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_europe.zip", 
              destfile="ne_10m_rivers_europe.zip")

unzip("ne_10m_rivers_europe.zip")

# Lakes:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip",
              destfile = "ne_10m_lakes.zip")

unzip("ne_10m_lakes.zip")

# Lakes (Europe supplement):
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes_europe.zip",
              destfile = "ne_10m_lakes_europe.zip")

unzip("ne_10m_lakes_europe.zip")

# populated places:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip", 
              destfile = "ne_10m_populated_places.zip")

unzip("ne_10m_populated_places.zip")


# roads:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_roads.zip",
              destfile = "ne_10m_roads.zip")

unzip("ne_10m_roads.zip")

If previous download wasn’t successful you should download all the layers from course web page and after you can import layers to R:

download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_admin_0_countries_lakes.zip", destfile = "ne_10m_admin_0_countries_lakes.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_lakes.zip", destfile = "ne_10m_lakes.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_lakes_europe.zip", destfile = "ne_10m_lakes_europe.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_populated_places.zip", destfile = "ne_10m_populated_places.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_rivers_europe.zip", destfile = "ne_10m_rivers_europe.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_rivers_lake_centerlines.zip", destfile = "ne_10m_rivers_lake_centerlines.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_roads.zip",  destfile = "ne_10m_roads.zip")

# unzip all the layers!
unzip("filename.zip")

Import downloaded data:

ne_countries_sf <- st_read("ne_10m_admin_0_countries_lakes.shp")
ne_rivers_sf <- st_read("ne_10m_rivers_lake_centerlines.shp")
ne_rivers_europe_sf <- st_read("ne_10m_rivers_europe.shp")
ne_lakes_europe_sf <- st_read("ne_10m_lakes_europe.shp")
ne_lakes_sf <- st_read("ne_10m_lakes.shp")
ne_populated_places_sf <- st_read("ne_10m_populated_places.shp")
ne_roads_sf <- st_read("ne_10m_roads.shp")

Test, how it looks:

tmap_mode("plot")
## tmap mode set to plotting
tm_shape(ne_rivers_sf)+
  tm_lines("dodgerblue")+
tm_shape(ne_rivers_europe_sf)+
  tm_lines("blue")
## Warning: The shape ne_rivers_sf contains empty units.

As you see, all the data is global. But we only need data relevant to Estonia. For that we can use st_intersection().
Check, how the function works:

?st_intersection

# if sf-package is not active:
??st_intersection

Now clip all the layers with Estonian contour:

# Keep only the contour of Estonia:
ne_countries_sf <- ne_countries_sf %>% 
  filter(NAME == "Estonia")

ne_rivers_sf <- st_intersection(ne_rivers_sf, ne_countries_sf)
ne_rivers_europe_sf <- st_intersection(ne_rivers_europe_sf, ne_countries_sf)
ne_lakes_sf <- st_intersection(ne_lakes_sf, ne_countries_sf)
ne_lakes_europe_sf <- st_intersection(ne_lakes_europe_sf, ne_countries_sf)  # No additional lakes for Estonia! We leave it out!
ne_roads_sf <- st_intersection(ne_roads_sf, ne_countries_sf)
ne_populated_places_sf <- st_intersection(ne_populated_places_sf, ne_countries_sf)
# Keep only the contour of Estonia:
ne_countries_sf <- ne_countries_sf %>% 
  filter(NAME == "Estonia")

ne_lakes_sf <- st_make_valid(ne_lakes_sf)
ne_lakes_europe_sf <- st_make_valid(ne_lakes_europe_sf)

ne_rivers_sf <- st_intersection(ne_rivers_sf, ne_countries_sf)
ne_rivers_europe_sf <- st_intersection(ne_rivers_europe_sf, ne_countries_sf)
ne_lakes_sf <- st_intersection(ne_lakes_sf, ne_countries_sf)
ne_lakes_europe_sf <- st_intersection(ne_lakes_europe_sf, ne_countries_sf)  # No additional lakes for Estonia! We leave it out!
ne_roads_sf <- st_intersection(ne_roads_sf, ne_countries_sf)
ne_populated_places_sf <- st_intersection(ne_populated_places_sf, ne_countries_sf)

Success? Check the result:

tm_shape(ne_countries_sf)+
  tm_borders("grey")+
tm_shape(ne_roads_sf)+
  tm_lines("red")

Sometimes the geometry is not valid or invalid. In that case the function st_make_valid()may help:

# for example:
ne_lakes_sf <- st_make_valid(ne_lakes_sf)
ne_lakes_europe_sf <- st_make_valid(ne_lakes_europe_sf)

You can read more about tidying the feature geometry from here: link.

Plot all the results:

tm_shape(ne_countries_sf)+
  tm_borders()+
tm_shape(ne_rivers_sf)+
  tm_lines("blue")+
tm_shape(ne_rivers_europe_sf)+
  tm_lines("blue")+
tm_shape(ne_lakes_sf)+
  tm_polygons("blue")+
tm_shape(ne_roads_sf)+
  tm_lines("red")+
tm_shape(ne_populated_places_sf)+
  tm_dots("red")+
tm_shape(ne_populated_places_sf)+
  tm_text("NAME")

Number of places is not very big, but adds some additional spatial context to the map.

As it was already mentioned there are always different options to achieve the goal (and quite often some of them don’t work when needed…). For example you can download data from Natural Earth with help of rnaturalearth::ne_download():

ne_roads  <- ne_download(scale = "large", type = "roads", category = "cultural", returnclass = "sf")
class(ne_roads)

# you should remove unneeded objects from workspace:
rm()

It arrives as sf.

5.1.1 Now get the elevation data!

For elevation we can use already familiar getData function from library called raster. Actually there are several datasets available (administrative contours, elevation, climatic data etc. Read more closely from here: link

From there we can use elevation data aggregated from SRTM 90 m (Shuttle Radar Topography Mission) elevation data link.
Elevation raster for Estonia:

elev <- ?raster::getData('alt', country = "EST", mask = T)

But sometimes the download is not working… In that case you can download the elevation layer from link which appears in error message. For Estonia it’s https://biogeo.ucdavis.edu/data/diva/msk_alt/EST_msk_alt.zip

Finally you have the layer and you can plot it:

# check the type of downloaded object:
class(elev)
## [1] "RasterLayer"
## attr(,"package")
## [1] "raster"
# plot:
plot(elev)

Nice! But resolution is not very high. We can access also SRTM 90-meter resolution data! SRTM is downloaded by tiles. We have to define Estonian coordinates (24E 58N is quite safe bet!). Be patient raster is quite big and download may take some time:

srtm <- raster::getData('SRTM', lon = 24, lat = 58)
## Warning in raster::getData("SRTM", lon = 24, lat = 58): getData will be removed in a future version of raster
## . Please use the geodata package instead

Check the result:

plot(srtm)

It’s only western part of Estonia. Download east as well:

srtm2 <- raster::getData('SRTM', lon = 27, lat = 58)
## Warning in raster::getData("SRTM", lon = 27, lat = 58): getData will be removed in a future version of raster
## . Please use the geodata package instead

Downloaded rasters are stored as separate objects. We can bind them together with mosaic():

srtmmosaic <- mosaic(srtm, srtm2, fun = mean)
plot(srtmmosaic)

Better! But if you want to keep it in borders of Estonia, we can crop() and mask() raster outside of the borders:

srtmmosaic <- crop(srtmmosaic, extent(ne_countries_sf))

srtmmosaic <- mask(srtmmosaic, ne_countries_sf)

plot(srtmmosaic)

Why we need crop()?
Looks like we have now all the layers we need. And we can put all the layers in logical order together. We can avoid ggplot() and tmap(). Use just plot():

library(maptools) # enables function 'pointLabel' to add text labels

#plot(st_geometry(ne_countries_sf), col = "grey")
plot(srtmmosaic, col = (terrain.colors(20)))
plot(ne_roads_sf, add = T, col = "red", lwd = 0.1)
plot(ne_lakes_sf, add = T, col = "dodgerblue1")
plot(ne_rivers_sf, add = T, col = "dodgerblue1")
plot(ne_rivers_europe_sf, add = T, col = "dodgerblue1")
plot(ne_populated_places_sf, col = "black", add = T)

# Add place names
pointLabel(st_coordinates(ne_populated_places_sf),labels = ne_populated_places_sf$NAME)

Some people prefer tmap (it’s more intuitive and developed specially for maps):

tm_shape(srtmmosaic)+
  tm_raster(palette = c("darkgreen", "yellow", "orange", "red"), style = "cont", title = "m.a.s.l.")+
tm_shape(ne_rivers_sf)+
  tm_lines("blue")+
tm_shape(ne_rivers_europe_sf)+
  tm_lines("blue")+
tm_shape(ne_lakes_sf)+
  tm_polygons("dodgerblue1")+
tm_shape(ne_roads_sf)+
  tm_lines("red")+
tm_shape(ne_populated_places_sf)+
  tm_dots("red")+
tm_shape(ne_populated_places_sf)+
  tm_text("NAME")+
tm_scale_bar(position = c("left", "bottom"))+
tm_layout("Physical map of Estonia")+
tm_credits("A. Aasa")
## stars object downsampled to 1717 by 583 cells. See tm_shape manual (argument raster.downsample)

Satistfactory?

5.2 Some additional examples of spatial analysis.

There are quite often the cases where we want to combine several spatial layers and derive new information from this combination.
Let us pretend the situation where we want to calculate average elevation above sea level for every county in Estonia.

We need the layer of Estonian counties in format of SpatialPolygonsDataFrame. Download it again:

gadm_1 <- raster::getData('GADM', country = 'EST', level = 1)
## Warning in raster::getData("GADM", country = "EST", level = 1): getData will be removed in a future version of raster
## . Please use the geodata package instead
class(gadm_1)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# extract mean from raster:
library(maptools)
alt_EST<- raster::getData('alt', country='EST', mask=TRUE)
## Warning in raster::getData("alt", country = "EST", mask = TRUE): getData will be removed in a future version of raster
## . Please use the geodata package instead
plot(alt_EST)

region_Ave_Elev <- raster::extract(alt_EST, gadm_1, fun = mean, na.rm = TRUE, sp = T)

# To continue you have to know in which format is the output:
class(region_Ave_Elev)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
#Check the data:
region_Ave_Elev@data %>% 
  head()
##    GID_0  NAME_0   GID_1   NAME_1                       VARNAME_1 NL_NAME_1
## 1    EST Estonia EST.1_1    Harju          Harjumaa|Harju maakond      <NA>
## 9    EST Estonia EST.2_1     Hiiu       Hiiumaa|Hiiu maakond|Dagö      <NA>
## 10   EST Estonia EST.3_1 Ida-Viru    Ida-Virumaa|Ida-Viru maakond      <NA>
## 11   EST Estonia EST.4_1    Järva   Jaerva|Järvamaa|Järva maakond      <NA>
## 12   EST Estonia EST.5_1   Jõgeva Jogeva|Jõgevamaa|Jõgeva maakond      <NA>
## 14   EST Estonia EST.7_1    Lääne  Laeaene|Läänemaa|Lääne maakond      <NA>
##     TYPE_1 ENGTYPE_1 CC_1 HASC_1 EST_msk_alt
## 1  Maakond    County <NA>  EE.HA    49.80205
## 9  Maakond    County <NA>  EE.HI    17.16073
## 10 Maakond    County <NA>  EE.IV    53.54464
## 11 Maakond    County <NA>  EE.JR    79.18902
## 12 Maakond    County <NA>  EE.JN    69.83882
## 14 Maakond    County <NA>  EE.LN    18.03521

Plot the results

# Convert SpatialpolygonDataFrame to sf:
region_Ave_Elev_sf <- st_as_sf(region_Ave_Elev)

# round the mean value (for map labels!):
region_Ave_Elev_sf <- region_Ave_Elev_sf %>% 
  mutate(EST_msk_alt2 = round(EST_msk_alt, 0))

# create tmap as separate object:

est_elev_counties <- tm_shape(region_Ave_Elev_sf)+
  tm_polygons("EST_msk_alt", style = "pretty", palette = "viridis", title = "m.a.s.l.")+
  tm_text("EST_msk_alt2")

# check the result:
print(est_elev_counties)

We can save the previous map:

# first we need to know the aspect ratio of map:
asp <- tmaptools::get_asp_ratio(est_elev_counties)

tmap_save(est_elev_counties, "est_elev_counties.png", height = 5, with = 5 * asp, units = "in", dpi = 300)

# If you don't remember the working directory:
getwd()
# the exported png-map should be there!

5.3 In next example we analyse where is the warmest place in Estonia?

# download climate means (can be slow!):
climate <- raster::getData('worldclim', var ='bio', res = 0.5, lon = 26, lat = 58)
## Warning in raster::getData("worldclim", var = "bio", res = 0.5, lon = 26, : getData will be removed in a future version of raster
## . Please use the geodata package instead
plot(climate)

# select Annual Mean Temperature only:
climate_2 <- climate$bio1_16
plot(climate_2) # Wrong units! Celsius degree * 10

#  mask it with Estonian contour:
climate_2 <- crop(climate_2, extent(ne_countries_sf))
climate_masked <- mask(climate_2, ne_countries_sf)
plot(climate_masked)

# Annual modelled mean air temperature map:
climate_masked <- climate_masked / 10 # to get real values
plot(st_geometry(ne_countries_sf))
plot(climate_masked, add= T, col = topo.colors(20))
plot(st_geometry(ne_countries_sf), add = T)

# we have some populated places:
tm_shape(climate_masked)+
  tm_raster("bio1_16", palette = c("darkgreen", "grey90", "orange"), style = "cont")+
tm_shape(ne_populated_places_sf)+
  tm_text("NAME")

#sf to SpatialpointDataFrame:
ne_populated_places_sp <- as(ne_populated_places_sf, "Spatial")


# warmest place:
place_temp <- raster::extract(climate_masked, ne_populated_places_sp, fun = mean, na.rm=TRUE, sp = T)

library(knitr)
place_temp@data %>% 
  dplyr::select(NAME, bio1_16) %>% 
  arrange(-bio1_16) %>% 
  kable()
NAME bio1_16
Haapsalu 5.6
Pärnu 5.6
Tallinn 5.4
Viljandi 5.1
Tartu 5.1
Narva 4.8
Kohtla-Järve 4.6

Warmest place is Haapsalu, Pärnu (5.6, 5.6 Celsius degrees)!

Good additional reading about raster::getData: link

Author: Anto Aasa
Supervisors: Anto Aasa & Lika Zhvania eriti LTOM.02.041
Last update: 2022-11-06 12:49:44