Start libraries:

library(sf)
library(tidyverse)
library(rnaturalearth)
library(ggthemes)
library(knitr)
library(ggspatial)
library(tmap)
library(tmaptools)
library(lubridate)
library(raster)
library(rgeos)
library(ggmap)

options(scipen = 999)

1 Calculate area of polygon

In real life the spatial distribution of any phenomena is usually not regular. Irregular is also the distribution of administrative units. This means, if we want to compare different municipalities with each other, absolute values may not be the best option (at least we are then unable to describe the full picture). Next to the absolute values very often density is used. This means we present value per spatial unit (e.g. km2). Let us try to calculate areas of municipalities and then calculate in which municipality is the highest density of beef cattle:

# calculate area for each polygon:
municip$area <- st_area(municip)

# Show areas of first 5 municipalities:
municip$area[1:5]

## Units: [m^2]
## [1]   11896170  207920558   73264580 2717872552 1032406180

We see that, polygon areas are calculated in same units as the reference system of geolayer (currently meters) and the variable type is Units. In current case it is reasonable to express it in square kilometers. We can convert units from square meter to square kilometer and store it as numeric:

municip <- municip %>% 
  mutate(area = as.numeric(area) / 1000000)

glimpse(municip)

## Rows: 79
## Columns: 7
## $ ONIMI    <chr> "Ruhnu vald", "Muhu vald", "Viimsi vald", "Saaremaa vald", "H…
## $ OKOOD    <chr> "0689", "0478", "0890", "0714", "0205", "0303", "0907", "0897…
## $ MNIMI    <chr> "Saare maakond", "Saare maakond", "Harju maakond", "Saare maa…
## $ MKOOD    <chr> "0074", "0074", "0037", "0074", "0039", "0068", "0056", "0084…
## $ TYYP     <chr> "1", "1", "1", "1", "1", "1", "1", "4", "1", "4", "1", "4", "…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((455191.3 64..., MULTIPOLYGON (((…
## $ area     <dbl> 11.89617, 207.92056, 73.26458, 2717.87255, 1032.40618, 17.330…

Where is the biggest / smallest municipality (by area) of Estonia?

# filter biggest and smallest
municip_extreme <- municip %>% 
  filter(area == min(area) | area == max(area)) #"|" corresponds to 'OR'

Create the map (with ggplot2):

ggplot()+
  theme_minimal()+
  geom_sf(data = municip, fill="grey96", colour = "grey80", size = 0.25)+
  geom_sf(data = municip_extreme, aes(fill = as.factor(area), colour = as.factor(area)), size=1)+
  geom_sf_text(data = municip_extreme, aes(label=ONIMI))+
  guides(fill=F, colour = F)+
  scale_fill_manual(values =c("red", "dodgerblue"))+
  scale_colour_manual(values =c("red", "dodgerblue"))

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

What is the area of biggest municipality? Smallest? What is average area of municipalities?

2 Spatial density

Let’s turn back to the geolayer of livestock in Estonia. First calculate the area of municipalities and convert it to square kilometers!

# Calculate the area by your self!!!

Hopefully it was easy! :)
In next step calculate the spatial density of beef cattle in municipalities:

# it can be done in several ways
# for example you can add calculation directly to the plotting syntax::
ggplot()+
  theme_minimal()+
  geom_sf(data = agrAnimal_2_sf_municip_aggr, aes(fill = beef_cattle / area), colour = "grey60", size = 0.25)+
  scale_fill_viridis_c(na.value = "red")

# Or you can calculate the density as separate column and after that draw the map:
agrAnimal_2_sf_municip_aggr <- agrAnimal_2_sf_municip_aggr %>% 
  mutate(beef_dens = beef_cattle / area)

ggplot()+
  theme_minimal()+
  geom_sf(data = agrAnimal_2_sf_municip_aggr, aes(fill = beef_dens), colour = "grey60", size=0.25)+
  scale_fill_viridis_c(na.value = "red")+
  labs(fill = "animals\nper\nkm2")

In which municipality is the beef cattle density highest?

# geom column slows down the query if we want to get quick overview. 
#Copy the layer and delete the geometry column:
temp0 <- agrAnimal_2_sf_municip_aggr %>% 
  st_drop_geometry()

# municipality with highest density of beef cattle:
temp0 %>% 
  filter(beef_dens == max(beef_dens)) %>% # filter max value
  kable() # format the output

ONIMI	OKOOD	MNIMI	MKOOD	TYYP	municip_key	beef_cattle	estonian_holstein_cattle	estonian_native_cattle	estonian_red_cattle	goats	pigs	sheeps	area	beef_dens

No results? First you have to remove NA values! (NA - Not Available)

To remove all NA’s from table you can use na.omit():

temp1 <- na.omit(temp0)

Previous command deletes all rows where value is NA.
But if you want to filter NA by column then it’s reasonable to use is.na() operator:

temp2 <- temp0 %>% 
  filter(!is.na(beef_dens))

Now we can try to query again in which municipality the beef livestock density is the highest:

temp2 %>% 
  filter(beef_dens == max(beef_dens)) %>% 
  kable()

ONIMI	OKOOD	MNIMI	MKOOD	TYYP	municip_key	beef_cattle	estonian_holstein_cattle	estonian_native_cattle	estonian_red_cattle	goats	pigs	sheeps	area	beef_dens
Kihnu vald	0303	Pärnu maakond	0068	1	kihnu vald	147	6	2	NA	NA	NA	457	17.32593	8.484393

Is it higher than population density? :)

In some cases we don’t want to remove NA-values, but replace them with other value, e.g. zero:

# replace NA with 0:
temp3 <- temp0 %>% 
  mutate(beef_dens = ifelse(is.na(beef_dens), 0, beef_dens))

# or:
temp4 <- temp0 # make copy
temp4$beef_dens[is.na(temp4$beef_dens)] <- 0 # replace NA with 0

How can we select top 10 municipalities with highest / lowest density of cattle?

# give me lowest density in top10 municipalities (arrange() does the thing!):
temp3 %>% 
  select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(beef_dens) %>% 
  head(10) %>% 
  kable()

# highest density in top10 municipalities:
temp3 %>% 
  select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(-beef_dens) %>% # sort from biggest to smallest
  head(10) %>% 
  kable()

Not working?! It’s because there is a conflict between raster and dplyr package. R doesn’t know from which library the functionality of select() should be applied (actually R is using the functionality from the package which was switched on last). Currently R tries it from package raster…
To avoid it:

# give me lowest density in top10 municipalities (arrange() does the thing!):
temp3 %>% 
  dplyr::select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(beef_dens) %>% 
  head(10) %>% 
  kable()

ONIMI	beef_cattle	beef_dens
Keila linn	NA	0
Maardu linn	NA	0
Loksa linn	NA	0
Viljandi linn	NA	0
Tallinn	NA	0
Rakvere linn	NA	0
Võru linn	NA	0
Sillamäe linn	NA	0
Narva linn	NA	0
Kohtla-Järve linn	NA	0

# highest density in top10 municipalities:
temp3 %>% 
  dplyr::select(ONIMI, beef_cattle, beef_dens) %>% 
  arrange(-beef_dens) %>% # sort from biggest to smallest
  head(10) %>% 
  kable()

ONIMI	beef_cattle	beef_dens
Kihnu vald	147	8.484393
Antsla vald	2357	5.741432
Haapsalu linn	1546	5.687526
Muhu vald	1053	5.064602
Ruhnu vald	57	4.791629
Jõhvi vald	564	4.551057
Vormsi vald	427	4.495625
Hiiumaa vald	4046	3.918872
Valga vald	2862	3.817733
Rapla vald	3277	3.812873

3 Base map / spatial context

Sometimes we are facing not only with the fact that thematic layer doesn’t fit well with base map, but even bigger problem is that you don’t have a proper base map.
One nice possibility is to use geolayers from Natural Earth. And if you are R-user you can to it directly from R vignette.

3.1 Import spatial data

In next example we have some dataset about genetic variance collected in different locations. Your task is to map it to reveal potential spatial patterns. Dataset contains geographical coordinates, sample ID and variance (variable1) of some gene parameter (content is not important right now).

Import the data:

genes <- read.csv2("http://aasa.ut.ee/Rspatial/data/geneticVariance.csv")
glimpse(genes)

## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <chr> "-5.67338", "-5.67338", "-5.67338", "-5.67338", "-5.67338", …
## $ longitude <chr> "145.17883", "145.17883", "145.17883", "145.17883", "145.178…

Everything is okay?
Look again! Latitude and longitude are stored as factors. Convert them to numeric (it can’t be done directly! First you have to convert it to character):

genes <- genes %>% 
  mutate(longitude = as.numeric(as.character(longitude)), 
         latitude = as.numeric(as.character(latitude)))

glimpse(genes)

## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …

Visualize it:

ggplot()+
  geom_point(data = genes, aes(x = longitude, y = latitude, colour = variable1))

The number of dots on plot is 12, but the number of rows in dataset is 70. Why there is such a big difference?
It is because in every location several samples were collected. Can we avoid this overlapping and bring out all the samples??
As usual there are several options:

jitter the points randomly
calculate average (or median) value for every location

3.2 Jitter the points

Principles of this operation are well described in here link. But we are not doing it just on plot but jitter the locations in data table.

3.2.1 Convert data table to spatial (sf) object

In GIS the most common data format is probably SHP. It works quite well, but has several disadvantages (one layer consists of several files [geometry, attributes, reference system etc]). Today on raising standard for the spatial objects is simple features (sf)

For point data this conversion is very simple:

glimpse(genes)

## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …

genes_sf <- st_as_sf(genes, coords = c("longitude", "latitude"), crs = 4326)

class(genes_sf)

## [1] "sf"         "data.frame"

And now we can jitter the points:

genes_sf_jitt <- st_jitter(genes_sf, factor = 0.02) # read from Help (?st_jitter), how the 'factor' parameter works!

ggplot()+
  geom_sf(data = genes_sf_jitt, aes(colour = variable1))+
  scale_colour_viridis_c()

Better! But still two major problems: jittering doesn’t clarify the picture (maybe we should try calculate average for every location) and secondly although we can see the coordinate values on axes, it is not enough. For better spatial context we need a proper base map!

First we calculate average values for every location:

glimpse(genes)

## Rows: 70
## Columns: 4
## $ sample.ID <int> 64320287, 78970624, 98571019, 75936285, 49198766, 57194167, …
## $ variable1 <int> 2949376, 3869870, 3357594, 3610312, 2803790, 3722908, 489835…
## $ latitude  <dbl> -5.673380, -5.673380, -5.673380, -5.673380, -5.673380, -6.01…
## $ longitude <dbl> 145.1788, 145.1788, 145.1788, 145.1788, 145.1788, 144.9667, …

genes_aggr <- genes %>% 
  group_by(longitude, latitude) %>% 
  summarise(var1_mean = mean(variable1)) %>% 
  ungroup()

## `summarise()` has grouped output by 'longitude'. You can override using the
## `.groups` argument.

# see the result:
genes_aggr

## # A tibble: 12 × 3
##    longitude latitude var1_mean
##        <dbl>    <dbl>     <dbl>
##  1      140.    -5.48  3959071.
##  2      143.    -5.85  3633741.
##  3      144.    -4.19  4430432.
##  4      144.    -6.15  3913224.
##  5      145.    -6.02  3067700.
##  6      145.    -5.67  3318188.
##  7      146.    -6.54  3404230.
##  8      146.    -6.97  3936487.
##  9      147.    -6.68  1610366 
## 10      148.    -9.99  3262092 
## 11      152.    -4.64  2663686.
## 12      152.    -4.22  2069084.

The result is just a plain table. We have to convert it to spatial sf-object:

genes_aggr_sf <- st_as_sf(genes_aggr, coords = c("longitude", "latitude"), crs = 4326) # 4326 refers to WGS84!

Plot it with tmap in interactive mode! look session 1

3.3 Base map for spatial context

3.3.1 shp-layers as base maps

There are many different geolayers available at different scale level. Currently we download just country polygons:

countries50 <- ne_download(scale = 50, type = 'countries', category = 'cultural', returnclass = "sf")

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\Anto Aasa\AppData\Local\Temp\RtmpsPDrc7", layer: "ne_50m_admin_0_countries"
## with 242 features
## It has 168 fields
## Integer64 fields read as strings:  NE_ID

Plot the base map and sample points

ggplot()+
  geom_sf(data = countries50, fill = "white", colour = "grey", size = 0.25)+
  geom_sf(data = genes_aggr_sf, colour = "red")

Experienced geographer can say that gene samples are collected from Papua New Guinea and Indonesia. Select those countries from base map layer:

countries50_ii <- countries50 %>% 
  filter(ADMIN == "Papua New Guinea" | ADMIN == "Indonesia") # '|' means 'OR'!!

#plot:
ggplot()+
  geom_sf(data = countries50_ii, fill = "white", colour = "grey", size=0.25)+
  geom_sf(data=genes_aggr_sf, colour = "red")

Much better! But we should trim the base map more (crop the empty western part):

# Crop by coordinates:
countries50_iii <- st_crop(countries50_ii, c(xmin = 138, xmax = 155, ymin = -12, ymax = 0))

#plot:
ggplot()+
  geom_sf(data = countries50_iii, fill = "white", colour = "grey", size=0.25)+
  geom_sf(data = genes_aggr_sf, colour = "red")

This is already pretty good. And now the spatial pattern:

ggplot()+
  geom_sf(data = countries50_iii, fill = "white", colour = "grey", size=0.25)+
  geom_sf(data = genes_aggr_sf, aes(colour = var1_mean), size=3)+
  scale_colour_viridis_c()

3.3.2 3rd-party base maps

When you want to add good spatial context to your thematic map, the best option is usually to create my your own. There are many good data sources available (Land Board Estonia, OSM, Natural Earth etc).
The only problem is, that it may take a lot of time to create good, relevant and informative map. For quicker solution we can use base maps from other service providers (Google, OSM, Stamen etc).
Next we can try possibilities offered by ggmap. Syntax is slightly different, but the principles are the same:

# calculate & define bounding box: 
# calculate bounding box of map extent:
gene_base_bbox <- st_bbox(countries50_iii)
gene_bbox <- st_bbox(gene_base_bbox) %>% as.numeric()

# it can be defined as an independednt object:
bbox <- c(left = gene_bbox[1], 
          bottom = gene_bbox[2], 
          right = gene_bbox[3],
          top = gene_bbox[4])

# download the map:
gene_stamen_map <- get_stamenmap(bbox, maptype = "terrain-background", zoom = 7)

#plot it:
ggmap(gene_stamen_map, extent = "device")

Convert sf to data frame and plot it with geom_point:

# coordinates as table:
genes_aggr_crd <- genes_aggr_sf %>% 
  st_coordinates() %>% # extract coordinates
  as_tibble() # store them as table

# extract attributes from sf-object:
genes_aggr_df <- genes_aggr_sf %>% 
  st_drop_geometry()

# bind columns together:
genes_aggr_df <- bind_cols(genes_aggr_df, genes_aggr_crd)

ggmap(gene_stamen_map, extent = "device")+
  theme_minimal()+
  theme(axis.title = element_blank(),
        axis.text = element_blank())+
  geom_point(data = genes_aggr_df, aes(x = X, y = Y, fill = var1_mean), size=3, shape = 22, colour = "grey30")+ # you have to define coordinates columns as well!
  scale_fill_gradientn(colours = c("navyblue", "grey", "red"))+ # define your own palette
  labs(title = "Gene map", 
       caption = "A. Aasa",
       fill = "value")

This was an example how to put spatial points on base map. Next, we look example of line maps.

3.4 Line maps

For that we use data about flights of Estonian national airline Nordica. Dataset covers all the flights operated by Nordica from December 2, 2018 to December 3, 2018. And our aim is to create typical destinations map:

Read the data and check file structure:

flights <- read_delim("http://aasa.ut.ee/Rspatial/data/nordicaFlights.csv", 
    delim = ";", escape_double = FALSE, locale = locale(encoding = "ISO-8859-1"), 
    trim_ws = TRUE)

## Rows: 94 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr  (6): date, from, to, fligjt, airplane, status
## time (2): departure, arrival
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

glimpse(flights)

## Rows: 94
## Columns: 8
## $ date      <chr> "02.12.2018", "02.12.2018", "02.12.2018", "02.12.2018", "02.…
## $ departure <time> 07:00:00, 07:00:00, 07:20:00, 09:10:00, 09:20:00, 09:25:00,…
## $ arrival   <time> 08:25:00, 08:20:00, 07:20:00, 12:30:00, 09:55:00, 12:35:00,…
## $ from      <chr> "Tallinn (TLL)", "Tallinn (TLL)", "Tallinn (TLL)", "München …
## $ to        <chr> "München (MUC)", "Viin (VIE)", "Stockholm (ARN)", "Tallinn (…
## $ fligjt    <chr> "LO8165", "LO8193", "LO8121", "LO8166", "LO790", "LO8194", "…
## $ airplane  <chr> "CR7  i", "CR9  i", "CR9  i", "CR7  i", "CR9  i", "CR9  i", …
## $ status    <chr> "Graafikujärgne.", "Graafikujärgne.", "Graafikujärgne.", "Gr…

flights %>% 
  head() %>% 
  kable()

date	departure	arrival	from	to	fligjt	airplane	status
02.12.2018	07:00:00	08:25:00	Tallinn (TLL)	München (MUC)	LO8165	CR7 i	Graafikujärgne.
02.12.2018	07:00:00	08:20:00	Tallinn (TLL)	Viin (VIE)	LO8193	CR9 i	Graafikujärgne.
02.12.2018	07:20:00	07:20:00	Tallinn (TLL)	Stockholm (ARN)	LO8121	CR9 i	Graafikujärgne.
02.12.2018	09:10:00	12:30:00	München (MUC)	Tallinn (TLL)	LO8166	CR7 i	Graafikujärgne.
02.12.2018	09:20:00	09:55:00	Tallinn (TLL)	Varssavi (WAW)	LO790	CR9 i	Graafikujärgne.
02.12.2018	09:25:00	12:35:00	Viin (VIE)	Tallinn (TLL)	LO8194	CR9 i	Graafikujärgne.

All the data is nicely imported. All columns are stored as factors. But this is currently not important. Question is how can we add to the table the geographical coordinates of origin and destination?
As you probably know every airport has unique code (IATA code). Currently those codes are not in separate column. With help of tidyr and stringr package we can separate column with airport name and code to two columns:

# separate airport name and airport code:
flights <- flights %>% 
  separate(from, c("from_name" ,"from_code"), ' \\(')

flights <- flights %>% 
  separate(to, c("to_name" ,"to_code"), '\\(')

#remove ')' from string:
flights <- flights %>% 
  mutate(from_code = str_replace(from_code, "\\)", ""))

flights <- flights %>% 
  mutate(to_code = str_replace(to_code, "\\)", ""))

flights %>% 
  head() %>% 
  kable()

date	departure	arrival	from_name	from_code	to_name	to_code	fligjt	airplane	status
02.12.2018	07:00:00	08:25:00	Tallinn	TLL	München	MUC	LO8165	CR7 i	Graafikujärgne.
02.12.2018	07:00:00	08:20:00	Tallinn	TLL	Viin	VIE	LO8193	CR9 i	Graafikujärgne.
02.12.2018	07:20:00	07:20:00	Tallinn	TLL	Stockholm	ARN	LO8121	CR9 i	Graafikujärgne.
02.12.2018	09:10:00	12:30:00	München	MUC	Tallinn	TLL	LO8166	CR7 i	Graafikujärgne.
02.12.2018	09:20:00	09:55:00	Tallinn	TLL	Varssavi	WAW	LO790	CR9 i	Graafikujärgne.
02.12.2018	09:25:00	12:35:00	Viin	VIE	Tallinn	TLL	LO8194	CR9 i	Graafikujärgne.

Airport codes are now nicely in separate columns. Next step is to find additional dataset where we have next to the airport codes also the geographical coordinates of airports.

airports <- read.csv("https://datahub.io/core/airport-codes/r/airport-codes.csv")

airports2 <- airports %>% 
  filter(iata_code != "") 

airports2 %>% 
  head() %>% 
  kable()

ident	type	name	elevation_ft	continent	iso_country	iso_region	municipality	gps_code	iata_code	local_code	coordinates
03N	small_airport	Utirik Airport	4	OC	MH	MH-UTI	Utirik Island	K03N	UTK	03N	169.852005, 11.222
07FA	small_airport	Ocean Reef Club Airport	8	NA	US	US-FL	Key Largo	07FA	OCA	07FA	-80.274803161621, 25.325399398804
0AK	small_airport	Pilot Station Airport	305	NA	US	US-AK	Pilot Station		PQS	0AK	-162.899994, 61.934601
0CO2	small_airport	Crested Butte Airpark	8980	NA	US	US-CO	Crested Butte	0CO2	CSE	0CO2	-106.928341, 38.851918
0TE7	small_airport	LBJ Ranch Airport	1515	NA	US	US-TX	Johnson City	0TE7	JCY	0TE7	-98.62249755859999, 30.251800537100003
13MA	small_airport	Metropolitan Airport	418	NA	US	US-MA	Palmer	13MA	PMX	13MA	-72.31140136719999, 42.223300933800004

# Is airport of Tallinn (TLL) available: 
airports2 %>% 
  filter(iata_code == "TLL") %>% 
  kable()

ident	type	name	elevation_ft	continent	iso_country	iso_region	municipality	gps_code	iata_code	local_code	coordinates
EETN	large_airport	Lennart Meri Tallinn Airport	131	EU	EE	EE-37	Tallinn	EETN	TLL		24.832799911499997, 59.41329956049999

As the airport coordinates are stored in one column we have to use separate() function once again:

airp_crd <- airports2 %>% 
  separate(coordinates, c("lng" ,"lat"), ", ")

# convert both columns to numeric and select relevant columns:
airp_crd <- airp_crd %>% 
  mutate(lng = as.numeric(lng), lat = as.numeric(lat)) %>% 
  dplyr::select(iata_code, lng, lat, municipality)

glimpse(airp_crd)

## Rows: 9,225
## Columns: 4
## $ iata_code    <chr> "UTK", "OCA", "PQS", "CSE", "JCY", "PMX", "WLR", "NUP", "…
## $ lng          <dbl> 169.85200, -80.27480, -162.89999, -106.92834, -98.62250, …
## $ lat          <dbl> 11.22200, 25.32540, 61.93460, 38.85192, 30.25180, 42.2233…
## $ municipality <chr> "Utirik Island", "Key Largo", "Pilot Station", "Crested B…

Next we have to join tables (see Practical session 1)

# join flights and airport table by origin:
flights2 <- left_join(flights, airp_crd, by = c("from_code" = "iata_code"))

#rename coordinates columns:
flights2 <- flights2 %>% 
  rename(from_x = lng, from_y = lat, from_mun = municipality)

#join flights and airport table by destination:
flights2 <- left_join(flights2, airp_crd, by = c("to_code" = "iata_code"))

#rename coordinates columns:
flights2 <- flights2 %>% 
  rename(to_x = lng, to_y = lat, to_mun = municipality)

glimpse(flights2)

## Rows: 104
## Columns: 16
## $ date      <chr> "02.12.2018", "02.12.2018", "02.12.2018", "02.12.2018", "02.…
## $ departure <time> 07:00:00, 07:00:00, 07:00:00, 07:20:00, 09:10:00, 09:10:00,…
## $ arrival   <time> 08:25:00, 08:25:00, 08:20:00, 07:20:00, 12:30:00, 12:30:00,…
## $ from_name <chr> "Tallinn", "Tallinn", "Tallinn", "Tallinn", "München", "Münc…
## $ from_code <chr> "TLL", "TLL", "TLL", "TLL", "MUC", "MUC", "TLL", "VIE", "TLL…
## $ to_name   <chr> "München ", "München ", "Viin ", "Stockholm ", "Tallinn ", "…
## $ to_code   <chr> "MUC", "MUC", "VIE", "ARN", "TLL", "TLL", "WAW", "TLL", "AMS…
## $ fligjt    <chr> "LO8165", "LO8165", "LO8193", "LO8121", "LO8166", "LO8166", …
## $ airplane  <chr> "CR7  i", "CR7  i", "CR9  i", "CR9  i", "CR7  i", "CR7  i", …
## $ status    <chr> "Graafikujärgne.", "Graafikujärgne.", "Graafikujärgne.", "Gr…
## $ from_x    <dbl> 24.83280, 24.83280, 24.83280, 24.83280, 11.78610, 11.69030, …
## $ from_y    <dbl> 59.4133, 59.4133, 59.4133, 59.4133, 48.3538, 48.1378, 59.413…
## $ from_mun  <chr> "Tallinn", "Tallinn", "Tallinn", "Tallinn", "Munich", "Munic…
## $ to_x      <dbl> 11.78610, 11.69030, 16.56970, 17.91860, 24.83280, 24.83280, …
## $ to_y      <dbl> 48.3538, 48.1378, 48.1103, 59.6519, 59.4133, 59.4133, 52.165…
## $ to_mun    <chr> "Munich", "Munich", "Vienna", "Stockholm", "Tallinn", "Talli…

Plot the flight on base map:

ggplot()+
  geom_sf(data = countries50, fill = "gray80", col = "gray40", size = 0.5) + 
  geom_segment(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red")

We focus on Europe. For that we can use st_crop() function or we can filter out only European countries:

europe <- countries50 %>% 
  filter(REGION_UN == "Europe")

# plot:
ggplot()+
  geom_sf(data = europe)+
  geom_segment(data = flights2, aes(x=from_x, xend= to_x, y=from_y, yend = to_y), col = "red")

Base map needs some additional croping

europe2 <- st_crop(europe, c(xmin = -10, 
                             xmax = 41,
                             ymin = 45,
                             ymax= 70))


# plot:
ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_segment(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red")

Use curves instead of straight lines:

ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))

4 Save visualization (png, pdf)

# png:

png("flights.png", res=300, units = "cm", height = 15, width = 15)

ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))

dev.off() # It is crucial to close the writing session!



# pdf:
pdf("flights.pdf", height = 15, width = 15)

ggplot()+
  theme_minimal()+
  geom_sf(data = europe2, fill = "grey96", colour = "grey60", size = 0.25)+
  geom_curve(data = flights2, aes(x = from_x, xend = to_x, y = from_y, yend = to_y), col = "red", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc")))

dev.off() # It is crucial to close the writing session!

The last version looks already like a thematic map. But still there are some shortcomings:

map scale and north arrow is missing;
map title is missing.

We can try to fix it. Add the north arrow and map scale:

Plot it on top stamen map! In case of ggmap you have to use geom_segment() instead of geom_curve()!

5 Physical map & spatial queries

5.1 Physical map of Estonia

To create the map we need geolayers:

elevation
rivers
lakes
roads
populated places

You can find this kind of datasets from many different places. For example you can download data directly from raster library:
Download country contour (level = 0) and convert it to sf:

gadm_0_estonia <- raster::getData('GADM', country = 'EST', level = 0)
gadm_0_estonia <- st_as_sf(gadm_0_estonia)

If direct download doesn’t work you can download it from here: http://aasa.ut.ee/Rspatial/data/gadm36_EST_0_sf.rds
Use download.file() & read_rds().

Actually it is better to obtain all the data layers from the same source. Let’s get more familiar with Natural Earth. You can download data from there manually. But we can do it directly from R as well. For that you need to install package rnaturalearth.

# install from CRAN:
install.packages("rnaturalearth")

Plot country contour:

tm_shape(gadm_0_estonia)+
  tm_borders()

In next step we try to download layer of countries:

# download:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip", destfile="ne_10m_admin_0_countries_lakes.zip")

# unzip:
unzip("ne_10m_admin_0_countries_lakes.zip")

# import to R:
ne_countries_sf <- st_read("ne_10m_admin_0_countries_lakes.shp")

# plot to see the visual:
tm_shape(ne_countries_sf)+
  tm_borders()

# Check the table structure (it's faster, when you drop the geometry column):
ne_countries_sf %>% 
  st_drop_geometry() %>% 
  glimpse()

# We need only contour of Estonia:
ne_countries_sf <- ne_countries_sf %>% 
  filter(NAME == "Estonia")

# plot the result:
tm_shape(ne_countries_sf)+
  tm_borders("red")

There is a chance that direct download isn’t working (that’s the main problem with 3rd party data sources… Usually it appears during the live demo’s).

Next download rivers, lakes, roads & populated places (it may not work either):

# Rivers :
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_lake_centerlines.zip",
              destfile = "ne_10m_rivers_lake_centerlines.zip")

unzip("ne_10m_rivers_lake_centerlines.zip")

# Rivers (additional data for Europe): 
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_europe.zip", 
              destfile="ne_10m_rivers_europe.zip")

unzip("ne_10m_rivers_europe.zip")

# Lakes:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip",
              destfile = "ne_10m_lakes.zip")

unzip("ne_10m_lakes.zip")

# Lakes (Europe supplement):
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes_europe.zip",
              destfile = "ne_10m_lakes_europe.zip")

unzip("ne_10m_lakes_europe.zip")

# populated places:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places.zip", 
              destfile = "ne_10m_populated_places.zip")

unzip("ne_10m_populated_places.zip")


# roads:
download.file("https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_roads.zip",
              destfile = "ne_10m_roads.zip")

unzip("ne_10m_roads.zip")

If previous download wasn’t successful you should download all the layers from course web page and after you can import layers to R:

download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_admin_0_countries_lakes.zip", destfile = "ne_10m_admin_0_countries_lakes.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_lakes.zip", destfile = "ne_10m_lakes.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_lakes_europe.zip", destfile = "ne_10m_lakes_europe.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_populated_places.zip", destfile = "ne_10m_populated_places.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_rivers_europe.zip", destfile = "ne_10m_rivers_europe.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_rivers_lake_centerlines.zip", destfile = "ne_10m_rivers_lake_centerlines.zip")
download.file("http://aasa.ut.ee/Rspatial/data/data_session_03/ne_10m_roads.zip",  destfile = "ne_10m_roads.zip")

# unzip all the layers!
unzip("filename.zip")

Import downloaded data:

ne_countries_sf <- st_read("ne_10m_admin_0_countries_lakes.shp")
ne_rivers_sf <- st_read("ne_10m_rivers_lake_centerlines.shp")
ne_rivers_europe_sf <- st_read("ne_10m_rivers_europe.shp")
ne_lakes_europe_sf <- st_read("ne_10m_lakes_europe.shp")
ne_lakes_sf <- st_read("ne_10m_lakes.shp")
ne_populated_places_sf <- st_read("ne_10m_populated_places.shp")
ne_roads_sf <- st_read("ne_10m_roads.shp")

Test, how it looks:

tmap_mode("plot")

## tmap mode set to plotting

tm_shape(ne_rivers_sf)+
  tm_lines("dodgerblue")+
tm_shape(ne_rivers_europe_sf)+
  tm_lines("blue")

## Warning: The shape ne_rivers_sf contains empty units.

As you see, all the data is global. But we only need data relevant to Estonia. For that we can use st_intersection().
Check, how the function works:

?st_intersection

# if sf-package is not active:
??st_intersection

Now clip all the layers with Estonian contour:

# Keep only the contour of Estonia:
ne_countries_sf <- ne_countries_sf %>% 
  filter(NAME == "Estonia")

ne_rivers_sf <- st_intersection(ne_rivers_sf, ne_countries_sf)
ne_rivers_europe_sf <- st_intersection(ne_rivers_europe_sf, ne_countries_sf)
ne_lakes_sf <- st_intersection(ne_lakes_sf, ne_countries_sf)
ne_lakes_europe_sf <- st_intersection(ne_lakes_europe_sf, ne_countries_sf)  # No additional lakes for Estonia! We leave it out!
ne_roads_sf <- st_intersection(ne_roads_sf, ne_countries_sf)
ne_populated_places_sf <- st_intersection(ne_populated_places_sf, ne_countries_sf)

# Keep only the contour of Estonia:
ne_countries_sf <- ne_countries_sf %>% 
  filter(NAME == "Estonia")

ne_lakes_sf <- st_make_valid(ne_lakes_sf)
ne_lakes_europe_sf <- st_make_valid(ne_lakes_europe_sf)

ne_rivers_sf <- st_intersection(ne_rivers_sf, ne_countries_sf)
ne_rivers_europe_sf <- st_intersection(ne_rivers_europe_sf, ne_countries_sf)
ne_lakes_sf <- st_intersection(ne_lakes_sf, ne_countries_sf)
ne_lakes_europe_sf <- st_intersection(ne_lakes_europe_sf, ne_countries_sf)  # No additional lakes for Estonia! We leave it out!
ne_roads_sf <- st_intersection(ne_roads_sf, ne_countries_sf)
ne_populated_places_sf <- st_intersection(ne_populated_places_sf, ne_countries_sf)

Success? Check the result:

tm_shape(ne_countries_sf)+
  tm_borders("grey")+
tm_shape(ne_roads_sf)+
  tm_lines("red")

Sometimes the geometry is not valid or invalid. In that case the function st_make_valid()may help:

# for example:
ne_lakes_sf <- st_make_valid(ne_lakes_sf)
ne_lakes_europe_sf <- st_make_valid(ne_lakes_europe_sf)

You can read more about tidying the feature geometry from here: link.

Plot all the results:

tm_shape(ne_countries_sf)+
  tm_borders()+
tm_shape(ne_rivers_sf)+
  tm_lines("blue")+
tm_shape(ne_rivers_europe_sf)+
  tm_lines("blue")+
tm_shape(ne_lakes_sf)+
  tm_polygons("blue")+
tm_shape(ne_roads_sf)+
  tm_lines("red")+
tm_shape(ne_populated_places_sf)+
  tm_dots("red")+
tm_shape(ne_populated_places_sf)+
  tm_text("NAME")

Number of places is not very big, but adds some additional spatial context to the map.

As it was already mentioned there are always different options to achieve the goal (and quite often some of them don’t work when needed…). For example you can download data from Natural Earth with help of rnaturalearth::ne_download():

ne_roads  <- ne_download(scale = "large", type = "roads", category = "cultural", returnclass = "sf")
class(ne_roads)

# you should remove unneeded objects from workspace:
rm()

It arrives as sf.

5.1.1 Now get the elevation data!

For elevation we can use already familiar getData function from library called raster. Actually there are several datasets available (administrative contours, elevation, climatic data etc. Read more closely from here: link

From there we can use elevation data aggregated from SRTM 90 m (Shuttle Radar Topography Mission) elevation data link.
Elevation raster for Estonia:

elev <- ?raster::getData('alt', country = "EST", mask = T)

But sometimes the download is not working… In that case you can download the elevation layer from link which appears in error message. For Estonia it’s https://biogeo.ucdavis.edu/data/diva/msk_alt/EST_msk_alt.zip

Finally you have the layer and you can plot it:

# check the type of downloaded object:
class(elev)

## [1] "RasterLayer"
## attr(,"package")
## [1] "raster"

# plot:
plot(elev)

Nice! But resolution is not very high. We can access also SRTM 90-meter resolution data! SRTM is downloaded by tiles. We have to define Estonian coordinates (24E 58N is quite safe bet!). Be patient raster is quite big and download may take some time:

srtm <- raster::getData('SRTM', lon = 24, lat = 58)

## Warning in raster::getData("SRTM", lon = 24, lat = 58): getData will be removed in a future version of raster
## . Please use the geodata package instead

Check the result:

plot(srtm)

It’s only western part of Estonia. Download east as well:

srtm2 <- raster::getData('SRTM', lon = 27, lat = 58)

## Warning in raster::getData("SRTM", lon = 27, lat = 58): getData will be removed in a future version of raster
## . Please use the geodata package instead

Downloaded rasters are stored as separate objects. We can bind them together with mosaic():

srtmmosaic <- mosaic(srtm, srtm2, fun = mean)
plot(srtmmosaic)

Better! But if you want to keep it in borders of Estonia, we can crop() and mask() raster outside of the borders:

srtmmosaic <- crop(srtmmosaic, extent(ne_countries_sf))

srtmmosaic <- mask(srtmmosaic, ne_countries_sf)

plot(srtmmosaic)

Why we need crop()?
Looks like we have now all the layers we need. And we can put all the layers in logical order together. We can avoid ggplot() and tmap(). Use just plot():

library(maptools) # enables function 'pointLabel' to add text labels

#plot(st_geometry(ne_countries_sf), col = "grey")
plot(srtmmosaic, col = (terrain.colors(20)))
plot(ne_roads_sf, add = T, col = "red", lwd = 0.1)
plot(ne_lakes_sf, add = T, col = "dodgerblue1")
plot(ne_rivers_sf, add = T, col = "dodgerblue1")
plot(ne_rivers_europe_sf, add = T, col = "dodgerblue1")
plot(ne_populated_places_sf, col = "black", add = T)

# Add place names
pointLabel(st_coordinates(ne_populated_places_sf),labels = ne_populated_places_sf$NAME)

Some people prefer tmap (it’s more intuitive and developed specially for maps):

tm_shape(srtmmosaic)+
  tm_raster(palette = c("darkgreen", "yellow", "orange", "red"), style = "cont", title = "m.a.s.l.")+
tm_shape(ne_rivers_sf)+
  tm_lines("blue")+
tm_shape(ne_rivers_europe_sf)+
  tm_lines("blue")+
tm_shape(ne_lakes_sf)+
  tm_polygons("dodgerblue1")+
tm_shape(ne_roads_sf)+
  tm_lines("red")+
tm_shape(ne_populated_places_sf)+
  tm_dots("red")+
tm_shape(ne_populated_places_sf)+
  tm_text("NAME")+
tm_scale_bar(position = c("left", "bottom"))+
tm_layout("Physical map of Estonia")+
tm_credits("A. Aasa")

## stars object downsampled to 1717 by 583 cells. See tm_shape manual (argument raster.downsample)

Satistfactory?

5.2 Some additional examples of spatial analysis.

There are quite often the cases where we want to combine several spatial layers and derive new information from this combination.
Let us pretend the situation where we want to calculate average elevation above sea level for every county in Estonia.

We need the layer of Estonian counties in format of SpatialPolygonsDataFrame. Download it again:

gadm_1 <- raster::getData('GADM', country = 'EST', level = 1)

## Warning in raster::getData("GADM", country = "EST", level = 1): getData will be removed in a future version of raster
## . Please use the geodata package instead

class(gadm_1)

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

# extract mean from raster:
library(maptools)
alt_EST<- raster::getData('alt', country='EST', mask=TRUE)

## Warning in raster::getData("alt", country = "EST", mask = TRUE): getData will be removed in a future version of raster
## . Please use the geodata package instead

plot(alt_EST)

region_Ave_Elev <- raster::extract(alt_EST, gadm_1, fun = mean, na.rm = TRUE, sp = T)

# To continue you have to know in which format is the output:
class(region_Ave_Elev)

## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

#Check the data:
region_Ave_Elev@data %>% 
  head()

##    GID_0  NAME_0   GID_1   NAME_1                       VARNAME_1 NL_NAME_1
## 1    EST Estonia EST.1_1    Harju          Harjumaa|Harju maakond      <NA>
## 9    EST Estonia EST.2_1     Hiiu       Hiiumaa|Hiiu maakond|Dagö      <NA>
## 10   EST Estonia EST.3_1 Ida-Viru    Ida-Virumaa|Ida-Viru maakond      <NA>
## 11   EST Estonia EST.4_1    Järva   Jaerva|Järvamaa|Järva maakond      <NA>
## 12   EST Estonia EST.5_1   Jõgeva Jogeva|Jõgevamaa|Jõgeva maakond      <NA>
## 14   EST Estonia EST.7_1    Lääne  Laeaene|Läänemaa|Lääne maakond      <NA>
##     TYPE_1 ENGTYPE_1 CC_1 HASC_1 EST_msk_alt
## 1  Maakond    County <NA>  EE.HA    49.80205
## 9  Maakond    County <NA>  EE.HI    17.16073
## 10 Maakond    County <NA>  EE.IV    53.54464
## 11 Maakond    County <NA>  EE.JR    79.18902
## 12 Maakond    County <NA>  EE.JN    69.83882
## 14 Maakond    County <NA>  EE.LN    18.03521

Plot the results

# Convert SpatialpolygonDataFrame to sf:
region_Ave_Elev_sf <- st_as_sf(region_Ave_Elev)

# round the mean value (for map labels!):
region_Ave_Elev_sf <- region_Ave_Elev_sf %>% 
  mutate(EST_msk_alt2 = round(EST_msk_alt, 0))

# create tmap as separate object:

est_elev_counties <- tm_shape(region_Ave_Elev_sf)+
  tm_polygons("EST_msk_alt", style = "pretty", palette = "viridis", title = "m.a.s.l.")+
  tm_text("EST_msk_alt2")

# check the result:
print(est_elev_counties)

We can save the previous map:

# first we need to know the aspect ratio of map:
asp <- tmaptools::get_asp_ratio(est_elev_counties)

tmap_save(est_elev_counties, "est_elev_counties.png", height = 5, with = 5 * asp, units = "in", dpi = 300)

# If you don't remember the working directory:
getwd()
# the exported png-map should be there!

5.3 In next example we analyse where is the warmest place in Estonia?

# download climate means (can be slow!):
climate <- raster::getData('worldclim', var ='bio', res = 0.5, lon = 26, lat = 58)

## Warning in raster::getData("worldclim", var = "bio", res = 0.5, lon = 26, : getData will be removed in a future version of raster
## . Please use the geodata package instead

plot(climate)

# select Annual Mean Temperature only:
climate_2 <- climate$bio1_16
plot(climate_2) # Wrong units! Celsius degree * 10

#  mask it with Estonian contour:
climate_2 <- crop(climate_2, extent(ne_countries_sf))
climate_masked <- mask(climate_2, ne_countries_sf)
plot(climate_masked)

# Annual modelled mean air temperature map:
climate_masked <- climate_masked / 10 # to get real values
plot(st_geometry(ne_countries_sf))
plot(climate_masked, add= T, col = topo.colors(20))
plot(st_geometry(ne_countries_sf), add = T)

# we have some populated places:
tm_shape(climate_masked)+
  tm_raster("bio1_16", palette = c("darkgreen", "grey90", "orange"), style = "cont")+
tm_shape(ne_populated_places_sf)+
  tm_text("NAME")

#sf to SpatialpointDataFrame:
ne_populated_places_sp <- as(ne_populated_places_sf, "Spatial")


# warmest place:
place_temp <- raster::extract(climate_masked, ne_populated_places_sp, fun = mean, na.rm=TRUE, sp = T)

library(knitr)
place_temp@data %>% 
  dplyr::select(NAME, bio1_16) %>% 
  arrange(-bio1_16) %>% 
  kable()

NAME	bio1_16
Haapsalu	5.6
Pärnu	5.6
Tallinn	5.4
Viljandi	5.1
Tartu	5.1
Narva	4.8
Kohtla-Järve	4.6

Warmest place is Haapsalu, Pärnu (5.6, 5.6 Celsius degrees)!

Good additional reading about raster::getData: link

Author: Anto Aasa
Supervisors: Anto Aasa & Lika Zhvania eriti LTOM.02.041
Last update: 2022-11-06 12:49:44

logo

Simple thematic maps

Anto Aasa

November 11, 2022