Libraries for current session

library(XML)
library(tidyverse)
library(sf)
library(gplots)
library(rmapshaper)
library(gstat)
library(stars)
library(ggthemes)
library(elevatr)
library(knitr)
library(terra)
library(osmdata)

In this session we look how to interpolate the data spatially. Very often data is collected in discrete locations but the phenomena itself is continuous (air temperature, air pressure etc). This means that visualizing data only for observation sites may not be enough to get full picture about the spatial patterns.

Spatial interpolation of weather conditions in Estonia

Download latest weather observation data from Estonina Weather Service.
In previous sessions you have seen how to download data in various various formats (csv, Excel, shp, sdmx). One very common format is XML. From here you can read about the reasons: Why should I use XML?
XML format is not human-readable format. It is compromise between human-readability and machine-readability. And skills to use this format may be the first step from “ordinary people” to data scientist.

In R the import of XML is simple (usually :) ). Basically you don’t have to know anything about XML structure. Weather data can be downloaded and imported to R with just 4 lines of code:

weatherDownTime <- Sys.time()

weatherDownTime <- format(weatherDownTime, "%Y-%m-%d %H:%M")

download.file("http://www.ilmateenistus.ee/ilma_andmed/xml/observations.php", "weather.xml")
xmlfile <- xmlParse("weather.xml")

# convert XML to data frame:
weather <- xmlToDataFrame(xmlfile)

# check the result:
glimpse(weather)

## Rows: 151
## Columns: 19
## $ name              <chr> "Kuressaare linn", "Tallinn-Harku", "Pakri", "Kunda"…
## $ wmocode           <chr> "", "26038", "26029", "26045", "26046", "26058", "26…
## $ longitude         <chr> "22.48944444411111", "24.602891666624284", "24.04008…
## $ latitude          <chr> "58.26416666666667", "59.398122222355134", "59.38950…
## $ phenomenon        <chr> "", "Few clouds", "", "Clear", "Cloudy with clear sp…
## $ visibility        <chr> "", "35.0", "35.0", "48.0", "20.0", "20.0", "20.0", …
## $ precipitations    <chr> "", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"…
## $ airpressure       <chr> "", "1010.3", "1010.6", "1011.1", "1011.4", "1012", …
## $ relativehumidity  <chr> "57", "32", "75", "51", "48", "54", "49", "85", "54"…
## $ airtemperature    <chr> "18.4", "21", "13.7", "19.5", "20.4", "18.9", "20", …
## $ winddirection     <chr> "", "238", "320", "96", "190", "147", "122", "", "14…
## $ windspeed         <chr> "", "3.2", "2.1", "6.7", "3.7", "5.5", "3.9", "", "1…
## $ windspeedmax      <chr> "", "5.1", "4", "8.1", "5.5", "8.2", "6.5", "", "4.5…
## $ waterlevel        <chr> "", "", "", "", "", "", "", "", "64", "", "", "", ""…
## $ waterlevel_eh2000 <chr> "", "", "", "10", "", "", "", "", "", "", "", "", ""…
## $ watertemperature  <chr> "", "", "", "10.4", "", "", "", "", "", "", "", "", …
## $ uvindex           <chr> "", "1.4", "", "", "", "", "", "", "", "", "", "", "…
## $ sunshineduration  <chr> "", "284", "", "", "660", "", "", "623", "650", "458…
## $ globalradiation   <chr> "", "244", "", "", "", "265", "", "432", "", "210", …

Tidy up the table:

# convert columns from character to numeric:
weather <- weather %>% 
  mutate(longitude = as.numeric(as.character(longitude)), 
                              latitude = as.numeric(as.character(latitude)),
                              airtemperature = as.numeric(as.character(airtemperature)))

glimpse(weather)

## Rows: 151
## Columns: 19
## $ name              <chr> "Kuressaare linn", "Tallinn-Harku", "Pakri", "Kunda"…
## $ wmocode           <chr> "", "26038", "26029", "26045", "26046", "26058", "26…
## $ longitude         <dbl> 22.48944, 24.60289, 24.04008, 26.54140, 27.39827, 28…
## $ latitude          <dbl> 58.26417, 59.39812, 59.38950, 59.52141, 59.32902, 59…
## $ phenomenon        <chr> "", "Few clouds", "", "Clear", "Cloudy with clear sp…
## $ visibility        <chr> "", "35.0", "35.0", "48.0", "20.0", "20.0", "20.0", …
## $ precipitations    <chr> "", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"…
## $ airpressure       <chr> "", "1010.3", "1010.6", "1011.1", "1011.4", "1012", …
## $ relativehumidity  <chr> "57", "32", "75", "51", "48", "54", "49", "85", "54"…
## $ airtemperature    <dbl> 18.4, 21.0, 13.7, 19.5, 20.4, 18.9, 20.0, 16.5, 19.8…
## $ winddirection     <chr> "", "238", "320", "96", "190", "147", "122", "", "14…
## $ windspeed         <chr> "", "3.2", "2.1", "6.7", "3.7", "5.5", "3.9", "", "1…
## $ windspeedmax      <chr> "", "5.1", "4", "8.1", "5.5", "8.2", "6.5", "", "4.5…
## $ waterlevel        <chr> "", "", "", "", "", "", "", "", "64", "", "", "", ""…
## $ waterlevel_eh2000 <chr> "", "", "", "10", "", "", "", "", "", "", "", "", ""…
## $ watertemperature  <chr> "", "", "", "10.4", "", "", "", "", "", "", "", "", …
## $ uvindex           <chr> "", "1.4", "", "", "", "", "", "", "", "", "", "", "…
## $ sunshineduration  <chr> "", "284", "", "", "660", "", "", "623", "650", "458…
## $ globalradiation   <chr> "", "244", "", "", "", "265", "", "432", "", "210", …

Plot it:

ggplot()+
  geom_point(data = weather, aes(x = longitude, y=latitude, colour = airtemperature))+
  scale_color_gradientn(colours = topo.colors(20))+
  #labs(title = "Air temperature in Estonia", subtitle = weatherDownTime)+
  coord_fixed()

Stations with no data disturb the pattern. Filter them out:

airTemp_Df <- weather %>% 
  #dplyr::
  select(name, longitude, latitude, airtemperature) %>% 
  filter(!is.na(airtemperature))

airTemp_Df <- weather %>% 
  dplyr::select(longitude, latitude, airtemperature) %>% 
  filter(!is.na(airtemperature))

Plot:

ggplot()+
  geom_point(data = airTemp_Df, aes(x = longitude, y=latitude, fill = airtemperature), shape=21, size=5)+
  scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime, fill = "Temperature (°C)")

For visualisation we need context. For spatial visualisation we need spatial context. Download the contour of Estonia:

# list of available layers:
EHAK_layers <- st_layers("https://teenus.maaamet.ee/ows/ajakohane-haldusjaotus?service=wfs&version=2.0.0&request=GetCapabilities")

# we selecct counties: "ms_MaakondTekst"
est_cont <- st_read("https://teenus.maaamet.ee/ows/ajakohane-haldusjaotus?service=wfs&version=2.0.0&request=GetCapabilities", EHAK_layers$name[10])

Save it to your hard drive, to avoid downloading it every time:

st_write(est_cont, "est_cont.gpkg", delete_dsn = T, delete_layer = T)

To make the display of Estonian contour fast we can simplify it. One option is to apply st_simplify(), but this function may end up with gaps and/or overlapping polygons:

est_cont_smpl <- st_simplify(est_cont, preserveTopology = F, dTolerance = 200)

To keep the geometries correct we can apply ms_simplify() from library rmapshaper:

est_cont_smpl <- est_cont %>% 
  ms_simplify(keep = 0.1, keep_shapes = F) 

object.size(est_cont)
object.size(est_cont_smpl)

Plot:

ggplot()+
  geom_sf(data = est_cont_smpl, fill = "grey", col = "grey30", size = 0.25)+
  geom_point(data = airTemp_Df, aes(x = longitude, y=latitude, fill = airtemperature), shape=21, size=5)+
scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Temperature (°C)")

What happened?!
Layers do not overlap. The reason is that temperature data is in geographical coordinates and estonian counties in LEST-97.
Transform counties to EPSG:4326:

est_cont_4326_smpl <- est_cont_smpl %>% 
  st_transform(4326)

ggplot()+
  geom_sf(data = est_cont_4326_smpl, fill = "grey", col = "grey30", size = 0.25)+
  geom_point(data = airTemp_Df, aes(x = longitude, y=latitude, fill = airtemperature), shape=21, size=5)+
scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Temperature (°C)")

Nearest neighbour interpolation (Voronoi diagram)

Creating the voronoi diagram is spatial operation and assumes that point data is converted to sf object:

#create simple feature: 
airTemp_sf <- st_as_sf(airTemp_Df, coords= c("longitude", "latitude"), crs = 4326)

#Transform from lat-lon (WGS84) to Estonian official CRS (LEST-97 [epsg:3301])
airTemp_sf_3301 <- st_transform(airTemp_sf, 3301)

class(airTemp_sf_3301)

## [1] "sf"         "data.frame"

glimpse(airTemp_sf_3301)

## Rows: 97
## Columns: 2
## $ airtemperature <dbl> 18.4, 21.0, 13.7, 19.5, 20.4, 18.9, 20.0, 16.5, 19.8, 1…
## $ geometry       <POINT [m]> POINT (411346.6 6459155), POINT (534250.5 6584619…

Create the Voronoi diagramm:

# Combines geometries into one: 
airTemp_sf_3301_gm <- st_union(airTemp_sf_3301)

# Create the Voronoi diagram: 
airTemp_sf_voronoi <- st_voronoi(airTemp_sf_3301_gm)

class(airTemp_sf_voronoi)

## [1] "sfc_GEOMETRYCOLLECTION" "sfc"

The output is sfc (Simple Feature Column), geometry column only. We need sf (complete spatial dataframe: sfc column plus data attributes):

airTemp_sf_voronoi <- st_sf(airTemp_sf_voronoi)

class(airTemp_sf_voronoi)

## [1] "sf"         "data.frame"

glimpse(airTemp_sf_voronoi)

## Rows: 1
## Columns: 1
## $ airTemp_sf_voronoi <GEOMETRYCOLLECTION [m]> GEOMETRYCOLLECTION (POLYGON...

ggplot()+
  geom_sf(data = airTemp_sf_voronoi, fill="red", colour  = "black", size=0.5)

Now we have a Voronoi diagram of Estonian meteorological observation sites. However, we need to mask it with the Estonian boundary and add the missing attributes (air temperature data), since the Voronoi polygons are currently empty geometrical objects. There are several approaches to solve this problem. The most straightforward method is to perform a spatial join between (st_join()) the Voronoi diagram and the spatial points layer:

#st_intersection masks the voronoi with Estonian contour: 
airTemp_sf_voronoi <- st_intersection(st_cast(airTemp_sf_voronoi), st_union(st_buffer(est_cont_smpl, dist = 500))) 

airTemp_sf_voronoi <- st_sf(airTemp_sf_voronoi)

# plot the geometry
ggplot()+
  theme_void()+
  geom_sf(data = airTemp_sf_voronoi)

Join the attributes (air temperasture) spatial points with voronoi polygons:

# Spatial join:
airTemp_sf_voronoi2 <- st_join(airTemp_sf_voronoi, airTemp_sf_3301)
glimpse(airTemp_sf_voronoi2)

## Rows: 97
## Columns: 2
## $ airtemperature     <dbl> 13.4, 14.0, 13.1, 15.9, 12.9, 18.8, 18.4, 18.2, 18.…
## $ airTemp_sf_voronoi <GEOMETRY [m]> MULTIPOLYGON (((412461 6541..., MULTIPOLYG…

# plot the result:
ggplot()+
  geom_sf(data = airTemp_sf_voronoi2, aes(fill = airtemperature), size=0.25, colour = "grey70")+
  scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
  geom_sf_label(data = airTemp_sf_3301, aes(label = airtemperature), col="black",  size=3, alpha = 0.5)+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime, fill = "Temperature (°C)")

If the observation network is dense, then the result looks also nice and logical. But very often the Voronoi interpolation is not smooth enough and we should consider to apply other methods.

IDW

According to the Wikipedia is inverse distance weighting (IDW) a type of deterministic method for multivariate interpolation with a known scattered set of points. The assigned values to unknown points are calculated with a weighted average of the values available at the known points.
The name given to this type of methods was motivated by the weighted average applied, since it resorts to the inverse of the distance to each known point (“amount of proximity”) when assigning weights.

Software capabilities are developing very rapidly. Next, we will look at two examples, where the first is an approach from years ago for creating interpolation grid, and in the second we see how a large amount of manual work has moved automatically inside functions:

# Get the extent of interpolation area:
grd_extent <- st_bbox(est_cont_smpl)

# create grid:
x.range <- as.numeric(c(grd_extent[1], grd_extent[3])) # min/max longitude of the interpolation area
y.range <- as.numeric(c(grd_extent[2], grd_extent[4])) # min/max latitude of the interpolation area

grd <- expand.grid(x = seq(from = x.range[1], to = x.range[2], by = 1000), y = seq(from = y.range[1], to = y.range[2], by = 1000)) # expand points to grid

Convert grid to spatial object:

sp::coordinates(grd) <- ~x + y 
sp::gridded(grd) <- TRUE

class(grd)

## [1] "SpatialPixels"
## attr(,"package")
## [1] "sp"

sp::plot(grd)

IDW NEW.

# Create a grid for interpolation
grid <- st_make_grid(est_cont_smpl, 
                     cellsize = 5000,
                     #n = c(100, 100)
                     ) %>%
  st_sf() %>%
  st_filter(est_cont_smpl)

ggplot()+
  theme_void()+
  geom_sf(data = grid)

Interpolate the values:

tmp_time <- Sys.time()

# Perform IDW interpolation
idw_result <- gstat(formula = airtemperature ~ 1, 
                   data = airTemp_sf_3301,
                   nmax = 10, # number of nearest observations
                   set = list(idp = 2)) %>%
  predict(grid)

## [inverse distance weighted interpolation]

Sys.time() - tmp_time

## Time difference of 18.34252 secs

# Another option to create colour palette:
palett <- rich.colors(40, palette = "temperature", rgb = F)

# Plot
ggplot() +
  theme_minimal() +
  geom_sf(data = idw_result, aes(fill = var1.pred), col = NA) +
  geom_sf(data = est_cont_smpl, fill = NA, size = 0.3, colour = "grey45") +
  scale_fill_gradientn(colours = palett) +
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Temperature (°C)")

Or you are bored of traditional raster maps and want to use other regular grid? Hexagonal binning for example? For the hexagonal binning it’s better to use Estonian official CRS (LEST97, epsg:3301). This means we have to transform temperature data. In case of SpatialPointDataFrame we can’t use sf command st_transform().

# Create hexagonal grid directly with sf
grid_hex <- st_make_grid(
  est_cont_smpl,  # assuming you have sf version of your boundary
  cellsize = 10000,
  square = FALSE     # creates hexagonal grid
  ) %>%
  st_sf() %>%
  mutate(id = row_number())

Plot the hexagons:

ggplot()+
  theme_void()+
  geom_sf(data = grid_hex)+
  geom_sf(data = est_cont_smpl, fill = NA, color = "red")

Calculate IDW and map it:

# calculate IDW for hexagons:
tmp_time <- Sys.time()
P_idw_hex <- gstat::idw(airtemperature~1, airTemp_sf_3301, newdata = grid_hex, idp = 2)

## [inverse distance weighted interpolation]

Sys.time() - tmp_time

## Time difference of 2.244089 secs

# Crop with Estonian contur
P_idw_hex <- P_idw_hex %>% 
 st_sf() %>%
  st_filter(est_cont_smpl)

# plot it:
ggplot()+
  theme_map()+
  theme(panel.grid = element_line(color = "grey60", size = 0.15),
        legend.position = "right")+
  geom_sf(data = P_idw_hex, aes(fill = var1.pred), col = "grey60", size = 0.1)+
  geom_sf(data = est_cont_smpl, fill = NA, size = 0.3, colour = "grey45") +
  #scale_fill_viridis_c(option = "cividis")+
  scale_fill_gradientn(colours = c("blue", "lightblue", "lightyellow", "yellow", "orange", "red"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Celsius")

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Polynomial fit

In current example we use 2nd order polynomial. How the spatial interpolation work is explained in here: link

# Create a regular grid using stars
grd <- st_bbox(est_cont_smpl) %>%
  st_as_stars(dx = 500, dy = 500) %>%  
  st_crop(est_cont_smpl) %>%
  st_as_sf(as_points = TRUE)


# Extract coordinates for the grid and ensure numeric type
grd_coords <- st_coordinates(grd)
grd$X <- as.numeric(grd_coords[,1])
grd$Y <- as.numeric(grd_coords[,2])

# Prepare temperature data with coordinates (ensure same data type)
airTemp_coords <- st_coordinates(airTemp_sf_3301)

airTemp_df <- airTemp_sf_3301 %>%
  st_drop_geometry() %>%
  mutate(X = as.numeric(airTemp_coords[,1]),
         Y = as.numeric(airTemp_coords[,2]))

# Check data types are consistent
str(airTemp_df[c("X", "Y")])

## 'data.frame':    97 obs. of  2 variables:
##  $ X: num  411347 534251 502278 643825 693367 ...
##  $ Y: num  6459155 6584619 6583505 6600925 6581667 ...

str(grd[c("X", "Y")])

## Classes 'sf' and 'data.frame':   173853 obs. of  3 variables:
##  $ X       : num  601284 601784 595784 596284 595284 ...
##  $ Y       : num  6617251 6616751 6615751 6615751 6615251 ...
##  $ geometry:sfc_POINT of length 173853; first list element:  'XY' num  601284 6617251
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA
##   ..- attr(*, "names")= chr [1:2] "X" "Y"

# 2nd order polynomial formula
f.2 <- as.formula(airtemperature ~ X + Y + I(X*X) + I(Y*Y) + I(X*Y))

# Run the regression model
lm.2 <- lm(f.2, data = airTemp_df)

# Predict on the grid 
grd$var1.pred <- predict(lm.2, newdata = grd)

grd <- grd %>% 
  filter(!is.na(var1.pred))



# Plot
ggplot() +
  theme_minimal() +
  geom_sf(data = grd, aes(col = var1.pred) 
          #, col = NA
          ) +
  geom_sf(data = est_cont_smpl, fill = NA, size = 0.01, colour = "grey45") +
  #scale_color_gradientn(colours = palett) +
  scale_color_gradientn(colours = c("navy", "darkgreen", "yellow", "orange", "red"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       color = "Temperature (°C)")

Sometimes we want to use contours instead of raster.
Convert points to raster using stars:

# Create a raster template
bbox <- st_bbox(grd)
raster_template <- st_as_stars(bbox, dx = 1000, dy = 1000)  # adjust resolution as needed

# Interpolate points to raster
raster_data <- st_rasterize(grd["var1.pred"], template = raster_template)

plot(raster_data)

Create countours:

# Create contours
contours <- st_contour(raster_data, 
                       contour_lines = F
                       )

# Remove infinite:
contours <- contours %>% 
  filter(!is.infinite(Min)) 


# Plot
ggplot() +
  geom_sf(data = contours, aes(fill = Min)) +
  scale_fill_gradientn(colours = c("purple4", "grey80", "forestgreen"),
                       #labels = contours$var1.pred
                       )+
  theme_void() +
  labs(title = "Temperature Contours", fill = "°C")

3rd order polynomial fit

# 3rd polynomial model:
#f.3 <- as.formula(airtemperature ~ X + Y + I(X^X)+I(Y^Y) + I(X^Y))
f.3 <- as.formula(airtemperature ~ X + Y + I(X^2) + I(Y^2) + I(X*Y) + 
                  I(X^3) + I(Y^3) + I(X^2*Y) + I(X*Y^2))

# Run the regression model
lm.3 <- lm(f.3, data = airTemp_df)
summary(lm.3)

## 
## Call:
## lm(formula = f.3, data = airTemp_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.3900 -0.5387  0.0932  0.7071  3.7719 
## 
## Coefficients:
##                                Estimate                  Std. Error t value
## (Intercept) -246598.5371448798978235573  201734.8145534356008283794  -1.222
## X                 0.1121323274321209262       0.0234430729969315604   4.783
## Y                 0.1023083088598909784       0.0931237553565511661   1.099
## I(X^2)            0.0000000013555808053       0.0000000022079894727   0.614
## I(Y^2)           -0.0000000139501596026       0.0000000143403108052  -0.973
## I(X * Y)         -0.0000000348218185693       0.0000000073993551758  -4.706
## I(X^3)            0.0000000000000003974       0.0000000000000001696   2.343
## I(Y^3)            0.0000000000000006222       0.0000000000000007367   0.845
## I(X^2 * Y)       -0.0000000000000003436       0.0000000000000003559  -0.965
## I(X * Y^2)        0.0000000000000027174       0.0000000000000005850   4.645
##               Pr(>|t|)    
## (Intercept)     0.2249    
## X           0.00000698 ***
## Y               0.2750    
## I(X^2)          0.5409    
## I(Y^2)          0.3334    
## I(X * Y)    0.00000945 ***
## I(X^3)          0.0214 *  
## I(Y^3)          0.4007    
## I(X^2 * Y)      0.3371    
## I(X * Y^2)  0.00001196 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.28 on 87 degrees of freedom
## Multiple R-squared:  0.7129, Adjusted R-squared:  0.6832 
## F-statistic: 24.01 on 9 and 87 DF,  p-value: < 0.00000000000000022

# Predict on the grid (now should work)
grd$var1.pred_3rd <- predict(lm.3, newdata = (grd))

grd <- grd %>% 
  filter(!is.na(var1.pred_3rd))



# Plot
ggplot() +
  theme_minimal() +
  geom_sf(data = grd, aes(col = var1.pred_3rd) 
          #, col = NA
          ) +
  geom_sf(data = est_cont_smpl, fill = NA, size = 0.01, colour = "grey45") +
  #scale_color_gradientn(colours = palett) +
  scale_color_gradientn(colours = c("navy", "darkgreen", "yellow", "orange", "red"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       color = "Temperature (°C)")

Still bad…

Kriging

Accordign to Wikipedia: “In statistics, originally in geostatistics, kriging or Gaussian process regression is a method of interpolation for which the interpolated values are modeled by a Gaussian process governed by prior covariances. Under suitable assumptions on the priors, kriging gives the best linear unbiased prediction of the intermediate values. Interpolating methods based on other criteria such as smoothness (e.g., smoothing spline) need not yield the most likely intermediate values. The method is widely used in the domain of spatial analysis and computer experiments.”

Kriging is a little more involved than IDW as it requires the construction of a semivariogram model to describe the spatial autocorrelation pattern for your particular variable.

More about kriging (and other methods): link

# define the model:
#P_gs <- gstat::gstat(formula = airtemperature ~ 1, locations = airTemp_Df_xy)
P_gs <- gstat::gstat(formula = airtemperature ~ 1, locations = airTemp_sf_3301)

P_v <- variogram(P_gs, width = 1)
plot(P_v)

P_semivariog <- variogram(airtemperature ~ 1, locations = airTemp_sf_3301, data = airTemp_sf_3301)
plot(P_semivariog)

Semivariogram:

P_semivariog %>% 
  knitr::kable()

np	dist	gamma	id
24	4994.109	0.8358333	var1
47	14554.163	2.0474468	var1
83	23955.323	3.0221687	var1
102	33716.388	2.0398039	var1
159	43032.924	2.0177673	var1
164	52174.055	2.5081098	var1
179	61515.520	3.0781285	var1
180	71231.771	3.3664444	var1
201	80434.165	3.4283333	var1
228	90107.836	3.2912719	var1
244	99837.892	3.9166803	var1
226	109519.302	5.5529425	var1
210	118584.580	4.4717143	var1
241	128263.227	4.5182158	var1
245	137986.365	5.5690000	var1

From the empirical semivariogram plot and the information contained in the semivariog gstat object, we can estimate the sill, range and nugget to use in our model semivariogram.

“https://mgimond.github.io/Spatial/spatial-interpolation.html”

# From your semivariogram plot, visually identify:
# 1. NUGGET = Y-intercept (where the curve starts on Y-axis)
# 2. SILL = Plateau value (where the curve levels off)  
# 3. RANGE = X-value where curve reaches ~95% of sill

# Based on your semivariogram plot, estimate:

# REPLACE WITH CORRECT VALUES!!!
nugget <- 1.2     # Y-intercept value
sill <- 3.7       # Plateau/maximum value
range <- 65000    # Distance where curve levels off

# For vgm(), you need PARTIAL SILL (not total sill)

psill <- sill - nugget  # = 4.2 - 2.2 = 2.0

Syntax to help estimate parameters automatically:

# Nugget: minimum gamma value (or close to it)
nugget_auto <- min(P_semivariog$gamma)

# Sill: plateau value (mean of last few points)
n_points <- nrow(P_semivariog)
sill_auto <- mean(P_semivariog$gamma[(n_points-2):n_points])

# Range: distance where gamma reaches ~95% of sill
target_gamma <- nugget_auto + 0.95 * (sill_auto - nugget_auto)
range_auto <- P_semivariog$dist[which.max(P_semivariog$gamma >= target_gamma)]

psill_auto <- sill_auto - nugget_auto

Visually vs automatically detected values can be quite different…

Parameter	Visual	Automatic
nugget	1.2	0.8
psill	2.5	4.0
range	65000.0	109519.3

!!! Interpolation may fail if parameters are set incorrectly.

Calculate the Variogram Model:

P_model.variog <- vgm(psill = psill, model = "Gau", nugget = nugget, range = range)

# fit model:
P_fit.variog <- fit.variogram(P_semivariog, P_model.variog)

## Warning in fit.variogram(P_semivariog, P_model.variog): No convergence after
## 200 iterations: try different initial values?

plot(P_semivariog, P_fit.variog) # check from Help what are the other available models and choose the best fit!```

Kriging:

P_krig <- gstat::krige(formula = airtemperature ~ 1, locations = airTemp_sf_3301, newdata = grid, model = P_model.variog)

## [using ordinary kriging]

# just in case
#airTemp_sf_3301 <- airTemp_sf_3301 %>% 
#  filter(!is.na(airtemperature))

#st_crs(airTemp_sf_3301) <- st_crs(grid)

Map:

ggplot()+
  theme_minimal()+
  geom_sf(data = P_krig, aes(fill = var1.pred), color = NA, linewidth = .01)+
  #geom_sf(data = est_cont, fill=NA, size=0.3, colour="grey45")+
  scale_fill_gradientn(colours = c("navy", "lightblue",  "yellow", "orange"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Temperature (°C)")

And finally put raster, isotherms and labels together:

ggplot()+
  theme_minimal()+
   geom_sf(data = P_krig, aes(fill = var1.pred), color = NA, linewidth = .01)+
  geom_sf(data = est_cont_smpl, fill = NA, size = 0.3, colour = "grey45")+
  scale_fill_gradientn(colours = c("navy", "lightblue",  "yellow", "orange"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Temperature (°C)")+
  geom_sf_label(data = airTemp_sf_3301, aes(label = airtemperature), size = 2, alpha = 0.75)

Are you satisfied with this? Sometimes it is very hard to find suitable interpolation method. In that case you can try to apply some simple statistical model. Let’s test how it could be done with multiple linear regression. Look again the air temperature observations.

Interpolation with linear regression

Before we run the model, we should check, is there correlation between air temperature and coordinates:

ggplot(data = airTemp_Df)+
  geom_point(aes(x= longitude, y = airtemperature))+
  stat_smooth(aes(x= longitude, y = airtemperature), method = "lm")

ggplot(data = airTemp_Df)+
  geom_point(aes(x= latitude, y = airtemperature))+
  stat_smooth(aes(x= latitude, y = airtemperature), method = "lm")

Correlation between longitude and air temperature is 0.19 and correlation between latitude and air temperature is 0.24. Let’s try…

Create the multiple linear regression model, where air temperature is function and coordinates are arguments:

# first add LEST-97 coordinates to the attributes table:
airTemp_sf_3301 <- bind_cols(airTemp_sf_3301, st_coordinates(airTemp_sf_3301))

airtemperature_lm <- lm(airtemperature ~ X + Y, 
                        data = st_drop_geometry(airTemp_sf_3301))

Check the model. Is it significant (P-value < 0.05)? Are all model members significant?:

# model summary:
summary(airtemperature_lm)

## 
## Call:
## lm(formula = airtemperature ~ X + Y, data = st_drop_geometry(airTemp_sf_3301))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.0185 -1.0338  0.1106  1.4325  3.9217 
## 
## Coefficients:
##                  Estimate    Std. Error t value Pr(>|t|)  
## (Intercept) -37.713957776  24.531936932  -1.537   0.1276  
## X             0.000003997   0.000002339   1.709   0.0908 .
## Y             0.000008419   0.000003782   2.226   0.0284 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.2 on 94 degrees of freedom
## Multiple R-squared:  0.08416,    Adjusted R-squared:  0.06468 
## F-statistic: 4.319 on 2 and 94 DF,  p-value: 0.01605

# model coefficients:
coef(airtemperature_lm)

##      (Intercept)                X                Y 
## -37.713957775582   0.000003997402   0.000008418713

Model is significant. We can now calculate values for all the hexagons. We can use previousely created hexagons:

ggplot()+
  theme_void()+
  geom_sf(data = grid_hex)+
  geom_sf(data = est_cont_smpl, fill = NA, color = "red")

Model:

# make copy of hexagons layer:
grid_hex_2 <- grid_hex

# calculte hexagon centroids and add them to the table:
grid_hex_2 <- bind_cols(grid_hex_2, st_coordinates(st_centroid(grid_hex_2)))

## Warning: st_centroid assumes attributes are constant over geometries

glimpse(grid_hex_2)

## Rows: 1,193
## Columns: 4
## $ id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
## $ X        <dbl> 364033.8, 364033.8, 364033.8, 364033.8, 364033.8, 364033.8, 3…
## $ Y        <dbl> 6385807, 6403127, 6420448, 6437768, 6455089, 6472409, 6489730…
## $ geometry <POLYGON [m]> POLYGON ((364033.8 6380033,..., POLYGON ((364033.8 63…

# calculate predictions:
grid_hex_2 <- grid_hex_2 %>% 
  mutate(airtemp_mod = X * coef(airtemperature_lm)[2] +
           Y * coef(airtemperature_lm)[3] +
           coef(airtemperature_lm)[1]) %>% 
  st_filter(est_cont_smpl)

Plot the results:

ggplot(data = grid_hex_2)+
  geom_sf(aes(fill = airtemp_mod), color = NA)+
  scale_fill_gradientn(colours = c("blue", "lightblue", "grey90","yellow",  "orange"))+
  geom_sf(data = est_cont_smpl, fill = NA, color = "grey")

Not very nice?
It is a well known fact to geographers that for every 100 meters of elevation gained above sea level, air temperature decreases by 0.6 degrees. Maybe we should add the elevation component to our model as well? Let’s try!

Download the elevation data:

est_elev <- get_elev_raster(locations = est_cont_smpl, z = 10, clip = "bbox")
# Convert to terra raster for better performance
est_elev_terra <- rast(est_elev)

plot(est_elev_terra, main = "Estonia Elevation")

Extract elevation for meteorological stations using terra():

airTemp_sf_elev <- terra::extract(est_elev_terra, vect(airTemp_sf_3301))

## Warning: [extract] transforming vector data to the CRS of the raster

airTemp_sf_3301$elev <- airTemp_sf_elev[,2]  # Second column contains elevation values

ggplot() +
  geom_sf(data = est_cont_smpl, fill = "lightgray", color = "white") +
  geom_sf(data = airTemp_sf_3301, aes(color = elev, size = elev), alpha = 0.8) +
  scale_color_gradientn(colours = c("navy", "blue", "lightblue", "yellow", "orange", "brown"), na.value = "magenta", name = "Elevation (m)") +
  scale_size_continuous(range = c(1, 4), guide = "none") +
  theme_void() +
  labs(title = "Meteorological Stations - Elevation")

Extract average elevation for hexagonal grid using terra & Convert sf to terra vector for extraction:

#Convert:
grid_hex_vect <- vect(grid_hex)

# Extract elevation values
hex_elev_extracted <- terra::extract(est_elev_terra, grid_hex, fun = mean, na.rm = TRUE)

## Warning: [extract] transforming vector data to the CRS of the raster

# Add elevation to the sf object
grid_hex_elev <-  grid_hex
  
grid_hex_elev$elevation <- hex_elev_extracted[,2]

# Plot elevation map:
ggplot() +
  geom_sf(data = grid_hex_elev, aes(fill = elevation), color = "white", size = 0.1) +
  scale_fill_viridis_c(option = "terrain", na.value = "gray90", name = "Elevation (m)") +
  theme_void() +
  labs(title = "Estonia Elevation Map")

## Warning in viridisLite::viridis(n, alpha, begin, end, direction, option):
## Option 'terrain' does not exist. Defaulting to 'viridis'.

As we are dealing with air temperature we should crop the map:

grid_hex_elev <- grid_hex_elev %>% 
  st_filter(est_cont_smpl)

ggplot()+
  geom_sf(data = grid_hex_elev, aes(fill = elevation))+
  scale_fill_gradientn(colours = c("blue", "lightyellow", "brown"))

Create the model:

# tidy the table (add centroid coordinates to attributes table):
grid_hex_elev <- bind_cols(grid_hex_elev, st_coordinates(st_centroid(grid_hex_elev)))

# the model:
airtemperature_lm2 <- lm(airtemperature ~ X + Y + elev, 
                         data = st_drop_geometry(airTemp_sf_3301))

summary(airtemperature_lm2)

## 
## Call:
## lm(formula = airtemperature ~ X + Y + elev, data = st_drop_geometry(airTemp_sf_3301))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.0679 -1.0545  0.1125  1.2054  4.0237 
## 
## Coefficients:
##                  Estimate    Std. Error t value Pr(>|t|)  
## (Intercept) -41.349506797  25.227854379  -1.639    0.105  
## X             0.000003049   0.000002759   1.105    0.272  
## Y             0.000009034   0.000003909   2.311    0.023 *
## elev          0.004983239   0.007624210   0.654    0.515  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.206 on 93 degrees of freedom
## Multiple R-squared:  0.08835,    Adjusted R-squared:  0.05894 
## F-statistic: 3.004 on 3 and 93 DF,  p-value: 0.03435

coef(airtemperature_lm2)

##      (Intercept)                X                Y             elev 
## -41.349506796517   0.000003049380   0.000009034112   0.004983239009

# calculate prediction for every polygon:
grid_hex_elev <- grid_hex_elev %>% 
  mutate(airtemp_mod_elev = X * coef(airtemperature_lm2)[2] +
           Y * coef(airtemperature_lm2)[3] +
           elevation  * coef(airtemperature_lm2)[4]+
           coef(airtemperature_lm2)[1])

# final plot:
ggplot()+
  geom_sf(data = grid_hex_elev, aes( fill = airtemp_mod_elev), color = NA, linewidth = .01)+
  geom_sf(data = est_cont, fill = NA, size = 0.3, colour="grey45")+
  scale_fill_gradientn(colours = c("dodgerblue",  "yellow", "orange"))+
  labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
       fill = "Temperature (°C)")

You have to be careful! Is it still the map of air temperature of is it map of elevation?

Heat maps

Wikipedia: “A heat map (or heatmap) is a graphical representation of data where the individual values contained in a matrix are represented as colors. A density function visualization is a heat map for representing the density of dots in a map.”

Transparent points

Sometimes we even don’t need complex time consuming calculations and just using the transparency works well. In our next example we use gps-points from Monterey city, California. For that we have to download monterey map from Stamen maps.

# Import data:
good_bad_places <- read_csv("https://aasa.ut.ee/Rspatial/data/good_bad_places.csv")

## Rows: 273 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): type
## dbl (2): latitude, longitude
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

glimpse(good_bad_places)

## Rows: 273
## Columns: 3
## $ latitude  <dbl> 58.37997, 58.38296, 58.38061, 58.35542, 58.35646, 58.35639, …
## $ longitude <dbl> 26.73287, 26.72441, 26.72676, 26.72159, 26.73336, 26.73360, …
## $ type      <chr> "good", "good", "good", "bad", "bad", "good", "good", "bad",…

good_bad_places <- good_bad_places %>% 
  filter(!is.na(longitude))

# convert it to sf object:
votes_sf <- st_as_sf(good_bad_places, 
                     coords = c("longitude", "latitude"), 
                     crs = 4326, 
                     remove = F)

Transparent Dots Heatmap (Alpha Blending)

# Basic transparent dots - density shows through overlapping
ggplot() +
  theme_map()+
  geom_point(data = good_bad_places, 
             aes(x = longitude, y = latitude, color = type), 
             alpha = 0.3, size = 2) +
  scale_color_manual(values = c("good" = "green", "bad" = "red")) +
  theme_minimal() +
  labs(title = "Heatmap with Transparent Dots",
       subtitle = "Density visible through alpha blending",
       x = "Longitude", y = "Latitude", color = "Vote Type") +
  coord_fixed()

# Enhanced version with size and alpha variation
ggplot() +
  theme_map()+
  geom_point(data = good_bad_places, 
             aes(x = longitude, y = latitude, color = type), 
             alpha = 0.4, size = 3) +
  stat_density_2d(data = good_bad_places, 
                  aes(x = longitude, y = latitude), 
                  alpha = 0.6, color = "grey", size = 0.5) +
  scale_color_manual(values = c("good" = "#2E8B57", "bad" = "#DC143C")) +
  labs(title = "Transparent Dots + Density Contours",
       color = "Vote Type")

2D Density Heatmap (Kernel Density)

Simple 2D density plot:

ggplot(data = good_bad_places, aes(x = longitude, y = latitude)) +
  theme_map()+
  stat_density_2d_filled(alpha = 0.8, bins = 15) +
  geom_point(size = 0.5, alpha = 0.6) +
  scale_fill_viridis_d(option = "plasma", name = "Density") +
  labs(title = "2D Kernel Density Heatmap",
       x = "Longitude", y = "Latitude")

Hexagonal binning - counts points in each hexagon:

ggplot(data = good_bad_places, aes(x = longitude, y = latitude)) +
  theme_map()+
  theme(panel.background = element_rect(fill = "black"),
        plot.background = element_rect(fill = "black"),
        plot.title = element_text(color = "yellow"),
        legend.background = element_rect(fill = "black"),
        legend.title = element_text(color = "yellow"),
        legend.text = element_text(color = "yellow")
        
  )+
  stat_bin_hex(bins = 20, alpha = 0.8) +
  scale_fill_gradientn(colours = c("grey", "black", "yellow"), name = "Count") +
  
  labs(title = "Hexagonal Binning Heatmap",
       subtitle = "Count of votes in each hexagonal cell",
       x = "Longitude", y = "Latitude")

Hexagonal binning by vote type:

good_bad_places %>%
  count(type, lon_bin = round(longitude, 3), lat_bin = round(latitude, 3)) %>%
  ggplot(aes(x = lon_bin, y = lat_bin)) +
  theme_map()+
  stat_bin_hex(aes(weight = n), bins = 15, alpha = 0.8) +
  facet_wrap(~type, labeller = as_labeller(c("good" = "Good Places", "bad" = "Bad Places"))) +
  scale_fill_gradientn(colours = c("grey", "black", "yellow"), name = "Count") +
  
  labs(title = "Hexagonal Binning by Vote Type",
       x = "Longitude", y = "Latitude")

Without the spatial context the maps are pointless…

We can download some additional layers and add them to the map. Let’s try to add road information to the maps.

# Define Tartu bounding box
#tartu_bbox <- c(left = 26.69, bottom = 58.36, right = 26.75, top = 58.40)
st_bbox(votes_sf)

##     xmin     ymin     xmax     ymax 
## 26.64568 58.34292 26.79125 58.39960

# Download roads

roads <- opq(bbox = st_bbox(votes_sf)) %>%
  add_osm_feature(key = 'highway', 
                 value = c('motorway', 'trunk', 'primary', 'secondary', 
                          'tertiary', 'residential', 'unclassified')) %>%
  osmdata_sf()

ggplot()+
  geom_sf(data = roads$osm_lines, aes(color = highway), linewidth = .2)+
  scale_colour_manual(values = c("grey80", "grey70", "grey50", "grey40", "grey20", "grey"))+
  guides(colour = "none")

ggplot() +
  theme_void() +
  
  # Filled contours for transparent fill
  stat_density_2d_filled(data = good_bad_places, 
                         aes(x = longitude, y = latitude), 
                         alpha = .7, 
                         bins = 8) +
  # Contour lines on top
  stat_density_2d(data = good_bad_places, 
                  aes(x = longitude, y = latitude), 
                  color = "white", 
                  size = 0.3, 
                  alpha = 1) +
  scale_fill_viridis_d(name = "Density") +
  # roads
  geom_sf(data = roads$osm_lines, linewidth = .2, color = "black") +guides(colour = "none") +
  labs(title = "Transparent Fill + Contour Lines")

Author: Anto Aasa
Supervisors: Anto Aasa
LTOM.02.041
Last update: 2025-05-28 18:05:59.830887

logo

Spatial interpolation

Anto Aasa

May 27, 2025