Libraries for current session
library(XML)
library(tidyverse)
library(sf)
library(gplots)
library(rmapshaper)
library(gstat)
library(stars)
library(ggthemes)
library(elevatr)
library(knitr)
library(terra)
library(osmdata)
In this session we look how to interpolate the data spatially. Very often data is collected in discrete locations but the phenomena itself is continuous (air temperature, air pressure etc). This means that visualizing data only for observation sites may not be enough to get full picture about the spatial patterns.
Download latest weather observation data from Estonina
Weather Service.
In previous sessions you have seen how to download data in various
various formats (csv, Excel, shp,
sdmx). One very common format is XML. From
here you can read about the reasons: Why should I use
XML?
XML format is not human-readable
format. It is compromise between human-readability and
machine-readability. And skills to use this format may be the first step
from “ordinary people” to data scientist.
In R the import of XML is simple (usually :) ). Basically you don’t have to know anything about XML structure. Weather data can be downloaded and imported to R with just 4 lines of code:
weatherDownTime <- Sys.time()
weatherDownTime <- format(weatherDownTime, "%Y-%m-%d %H:%M")
download.file("http://www.ilmateenistus.ee/ilma_andmed/xml/observations.php", "weather.xml")
xmlfile <- xmlParse("weather.xml")
# convert XML to data frame:
weather <- xmlToDataFrame(xmlfile)
# check the result:
glimpse(weather)
## Rows: 151
## Columns: 19
## $ name <chr> "Kuressaare linn", "Tallinn-Harku", "Pakri", "Kunda"…
## $ wmocode <chr> "", "26038", "26029", "26045", "26046", "26058", "26…
## $ longitude <chr> "22.48944444411111", "24.602891666624284", "24.04008…
## $ latitude <chr> "58.26416666666667", "59.398122222355134", "59.38950…
## $ phenomenon <chr> "", "Few clouds", "", "Clear", "Cloudy with clear sp…
## $ visibility <chr> "", "35.0", "35.0", "48.0", "20.0", "20.0", "20.0", …
## $ precipitations <chr> "", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"…
## $ airpressure <chr> "", "1010.3", "1010.6", "1011.1", "1011.4", "1012", …
## $ relativehumidity <chr> "57", "32", "75", "51", "48", "54", "49", "85", "54"…
## $ airtemperature <chr> "18.4", "21", "13.7", "19.5", "20.4", "18.9", "20", …
## $ winddirection <chr> "", "238", "320", "96", "190", "147", "122", "", "14…
## $ windspeed <chr> "", "3.2", "2.1", "6.7", "3.7", "5.5", "3.9", "", "1…
## $ windspeedmax <chr> "", "5.1", "4", "8.1", "5.5", "8.2", "6.5", "", "4.5…
## $ waterlevel <chr> "", "", "", "", "", "", "", "", "64", "", "", "", ""…
## $ waterlevel_eh2000 <chr> "", "", "", "10", "", "", "", "", "", "", "", "", ""…
## $ watertemperature <chr> "", "", "", "10.4", "", "", "", "", "", "", "", "", …
## $ uvindex <chr> "", "1.4", "", "", "", "", "", "", "", "", "", "", "…
## $ sunshineduration <chr> "", "284", "", "", "660", "", "", "623", "650", "458…
## $ globalradiation <chr> "", "244", "", "", "", "265", "", "432", "", "210", …
Tidy up the table:
# convert columns from character to numeric:
weather <- weather %>%
mutate(longitude = as.numeric(as.character(longitude)),
latitude = as.numeric(as.character(latitude)),
airtemperature = as.numeric(as.character(airtemperature)))
glimpse(weather)
## Rows: 151
## Columns: 19
## $ name <chr> "Kuressaare linn", "Tallinn-Harku", "Pakri", "Kunda"…
## $ wmocode <chr> "", "26038", "26029", "26045", "26046", "26058", "26…
## $ longitude <dbl> 22.48944, 24.60289, 24.04008, 26.54140, 27.39827, 28…
## $ latitude <dbl> 58.26417, 59.39812, 59.38950, 59.52141, 59.32902, 59…
## $ phenomenon <chr> "", "Few clouds", "", "Clear", "Cloudy with clear sp…
## $ visibility <chr> "", "35.0", "35.0", "48.0", "20.0", "20.0", "20.0", …
## $ precipitations <chr> "", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"…
## $ airpressure <chr> "", "1010.3", "1010.6", "1011.1", "1011.4", "1012", …
## $ relativehumidity <chr> "57", "32", "75", "51", "48", "54", "49", "85", "54"…
## $ airtemperature <dbl> 18.4, 21.0, 13.7, 19.5, 20.4, 18.9, 20.0, 16.5, 19.8…
## $ winddirection <chr> "", "238", "320", "96", "190", "147", "122", "", "14…
## $ windspeed <chr> "", "3.2", "2.1", "6.7", "3.7", "5.5", "3.9", "", "1…
## $ windspeedmax <chr> "", "5.1", "4", "8.1", "5.5", "8.2", "6.5", "", "4.5…
## $ waterlevel <chr> "", "", "", "", "", "", "", "", "64", "", "", "", ""…
## $ waterlevel_eh2000 <chr> "", "", "", "10", "", "", "", "", "", "", "", "", ""…
## $ watertemperature <chr> "", "", "", "10.4", "", "", "", "", "", "", "", "", …
## $ uvindex <chr> "", "1.4", "", "", "", "", "", "", "", "", "", "", "…
## $ sunshineduration <chr> "", "284", "", "", "660", "", "", "623", "650", "458…
## $ globalradiation <chr> "", "244", "", "", "", "265", "", "432", "", "210", …
Plot it:
ggplot()+
geom_point(data = weather, aes(x = longitude, y=latitude, colour = airtemperature))+
scale_color_gradientn(colours = topo.colors(20))+
#labs(title = "Air temperature in Estonia", subtitle = weatherDownTime)+
coord_fixed()
Stations with no data disturb the pattern. Filter them out:
airTemp_Df <- weather %>%
#dplyr::
select(name, longitude, latitude, airtemperature) %>%
filter(!is.na(airtemperature))
airTemp_Df <- weather %>%
dplyr::select(longitude, latitude, airtemperature) %>%
filter(!is.na(airtemperature))
Plot:
ggplot()+
geom_point(data = airTemp_Df, aes(x = longitude, y=latitude, fill = airtemperature), shape=21, size=5)+
scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime, fill = "Temperature (°C)")
For visualisation we need context. For spatial visualisation we need spatial context. Download the contour of Estonia:
# list of available layers:
EHAK_layers <- st_layers("https://teenus.maaamet.ee/ows/ajakohane-haldusjaotus?service=wfs&version=2.0.0&request=GetCapabilities")
# we selecct counties: "ms_MaakondTekst"
est_cont <- st_read("https://teenus.maaamet.ee/ows/ajakohane-haldusjaotus?service=wfs&version=2.0.0&request=GetCapabilities", EHAK_layers$name[10])
Save it to your hard drive, to avoid downloading it every time:
st_write(est_cont, "est_cont.gpkg", delete_dsn = T, delete_layer = T)
To make the display of Estonian contour fast we can simplify it. One
option is to apply st_simplify(), but this function may end
up with gaps and/or overlapping polygons:
est_cont_smpl <- st_simplify(est_cont, preserveTopology = F, dTolerance = 200)
To keep the geometries correct we can apply
ms_simplify() from library rmapshaper:
est_cont_smpl <- est_cont %>%
ms_simplify(keep = 0.1, keep_shapes = F)
object.size(est_cont)
object.size(est_cont_smpl)
Plot:
ggplot()+
geom_sf(data = est_cont_smpl, fill = "grey", col = "grey30", size = 0.25)+
geom_point(data = airTemp_Df, aes(x = longitude, y=latitude, fill = airtemperature), shape=21, size=5)+
scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Temperature (°C)")
What happened?!
Layers do not overlap. The reason is that temperature data is in
geographical coordinates and estonian counties in LEST-97.
Transform counties to EPSG:4326:
est_cont_4326_smpl <- est_cont_smpl %>%
st_transform(4326)
ggplot()+
geom_sf(data = est_cont_4326_smpl, fill = "grey", col = "grey30", size = 0.25)+
geom_point(data = airTemp_Df, aes(x = longitude, y=latitude, fill = airtemperature), shape=21, size=5)+
scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Temperature (°C)")
Creating the voronoi diagram is spatial operation and assumes that point data is converted to sf object:
#create simple feature:
airTemp_sf <- st_as_sf(airTemp_Df, coords= c("longitude", "latitude"), crs = 4326)
#Transform from lat-lon (WGS84) to Estonian official CRS (LEST-97 [epsg:3301])
airTemp_sf_3301 <- st_transform(airTemp_sf, 3301)
class(airTemp_sf_3301)
## [1] "sf" "data.frame"
glimpse(airTemp_sf_3301)
## Rows: 97
## Columns: 2
## $ airtemperature <dbl> 18.4, 21.0, 13.7, 19.5, 20.4, 18.9, 20.0, 16.5, 19.8, 1…
## $ geometry <POINT [m]> POINT (411346.6 6459155), POINT (534250.5 6584619…
Create the Voronoi diagramm:
# Combines geometries into one:
airTemp_sf_3301_gm <- st_union(airTemp_sf_3301)
# Create the Voronoi diagram:
airTemp_sf_voronoi <- st_voronoi(airTemp_sf_3301_gm)
class(airTemp_sf_voronoi)
## [1] "sfc_GEOMETRYCOLLECTION" "sfc"
The output is sfc (Simple Feature Column), geometry column only. We need sf (complete spatial dataframe: sfc column plus data attributes):
airTemp_sf_voronoi <- st_sf(airTemp_sf_voronoi)
class(airTemp_sf_voronoi)
## [1] "sf" "data.frame"
glimpse(airTemp_sf_voronoi)
## Rows: 1
## Columns: 1
## $ airTemp_sf_voronoi <GEOMETRYCOLLECTION [m]> GEOMETRYCOLLECTION (POLYGON...
ggplot()+
geom_sf(data = airTemp_sf_voronoi, fill="red", colour = "black", size=0.5)
Now we have a Voronoi diagram of Estonian meteorological observation
sites. However, we need to mask it with the Estonian boundary and add
the missing attributes (air temperature data), since the Voronoi
polygons are currently empty geometrical objects. There are several
approaches to solve this problem. The most straightforward method is to
perform a spatial join between (st_join())
the Voronoi diagram and the spatial points layer:
#st_intersection masks the voronoi with Estonian contour:
airTemp_sf_voronoi <- st_intersection(st_cast(airTemp_sf_voronoi), st_union(st_buffer(est_cont_smpl, dist = 500)))
airTemp_sf_voronoi <- st_sf(airTemp_sf_voronoi)
# plot the geometry
ggplot()+
theme_void()+
geom_sf(data = airTemp_sf_voronoi)
Join the attributes (air temperasture) spatial points with voronoi polygons:
# Spatial join:
airTemp_sf_voronoi2 <- st_join(airTemp_sf_voronoi, airTemp_sf_3301)
glimpse(airTemp_sf_voronoi2)
## Rows: 97
## Columns: 2
## $ airtemperature <dbl> 13.4, 14.0, 13.1, 15.9, 12.9, 18.8, 18.4, 18.2, 18.…
## $ airTemp_sf_voronoi <GEOMETRY [m]> MULTIPOLYGON (((412461 6541..., MULTIPOLYG…
# plot the result:
ggplot()+
geom_sf(data = airTemp_sf_voronoi2, aes(fill = airtemperature), size=0.25, colour = "grey70")+
scale_fill_gradientn(colours = c("navyblue", "blue", "cyan", "green", "yellow", "orange", "red"))+
geom_sf_label(data = airTemp_sf_3301, aes(label = airtemperature), col="black", size=3, alpha = 0.5)+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime, fill = "Temperature (°C)")
If the observation network is dense, then the result looks also nice and logical. But very often the Voronoi interpolation is not smooth enough and we should consider to apply other methods.
According to the Wikipedia is inverse distance weighting (IDW) a type of deterministic method for multivariate interpolation with a known scattered set of points. The assigned values to unknown points are calculated with a weighted average of the values available at the known points.
The name given to this type of methods was motivated by the weighted average applied, since it resorts to the inverse of the distance to each known point (“amount of proximity”) when assigning weights.
Software capabilities are developing very rapidly. Next, we will look at two examples, where the first is an approach from years ago for creating interpolation grid, and in the second we see how a large amount of manual work has moved automatically inside functions:
# Get the extent of interpolation area:
grd_extent <- st_bbox(est_cont_smpl)
# create grid:
x.range <- as.numeric(c(grd_extent[1], grd_extent[3])) # min/max longitude of the interpolation area
y.range <- as.numeric(c(grd_extent[2], grd_extent[4])) # min/max latitude of the interpolation area
grd <- expand.grid(x = seq(from = x.range[1], to = x.range[2], by = 1000), y = seq(from = y.range[1], to = y.range[2], by = 1000)) # expand points to grid
Convert grid to spatial object:
sp::coordinates(grd) <- ~x + y
sp::gridded(grd) <- TRUE
class(grd)
## [1] "SpatialPixels"
## attr(,"package")
## [1] "sp"
sp::plot(grd)
# Create a grid for interpolation
grid <- st_make_grid(est_cont_smpl,
cellsize = 5000,
#n = c(100, 100)
) %>%
st_sf() %>%
st_filter(est_cont_smpl)
ggplot()+
theme_void()+
geom_sf(data = grid)
Interpolate the values:
tmp_time <- Sys.time()
# Perform IDW interpolation
idw_result <- gstat(formula = airtemperature ~ 1,
data = airTemp_sf_3301,
nmax = 10, # number of nearest observations
set = list(idp = 2)) %>%
predict(grid)
## [inverse distance weighted interpolation]
Sys.time() - tmp_time
## Time difference of 18.34252 secs
# Another option to create colour palette:
palett <- rich.colors(40, palette = "temperature", rgb = F)
# Plot
ggplot() +
theme_minimal() +
geom_sf(data = idw_result, aes(fill = var1.pred), col = NA) +
geom_sf(data = est_cont_smpl, fill = NA, size = 0.3, colour = "grey45") +
scale_fill_gradientn(colours = palett) +
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Temperature (°C)")
Or you are bored of traditional raster maps and want to use other
regular grid? Hexagonal binning for example? For the hexagonal binning
it’s better to use Estonian official CRS (LEST97, epsg:3301). This means
we have to transform temperature data. In case of
SpatialPointDataFrame we can’t use sf command
st_transform().
# Create hexagonal grid directly with sf
grid_hex <- st_make_grid(
est_cont_smpl, # assuming you have sf version of your boundary
cellsize = 10000,
square = FALSE # creates hexagonal grid
) %>%
st_sf() %>%
mutate(id = row_number())
Plot the hexagons:
ggplot()+
theme_void()+
geom_sf(data = grid_hex)+
geom_sf(data = est_cont_smpl, fill = NA, color = "red")
Calculate IDW and map it:
# calculate IDW for hexagons:
tmp_time <- Sys.time()
P_idw_hex <- gstat::idw(airtemperature~1, airTemp_sf_3301, newdata = grid_hex, idp = 2)
## [inverse distance weighted interpolation]
Sys.time() - tmp_time
## Time difference of 2.244089 secs
# Crop with Estonian contur
P_idw_hex <- P_idw_hex %>%
st_sf() %>%
st_filter(est_cont_smpl)
# plot it:
ggplot()+
theme_map()+
theme(panel.grid = element_line(color = "grey60", size = 0.15),
legend.position = "right")+
geom_sf(data = P_idw_hex, aes(fill = var1.pred), col = "grey60", size = 0.1)+
geom_sf(data = est_cont_smpl, fill = NA, size = 0.3, colour = "grey45") +
#scale_fill_viridis_c(option = "cividis")+
scale_fill_gradientn(colours = c("blue", "lightblue", "lightyellow", "yellow", "orange", "red"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Celsius")
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
In current example we use 2nd order polynomial. How the spatial interpolation work is explained in here: link
# Create a regular grid using stars
grd <- st_bbox(est_cont_smpl) %>%
st_as_stars(dx = 500, dy = 500) %>%
st_crop(est_cont_smpl) %>%
st_as_sf(as_points = TRUE)
# Extract coordinates for the grid and ensure numeric type
grd_coords <- st_coordinates(grd)
grd$X <- as.numeric(grd_coords[,1])
grd$Y <- as.numeric(grd_coords[,2])
# Prepare temperature data with coordinates (ensure same data type)
airTemp_coords <- st_coordinates(airTemp_sf_3301)
airTemp_df <- airTemp_sf_3301 %>%
st_drop_geometry() %>%
mutate(X = as.numeric(airTemp_coords[,1]),
Y = as.numeric(airTemp_coords[,2]))
# Check data types are consistent
str(airTemp_df[c("X", "Y")])
## 'data.frame': 97 obs. of 2 variables:
## $ X: num 411347 534251 502278 643825 693367 ...
## $ Y: num 6459155 6584619 6583505 6600925 6581667 ...
str(grd[c("X", "Y")])
## Classes 'sf' and 'data.frame': 173853 obs. of 3 variables:
## $ X : num 601284 601784 595784 596284 595284 ...
## $ Y : num 6617251 6616751 6615751 6615751 6615251 ...
## $ geometry:sfc_POINT of length 173853; first list element: 'XY' num 601284 6617251
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA
## ..- attr(*, "names")= chr [1:2] "X" "Y"
# 2nd order polynomial formula
f.2 <- as.formula(airtemperature ~ X + Y + I(X*X) + I(Y*Y) + I(X*Y))
# Run the regression model
lm.2 <- lm(f.2, data = airTemp_df)
# Predict on the grid
grd$var1.pred <- predict(lm.2, newdata = grd)
grd <- grd %>%
filter(!is.na(var1.pred))
# Plot
ggplot() +
theme_minimal() +
geom_sf(data = grd, aes(col = var1.pred)
#, col = NA
) +
geom_sf(data = est_cont_smpl, fill = NA, size = 0.01, colour = "grey45") +
#scale_color_gradientn(colours = palett) +
scale_color_gradientn(colours = c("navy", "darkgreen", "yellow", "orange", "red"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
color = "Temperature (°C)")
Sometimes we want to use contours instead of raster.
Convert points to raster using stars:
# Create a raster template
bbox <- st_bbox(grd)
raster_template <- st_as_stars(bbox, dx = 1000, dy = 1000) # adjust resolution as needed
# Interpolate points to raster
raster_data <- st_rasterize(grd["var1.pred"], template = raster_template)
plot(raster_data)
Create countours:
# Create contours
contours <- st_contour(raster_data,
contour_lines = F
)
# Remove infinite:
contours <- contours %>%
filter(!is.infinite(Min))
# Plot
ggplot() +
geom_sf(data = contours, aes(fill = Min)) +
scale_fill_gradientn(colours = c("purple4", "grey80", "forestgreen"),
#labels = contours$var1.pred
)+
theme_void() +
labs(title = "Temperature Contours", fill = "°C")
# 3rd polynomial model:
#f.3 <- as.formula(airtemperature ~ X + Y + I(X^X)+I(Y^Y) + I(X^Y))
f.3 <- as.formula(airtemperature ~ X + Y + I(X^2) + I(Y^2) + I(X*Y) +
I(X^3) + I(Y^3) + I(X^2*Y) + I(X*Y^2))
# Run the regression model
lm.3 <- lm(f.3, data = airTemp_df)
summary(lm.3)
##
## Call:
## lm(formula = f.3, data = airTemp_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3900 -0.5387 0.0932 0.7071 3.7719
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -246598.5371448798978235573 201734.8145534356008283794 -1.222
## X 0.1121323274321209262 0.0234430729969315604 4.783
## Y 0.1023083088598909784 0.0931237553565511661 1.099
## I(X^2) 0.0000000013555808053 0.0000000022079894727 0.614
## I(Y^2) -0.0000000139501596026 0.0000000143403108052 -0.973
## I(X * Y) -0.0000000348218185693 0.0000000073993551758 -4.706
## I(X^3) 0.0000000000000003974 0.0000000000000001696 2.343
## I(Y^3) 0.0000000000000006222 0.0000000000000007367 0.845
## I(X^2 * Y) -0.0000000000000003436 0.0000000000000003559 -0.965
## I(X * Y^2) 0.0000000000000027174 0.0000000000000005850 4.645
## Pr(>|t|)
## (Intercept) 0.2249
## X 0.00000698 ***
## Y 0.2750
## I(X^2) 0.5409
## I(Y^2) 0.3334
## I(X * Y) 0.00000945 ***
## I(X^3) 0.0214 *
## I(Y^3) 0.4007
## I(X^2 * Y) 0.3371
## I(X * Y^2) 0.00001196 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.28 on 87 degrees of freedom
## Multiple R-squared: 0.7129, Adjusted R-squared: 0.6832
## F-statistic: 24.01 on 9 and 87 DF, p-value: < 0.00000000000000022
# Predict on the grid (now should work)
grd$var1.pred_3rd <- predict(lm.3, newdata = (grd))
grd <- grd %>%
filter(!is.na(var1.pred_3rd))
# Plot
ggplot() +
theme_minimal() +
geom_sf(data = grd, aes(col = var1.pred_3rd)
#, col = NA
) +
geom_sf(data = est_cont_smpl, fill = NA, size = 0.01, colour = "grey45") +
#scale_color_gradientn(colours = palett) +
scale_color_gradientn(colours = c("navy", "darkgreen", "yellow", "orange", "red"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
color = "Temperature (°C)")
Still bad…
Accordign to Wikipedia: “In statistics, originally in geostatistics, kriging or Gaussian process regression is a method of interpolation for which the interpolated values are modeled by a Gaussian process governed by prior covariances. Under suitable assumptions on the priors, kriging gives the best linear unbiased prediction of the intermediate values. Interpolating methods based on other criteria such as smoothness (e.g., smoothing spline) need not yield the most likely intermediate values. The method is widely used in the domain of spatial analysis and computer experiments.”
Kriging is a little more involved than IDW as it requires the construction of a semivariogram model to describe the spatial autocorrelation pattern for your particular variable.
More about kriging (and other methods): link
# define the model:
#P_gs <- gstat::gstat(formula = airtemperature ~ 1, locations = airTemp_Df_xy)
P_gs <- gstat::gstat(formula = airtemperature ~ 1, locations = airTemp_sf_3301)
P_v <- variogram(P_gs, width = 1)
plot(P_v)
P_semivariog <- variogram(airtemperature ~ 1, locations = airTemp_sf_3301, data = airTemp_sf_3301)
plot(P_semivariog)
Semivariogram:
P_semivariog %>%
knitr::kable()
| np | dist | gamma | dir.hor | dir.ver | id |
|---|---|---|---|---|---|
| 24 | 4994.109 | 0.8358333 | 0 | 0 | var1 |
| 47 | 14554.163 | 2.0474468 | 0 | 0 | var1 |
| 83 | 23955.323 | 3.0221687 | 0 | 0 | var1 |
| 102 | 33716.388 | 2.0398039 | 0 | 0 | var1 |
| 159 | 43032.924 | 2.0177673 | 0 | 0 | var1 |
| 164 | 52174.055 | 2.5081098 | 0 | 0 | var1 |
| 179 | 61515.520 | 3.0781285 | 0 | 0 | var1 |
| 180 | 71231.771 | 3.3664444 | 0 | 0 | var1 |
| 201 | 80434.165 | 3.4283333 | 0 | 0 | var1 |
| 228 | 90107.836 | 3.2912719 | 0 | 0 | var1 |
| 244 | 99837.892 | 3.9166803 | 0 | 0 | var1 |
| 226 | 109519.302 | 5.5529425 | 0 | 0 | var1 |
| 210 | 118584.580 | 4.4717143 | 0 | 0 | var1 |
| 241 | 128263.227 | 4.5182158 | 0 | 0 | var1 |
| 245 | 137986.365 | 5.5690000 | 0 | 0 | var1 |
From the empirical semivariogram plot and the information contained in the semivariog gstat object, we can estimate the sill, range and nugget to use in our model semivariogram.
“https://mgimond.github.io/Spatial/spatial-interpolation.html”
# From your semivariogram plot, visually identify:
# 1. NUGGET = Y-intercept (where the curve starts on Y-axis)
# 2. SILL = Plateau value (where the curve levels off)
# 3. RANGE = X-value where curve reaches ~95% of sill
# Based on your semivariogram plot, estimate:
# REPLACE WITH CORRECT VALUES!!!
nugget <- 1.2 # Y-intercept value
sill <- 3.7 # Plateau/maximum value
range <- 65000 # Distance where curve levels off
# For vgm(), you need PARTIAL SILL (not total sill)
psill <- sill - nugget # = 4.2 - 2.2 = 2.0
Syntax to help estimate parameters automatically:
# Nugget: minimum gamma value (or close to it)
nugget_auto <- min(P_semivariog$gamma)
# Sill: plateau value (mean of last few points)
n_points <- nrow(P_semivariog)
sill_auto <- mean(P_semivariog$gamma[(n_points-2):n_points])
# Range: distance where gamma reaches ~95% of sill
target_gamma <- nugget_auto + 0.95 * (sill_auto - nugget_auto)
range_auto <- P_semivariog$dist[which.max(P_semivariog$gamma >= target_gamma)]
psill_auto <- sill_auto - nugget_auto
Visually vs automatically detected values can be quite different…
| Parameter | Visual | Automatic |
|---|---|---|
| nugget | 1.2 | 0.8 |
| psill | 2.5 | 4.0 |
| range | 65000.0 | 109519.3 |
!!! Interpolation may fail if parameters are set incorrectly.
Calculate the Variogram Model:
P_model.variog <- vgm(psill = psill, model = "Gau", nugget = nugget, range = range)
# fit model:
P_fit.variog <- fit.variogram(P_semivariog, P_model.variog)
## Warning in fit.variogram(P_semivariog, P_model.variog): No convergence after
## 200 iterations: try different initial values?
plot(P_semivariog, P_fit.variog) # check from Help what are the other available models and choose the best fit!```
Kriging:
P_krig <- gstat::krige(formula = airtemperature ~ 1, locations = airTemp_sf_3301, newdata = grid, model = P_model.variog)
## [using ordinary kriging]
# just in case
#airTemp_sf_3301 <- airTemp_sf_3301 %>%
# filter(!is.na(airtemperature))
#st_crs(airTemp_sf_3301) <- st_crs(grid)
Map:
ggplot()+
theme_minimal()+
geom_sf(data = P_krig, aes(fill = var1.pred), color = NA, linewidth = .01)+
#geom_sf(data = est_cont, fill=NA, size=0.3, colour="grey45")+
scale_fill_gradientn(colours = c("navy", "lightblue", "yellow", "orange"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Temperature (°C)")
And finally put raster, isotherms and labels together:
ggplot()+
theme_minimal()+
geom_sf(data = P_krig, aes(fill = var1.pred), color = NA, linewidth = .01)+
geom_sf(data = est_cont_smpl, fill = NA, size = 0.3, colour = "grey45")+
scale_fill_gradientn(colours = c("navy", "lightblue", "yellow", "orange"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Temperature (°C)")+
geom_sf_label(data = airTemp_sf_3301, aes(label = airtemperature), size = 2, alpha = 0.75)
Are you satisfied with this? Sometimes it is very hard to find suitable interpolation method. In that case you can try to apply some simple statistical model. Let’s test how it could be done with multiple linear regression. Look again the air temperature observations.
Before we run the model, we should check, is there correlation between air temperature and coordinates:
ggplot(data = airTemp_Df)+
geom_point(aes(x= longitude, y = airtemperature))+
stat_smooth(aes(x= longitude, y = airtemperature), method = "lm")
ggplot(data = airTemp_Df)+
geom_point(aes(x= latitude, y = airtemperature))+
stat_smooth(aes(x= latitude, y = airtemperature), method = "lm")
Correlation between longitude and air temperature is 0.19 and correlation between latitude and air temperature is 0.24. Let’s try…
Create the multiple linear regression model, where air temperature is function and coordinates are arguments:
# first add LEST-97 coordinates to the attributes table:
airTemp_sf_3301 <- bind_cols(airTemp_sf_3301, st_coordinates(airTemp_sf_3301))
airtemperature_lm <- lm(airtemperature ~ X + Y,
data = st_drop_geometry(airTemp_sf_3301))
Check the model. Is it significant (P-value < 0.05)? Are all model members significant?:
# model summary:
summary(airtemperature_lm)
##
## Call:
## lm(formula = airtemperature ~ X + Y, data = st_drop_geometry(airTemp_sf_3301))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0185 -1.0338 0.1106 1.4325 3.9217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -37.713957776 24.531936932 -1.537 0.1276
## X 0.000003997 0.000002339 1.709 0.0908 .
## Y 0.000008419 0.000003782 2.226 0.0284 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.2 on 94 degrees of freedom
## Multiple R-squared: 0.08416, Adjusted R-squared: 0.06468
## F-statistic: 4.319 on 2 and 94 DF, p-value: 0.01605
# model coefficients:
coef(airtemperature_lm)
## (Intercept) X Y
## -37.713957775582 0.000003997402 0.000008418713
Model is significant. We can now calculate values for all the hexagons. We can use previousely created hexagons:
ggplot()+
theme_void()+
geom_sf(data = grid_hex)+
geom_sf(data = est_cont_smpl, fill = NA, color = "red")
Model:
# make copy of hexagons layer:
grid_hex_2 <- grid_hex
# calculte hexagon centroids and add them to the table:
grid_hex_2 <- bind_cols(grid_hex_2, st_coordinates(st_centroid(grid_hex_2)))
## Warning: st_centroid assumes attributes are constant over geometries
glimpse(grid_hex_2)
## Rows: 1,193
## Columns: 4
## $ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
## $ X <dbl> 364033.8, 364033.8, 364033.8, 364033.8, 364033.8, 364033.8, 3…
## $ Y <dbl> 6385807, 6403127, 6420448, 6437768, 6455089, 6472409, 6489730…
## $ geometry <POLYGON [m]> POLYGON ((364033.8 6380033,..., POLYGON ((364033.8 63…
# calculate predictions:
grid_hex_2 <- grid_hex_2 %>%
mutate(airtemp_mod = X * coef(airtemperature_lm)[2] +
Y * coef(airtemperature_lm)[3] +
coef(airtemperature_lm)[1]) %>%
st_filter(est_cont_smpl)
Plot the results:
ggplot(data = grid_hex_2)+
geom_sf(aes(fill = airtemp_mod), color = NA)+
scale_fill_gradientn(colours = c("blue", "lightblue", "grey90","yellow", "orange"))+
geom_sf(data = est_cont_smpl, fill = NA, color = "grey")
Not very nice?
It is a well known fact to geographers that for every 100 meters of
elevation gained above sea level, air temperature decreases by 0.6
degrees. Maybe we should add the elevation component to our model as
well? Let’s try!
Download the elevation data:
est_elev <- get_elev_raster(locations = est_cont_smpl, z = 10, clip = "bbox")
# Convert to terra raster for better performance
est_elev_terra <- rast(est_elev)
plot(est_elev_terra, main = "Estonia Elevation")
Extract elevation for meteorological stations using
terra():
airTemp_sf_elev <- terra::extract(est_elev_terra, vect(airTemp_sf_3301))
## Warning: [extract] transforming vector data to the CRS of the raster
airTemp_sf_3301$elev <- airTemp_sf_elev[,2] # Second column contains elevation values
ggplot() +
geom_sf(data = est_cont_smpl, fill = "lightgray", color = "white") +
geom_sf(data = airTemp_sf_3301, aes(color = elev, size = elev), alpha = 0.8) +
scale_color_gradientn(colours = c("navy", "blue", "lightblue", "yellow", "orange", "brown"), na.value = "magenta", name = "Elevation (m)") +
scale_size_continuous(range = c(1, 4), guide = "none") +
theme_void() +
labs(title = "Meteorological Stations - Elevation")
Extract average elevation for hexagonal grid using terra & Convert sf to terra vector for extraction:
#Convert:
grid_hex_vect <- vect(grid_hex)
# Extract elevation values
hex_elev_extracted <- terra::extract(est_elev_terra, grid_hex, fun = mean, na.rm = TRUE)
## Warning: [extract] transforming vector data to the CRS of the raster
# Add elevation to the sf object
grid_hex_elev <- grid_hex
grid_hex_elev$elevation <- hex_elev_extracted[,2]
# Plot elevation map:
ggplot() +
geom_sf(data = grid_hex_elev, aes(fill = elevation), color = "white", size = 0.1) +
scale_fill_viridis_c(option = "terrain", na.value = "gray90", name = "Elevation (m)") +
theme_void() +
labs(title = "Estonia Elevation Map")
## Warning in viridisLite::viridis(n, alpha, begin, end, direction, option):
## Option 'terrain' does not exist. Defaulting to 'viridis'.
As we are dealing with air temperature we should crop the map:
grid_hex_elev <- grid_hex_elev %>%
st_filter(est_cont_smpl)
ggplot()+
geom_sf(data = grid_hex_elev, aes(fill = elevation))+
scale_fill_gradientn(colours = c("blue", "lightyellow", "brown"))
Create the model:
# tidy the table (add centroid coordinates to attributes table):
grid_hex_elev <- bind_cols(grid_hex_elev, st_coordinates(st_centroid(grid_hex_elev)))
# the model:
airtemperature_lm2 <- lm(airtemperature ~ X + Y + elev,
data = st_drop_geometry(airTemp_sf_3301))
summary(airtemperature_lm2)
##
## Call:
## lm(formula = airtemperature ~ X + Y + elev, data = st_drop_geometry(airTemp_sf_3301))
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0679 -1.0545 0.1125 1.2054 4.0237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -41.349506797 25.227854379 -1.639 0.105
## X 0.000003049 0.000002759 1.105 0.272
## Y 0.000009034 0.000003909 2.311 0.023 *
## elev 0.004983239 0.007624210 0.654 0.515
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.206 on 93 degrees of freedom
## Multiple R-squared: 0.08835, Adjusted R-squared: 0.05894
## F-statistic: 3.004 on 3 and 93 DF, p-value: 0.03435
coef(airtemperature_lm2)
## (Intercept) X Y elev
## -41.349506796517 0.000003049380 0.000009034112 0.004983239009
# calculate prediction for every polygon:
grid_hex_elev <- grid_hex_elev %>%
mutate(airtemp_mod_elev = X * coef(airtemperature_lm2)[2] +
Y * coef(airtemperature_lm2)[3] +
elevation * coef(airtemperature_lm2)[4]+
coef(airtemperature_lm2)[1])
# final plot:
ggplot()+
geom_sf(data = grid_hex_elev, aes( fill = airtemp_mod_elev), color = NA, linewidth = .01)+
geom_sf(data = est_cont, fill = NA, size = 0.3, colour="grey45")+
scale_fill_gradientn(colours = c("dodgerblue", "yellow", "orange"))+
labs(title = "Air temperature in Estonia", subtitle = weatherDownTime,
fill = "Temperature (°C)")
You have to be careful! Is it still the map of air temperature of is it map of elevation?
Wikipedia: “A heat map (or heatmap) is a graphical representation of data where the individual values contained in a matrix are represented as colors. A density function visualization is a heat map for representing the density of dots in a map.”
Sometimes we even don’t need complex time consuming calculations and just using the transparency works well. In our next example we use gps-points from Monterey city, California. For that we have to download monterey map from Stamen maps.
# Import data:
good_bad_places <- read_csv("https://aasa.ut.ee/Rspatial/data/good_bad_places.csv")
## Rows: 273 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): type
## dbl (2): latitude, longitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(good_bad_places)
## Rows: 273
## Columns: 3
## $ latitude <dbl> 58.37997, 58.38296, 58.38061, 58.35542, 58.35646, 58.35639, …
## $ longitude <dbl> 26.73287, 26.72441, 26.72676, 26.72159, 26.73336, 26.73360, …
## $ type <chr> "good", "good", "good", "bad", "bad", "good", "good", "bad",…
good_bad_places <- good_bad_places %>%
filter(!is.na(longitude))
# convert it to sf object:
votes_sf <- st_as_sf(good_bad_places,
coords = c("longitude", "latitude"),
crs = 4326,
remove = F)
# Basic transparent dots - density shows through overlapping
ggplot() +
theme_map()+
geom_point(data = good_bad_places,
aes(x = longitude, y = latitude, color = type),
alpha = 0.3, size = 2) +
scale_color_manual(values = c("good" = "green", "bad" = "red")) +
theme_minimal() +
labs(title = "Heatmap with Transparent Dots",
subtitle = "Density visible through alpha blending",
x = "Longitude", y = "Latitude", color = "Vote Type") +
coord_fixed()
# Enhanced version with size and alpha variation
ggplot() +
theme_map()+
geom_point(data = good_bad_places,
aes(x = longitude, y = latitude, color = type),
alpha = 0.4, size = 3) +
stat_density_2d(data = good_bad_places,
aes(x = longitude, y = latitude),
alpha = 0.6, color = "grey", size = 0.5) +
scale_color_manual(values = c("good" = "#2E8B57", "bad" = "#DC143C")) +
labs(title = "Transparent Dots + Density Contours",
color = "Vote Type")
Simple 2D density plot:
ggplot(data = good_bad_places, aes(x = longitude, y = latitude)) +
theme_map()+
stat_density_2d_filled(alpha = 0.8, bins = 15) +
geom_point(size = 0.5, alpha = 0.6) +
scale_fill_viridis_d(option = "plasma", name = "Density") +
labs(title = "2D Kernel Density Heatmap",
x = "Longitude", y = "Latitude")
Hexagonal binning - counts points in each hexagon:
ggplot(data = good_bad_places, aes(x = longitude, y = latitude)) +
theme_map()+
theme(panel.background = element_rect(fill = "black"),
plot.background = element_rect(fill = "black"),
plot.title = element_text(color = "yellow"),
legend.background = element_rect(fill = "black"),
legend.title = element_text(color = "yellow"),
legend.text = element_text(color = "yellow")
)+
stat_bin_hex(bins = 20, alpha = 0.8) +
scale_fill_gradientn(colours = c("grey", "black", "yellow"), name = "Count") +
labs(title = "Hexagonal Binning Heatmap",
subtitle = "Count of votes in each hexagonal cell",
x = "Longitude", y = "Latitude")
Hexagonal binning by vote type:
good_bad_places %>%
count(type, lon_bin = round(longitude, 3), lat_bin = round(latitude, 3)) %>%
ggplot(aes(x = lon_bin, y = lat_bin)) +
theme_map()+
stat_bin_hex(aes(weight = n), bins = 15, alpha = 0.8) +
facet_wrap(~type, labeller = as_labeller(c("good" = "Good Places", "bad" = "Bad Places"))) +
scale_fill_gradientn(colours = c("grey", "black", "yellow"), name = "Count") +
labs(title = "Hexagonal Binning by Vote Type",
x = "Longitude", y = "Latitude")
Without the spatial context the maps are pointless…
We can download some additional layers and add them to the map. Let’s try to add road information to the maps.
# Define Tartu bounding box
#tartu_bbox <- c(left = 26.69, bottom = 58.36, right = 26.75, top = 58.40)
st_bbox(votes_sf)
## xmin ymin xmax ymax
## 26.64568 58.34292 26.79125 58.39960
# Download roads
roads <- opq(bbox = st_bbox(votes_sf)) %>%
add_osm_feature(key = 'highway',
value = c('motorway', 'trunk', 'primary', 'secondary',
'tertiary', 'residential', 'unclassified')) %>%
osmdata_sf()
ggplot()+
geom_sf(data = roads$osm_lines, aes(color = highway), linewidth = .2)+
scale_colour_manual(values = c("grey80", "grey70", "grey50", "grey40", "grey20", "grey"))+
guides(colour = "none")
ggplot() +
theme_void() +
# Filled contours for transparent fill
stat_density_2d_filled(data = good_bad_places,
aes(x = longitude, y = latitude),
alpha = .7,
bins = 8) +
# Contour lines on top
stat_density_2d(data = good_bad_places,
aes(x = longitude, y = latitude),
color = "white",
size = 0.3,
alpha = 1) +
scale_fill_viridis_d(name = "Density") +
# roads
geom_sf(data = roads$osm_lines, linewidth = .2, color = "black") +guides(colour = "none") +
labs(title = "Transparent Fill + Contour Lines")
Author: Anto Aasa
Supervisors: Anto Aasa
LTOM.02.041
Last update: 2025-05-28 18:05:59.830887