library(tidyverse)
library(terra)
library(survival)Simulation of an integrated step selection analysis (iSSA)
Get started
Let’s load all necessary packages and set our seed for reproducibility.
set.seed(268)Next, we create a template raster.
rast <- rast(ext(c(-50, 50, -50, 50)), nrows = 250, ncols = 250, crs = "EPSG:6339")We will use the function normalize_raster_discrete to normalize our rasters to kernels.
normalize_raster_discrete <- function(x){
total_sum <- global(x, "sum", na.rm = TRUE)$sum
rst_pdf <- x/total_sum # Normalize to kernel
return(rst_pdf)
}Generate a movement track
Let’s create a track of 11 locations. We refer to these as our used locations.
used_locs_df <- tibble(id = 1:10,
x = sort(sample(1:50, size = 10)),
y = sort(sample(1:50, size = 10)),
t = 1:10) |>
add_row(id = 0, x = 0, y = 0, t = 0) |> # Add origin at t = 0
mutate(used = as.numeric(1)) |>
add_column(step_shape = NA,
step_rate = NA,
step_length = NA,
turn_angle = NA) |>
arrange(t) |>
as.data.frame()Movement is described through several properties, including step length and turning angle. The step length is the distance covered between consecutive steps (or a segment) given regular sampling, while the turning angle is the relative angle between consecutive segments.
Next, let’s calculate the step length and turning angle for our used locations.
for (i in 1:(nrow(used_locs_df)-1)){ # Skip last row
dx_1 <- used_locs_df$x[[i+1]] - used_locs_df$x[[i]]
dy_1 <- used_locs_df$y[[i+1]] - used_locs_df$y[[i]]
step_shape <- sqrt((dx_1)^2 + (dy_1)^2) # Find distance
step_rate <- used_locs_df$t[[i+1]] - used_locs_df$t[[i]] # Find time lag
step_length <- step_shape/step_rate # Find step length
used_locs_df$step_shape[[i]] <- step_shape
used_locs_df$step_rate[[i]] <- step_rate
used_locs_df$step_length[[i]] <- step_length
}for (i in 1:(nrow(used_locs_df)-2)){ # Skip last two rows
dx_1 <- used_locs_df$x[[i+1]] - used_locs_df$x[[i]]
dy_1 <- used_locs_df$y[[i+1]] - used_locs_df$y[[i]]
dx_2 <- used_locs_df$x[[i+2]] - used_locs_df$x[[i+1]]
dy_2 <- used_locs_df$y[[i+2]] - used_locs_df$y[[i+1]]
bearing_1 <- atan2(dy_1, dx_1) # Find bearing for segment 1
bearing_2 <- atan2(dy_2, dx_2) # Find bearing for segment 2
turn_angle <- bearing_2 - bearing_1 # Find difference in bearings (turning angle)
turn_angle <- (turn_angle + pi) %% (2 * pi) - pi # Normalize to pi
turn_angle <- turn_angle * 180 / pi # Convert to degrees
used_locs_df$turn_angle[[i]] <- turn_angle
}Create movement kernel
A movement kernel describes the probability of an animal moving from its current location to another in the absence of resource selection. We estimate this kernel from the used vs. available locations.
Let’s create a set of available location for time \(t\) using step length and turning angle at time \(t\).
available_locs_ls <- list() # Initialize empty listgenerate_available_locs <- function(df, n){
for (i in 1:(nrow(df)-2)){ # Skip last two rows
df_i <- df[i, ]
# Generate n available steps for x_0 and y_0
available_locs <- tibble(
id = df_i$id,
used = as.numeric(0),
# Use gamma distribution to randomly sample
step_length = rgamma(n, shape = df_i$step_shape, rate = df_i$step_rate),
# Use von Mises Distribution to randomly sample
turn_angle = CircStats::rvm(n, mean = 0, k = df_i$turn_angle),
x = df_i$x + step_length * cos(turn_angle),
y = df_i$y + step_length * sin(turn_angle),
t = df_i$t,
)
available_locs_ls[[i]] <- available_locs
}
available_locs_df <- bind_rows(available_locs_ls) # Unlist into data frame
return(available_locs_df)
}available_locs_df <- generate_available_locs(df = used_locs_df, n = 150)And bind the data frames for the used (used_locs_df) and available (available_locs_df) locations. This data structure will make our regression analysis easier.
all_locs_df <- used_locs_df |>
bind_rows(available_locs_df) |>
arrange(t)Create resource kernels
Resource selection refers to the “desirability” of an available spatial unit, given its resource value. This resource is typically described as an environmental feature, but it can be any type of feature of multidimensional space.
We use the function create_env_kernel to represent our environmental features.
create_env_kernel <- function(n, mean, sd, template = rast){
env_rast <- template
# Generate values and assign to raster cells
env_values <- sample(x = rnorm(n, mean = mean, sd), size = ncell(env_rast), replace = TRUE)
values(env_rast) <- env_values
# Add patchiness by averaging neighboring cells for each cell
env_rast <- focal(env_rast, w = 3, fun = "mean", na.rm = TRUE)
env_kernel <- normalize_raster_discrete(env_rast) # Normalize to kernel
return(env_kernel)
}Let’s create two environmental kernels.
env1_kernel <- create_env_kernel(n = 1e5, mean = 500, sd = 200)env2_kernel <- create_env_kernel(n = 1e5, mean = 900, sd = 250)
For each used and available location, we need to extract the value in each kernel.
annotate_locations <- function(locs_df, layer, col_name){
locs_vect <- vect(locs_df, geom = c("x", "y"), crs = "EPSG:6339")
if (crs(locs_vect) != crs(layer)){
locs_vect <- project(locs_vect, crs(layer))
}
layer_pts <- extract(layer, locs_vect, ID = FALSE) |>
as_tibble() |>
rename({{col_name}} := focal_mean)
locs_df <- bind_cols(locs_df, layer_pts)
return(locs_df)
}all_locs_df <- annotate_locations(all_locs_df, env1_kernel, "env1")
all_locs_df <- annotate_locations(all_locs_df, env2_kernel, "env2")Fit a conditional logistic regression
iSSA <- clogit(used ~ step_length + turn_angle + env1 + env2 + strata(id), data = all_locs_df)
summary(iSSA)Call:
coxph(formula = Surv(rep(1, 1361L), used) ~ step_length + turn_angle +
env1 + env2 + strata(id), data = all_locs_df, method = "exact")
n= 1359, number of events= 9
(2 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
step_length -2.952e-02 9.709e-01 1.445e-01 -0.204 0.83816
turn_angle -8.511e-02 9.184e-01 2.670e-02 -3.188 0.00143 **
env1 2.327e+05 Inf 1.889e+05 1.232 0.21796
env2 1.597e+05 Inf 2.264e+05 0.705 0.48076
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
step_length 0.9709 1.030 0.7314 1.2888
turn_angle 0.9184 1.089 0.8716 0.9677
env1 Inf 0.000 0.0000 Inf
env2 Inf 0.000 0.0000 Inf
Concordance= 0.567 (se = 0.162 )
Likelihood ratio test= 8.83 on 4 df, p=0.07
Wald test = 1.82 on 4 df, p=0.8
Score (logrank) test = 31.14 on 4 df, p=3e-06
iSSA_2 <- clogit(used ~ step_length + step_length*env1 + turn_angle + env1 + env2 + strata(id), data = all_locs_df)
summary(iSSA_2)Call:
coxph(formula = Surv(rep(1, 1361L), used) ~ step_length + step_length *
env1 + turn_angle + env1 + env2 + strata(id), data = all_locs_df,
method = "exact")
n= 1359, number of events= 9
(2 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
step_length -3.574e-01 6.995e-01 1.028e+00 -0.348 0.72804
env1 9.628e+04 Inf 4.588e+05 0.210 0.83379
turn_angle -8.471e-02 9.188e-01 2.655e-02 -3.190 0.00142 **
env2 1.593e+05 Inf 2.256e+05 0.706 0.48020
step_length:env1 1.896e+04 Inf 5.848e+04 0.324 0.74581
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
step_length 0.6995 1.430 0.09331 5.2438
env1 Inf 0.000 0.00000 Inf
turn_angle 0.9188 1.088 0.87218 0.9679
env2 Inf 0.000 0.00000 Inf
step_length:env1 Inf 0.000 0.00000 Inf
Concordance= 0.564 (se = 0.163 )
Likelihood ratio test= 8.94 on 5 df, p=0.1
Wald test = 1.92 on 5 df, p=0.9
Score (logrank) test = 31.29 on 5 df, p=8e-06