Simulation of an integrated step selection analysis (iSSA)

Authors

Alessandra Vidal Meza

Max Czapanskiy

Published

January 25, 2026

Get started

Let’s load all necessary packages and set our seed for reproducibility.

library(tidyverse)
library(terra)
library(survival)

set.seed(268)

Next, we create a template raster.

rast <- rast(ext(c(-50, 50, -50, 50)), nrows = 250, ncols = 250, crs = "EPSG:6339")

We will use the function normalize_raster_discrete to normalize our rasters to kernels.

normalize_raster_discrete <- function(x){
  
  total_sum <- global(x, "sum", na.rm = TRUE)$sum
  rst_pdf <- x/total_sum  # Normalize to kernel
  
  return(rst_pdf)
  
}

Generate a movement track

Let’s create a track of 11 locations. We refer to these as our used locations.

used_locs_df <- tibble(id = 1:10,
                       x = sort(sample(1:50, size = 10)),
                       y = sort(sample(1:50, size = 10)),
                       t = 1:10) |>
  add_row(id = 0, x = 0, y = 0, t = 0) |>   # Add origin at t = 0
  mutate(used = as.numeric(1)) |> 
  add_column(step_shape = NA,
             step_rate = NA,
             step_length = NA,
             turn_angle = NA) |>
  arrange(t) |> 
  as.data.frame()

Movement is described through several properties, including step length and turning angle. The step length is the distance covered between consecutive steps (or a segment) given regular sampling, while the turning angle is the relative angle between consecutive segments.

Next, let’s calculate the step length and turning angle for our used locations.

for (i in 1:(nrow(used_locs_df)-1)){   # Skip last row
    
    dx_1 <- used_locs_df$x[[i+1]] - used_locs_df$x[[i]] 
    dy_1 <- used_locs_df$y[[i+1]] - used_locs_df$y[[i]]
    
    step_shape <- sqrt((dx_1)^2 + (dy_1)^2)   # Find distance
    step_rate <- used_locs_df$t[[i+1]] - used_locs_df$t[[i]]  # Find time lag
    step_length <- step_shape/step_rate   # Find step length
      
    used_locs_df$step_shape[[i]] <- step_shape
    used_locs_df$step_rate[[i]] <- step_rate
    used_locs_df$step_length[[i]] <- step_length

}

for (i in 1:(nrow(used_locs_df)-2)){   # Skip last two rows
    
    dx_1 <- used_locs_df$x[[i+1]] - used_locs_df$x[[i]] 
    dy_1 <- used_locs_df$y[[i+1]] - used_locs_df$y[[i]]
  
    dx_2 <- used_locs_df$x[[i+2]] - used_locs_df$x[[i+1]]
    dy_2 <- used_locs_df$y[[i+2]] - used_locs_df$y[[i+1]]
    
    bearing_1 <- atan2(dy_1, dx_1)  # Find bearing for segment 1
    bearing_2 <- atan2(dy_2, dx_2)    # Find bearing for segment 2
    turn_angle <- bearing_2 - bearing_1   # Find difference in bearings (turning angle)
    turn_angle <- (turn_angle + pi) %% (2 * pi) - pi  # Normalize to pi
    turn_angle <- turn_angle * 180 / pi   # Convert to degrees
    
    used_locs_df$turn_angle[[i]] <- turn_angle

}

Create movement kernel

A movement kernel describes the probability of an animal moving from its current location to another in the absence of resource selection. We estimate this kernel from the used vs. available locations.

Let’s create a set of available location for time \(t\) using step length and turning angle at time \(t\).

available_locs_ls <- list()  # Initialize empty list

generate_available_locs <- function(df, n){
    
  for (i in 1:(nrow(df)-2)){   # Skip last two rows
    
    df_i <- df[i, ]
    
    # Generate n available steps for x_0 and y_0
    available_locs <- tibble(
      id = df_i$id,
      used = as.numeric(0),
      # Use gamma distribution to randomly sample
      step_length = rgamma(n, shape = df_i$step_shape, rate = df_i$step_rate),
      # Use von Mises Distribution to randomly sample
      turn_angle = CircStats::rvm(n, mean = 0, k = df_i$turn_angle), 
      x = df_i$x + step_length * cos(turn_angle), 
      y = df_i$y + step_length * sin(turn_angle),
      t = df_i$t,
      )
    
    available_locs_ls[[i]] <- available_locs
    
  }
  
  available_locs_df <- bind_rows(available_locs_ls)  # Unlist into data frame
  return(available_locs_df)
  
}

available_locs_df <- generate_available_locs(df = used_locs_df, n = 150)

And bind the data frames for the used (used_locs_df) and available (available_locs_df) locations. This data structure will make our regression analysis easier.

all_locs_df <- used_locs_df |> 
  bind_rows(available_locs_df) |> 
  arrange(t)

Create resource kernels

Resource selection refers to the “desirability” of an available spatial unit, given its resource value. This resource is typically described as an environmental feature, but it can be any type of feature of multidimensional space.

We use the function create_env_kernel to represent our environmental features.

create_env_kernel <- function(n, mean, sd, template = rast){
  
  env_rast <- template
  
  # Generate values and assign to raster cells
  env_values <- sample(x = rnorm(n, mean = mean, sd), size = ncell(env_rast), replace = TRUE)
  values(env_rast) <- env_values
  
  # Add patchiness by averaging neighboring cells for each cell
  env_rast <- focal(env_rast, w = 3, fun = "mean", na.rm = TRUE)
  
  env_kernel <- normalize_raster_discrete(env_rast)   # Normalize to kernel
  return(env_kernel)
  
}

Let’s create two environmental kernels.

env1_kernel <- create_env_kernel(n = 1e5, mean = 500, sd = 200)

env2_kernel <- create_env_kernel(n = 1e5, mean = 900, sd = 250)

For each used and available location, we need to extract the value in each kernel.

annotate_locations <- function(locs_df, layer, col_name){
  
  locs_vect <- vect(locs_df, geom = c("x", "y"), crs = "EPSG:6339")
  
  if (crs(locs_vect) != crs(layer)){
    locs_vect <- project(locs_vect, crs(layer))
  }
  
  layer_pts <- extract(layer, locs_vect, ID = FALSE) |> 
    as_tibble() |>
    rename({{col_name}} := focal_mean)
      
  locs_df <- bind_cols(locs_df, layer_pts)
  return(locs_df)
  
}

all_locs_df <- annotate_locations(all_locs_df, env1_kernel, "env1")
all_locs_df <- annotate_locations(all_locs_df, env2_kernel, "env2")

Fit a conditional logistic regression

iSSA <- clogit(used ~ step_length + turn_angle + env1 + env2 + strata(id), data = all_locs_df)
summary(iSSA)

Call:
coxph(formula = Surv(rep(1, 1361L), used) ~ step_length + turn_angle + 
    env1 + env2 + strata(id), data = all_locs_df, method = "exact")

  n= 1359, number of events= 9 
   (2 observations deleted due to missingness)

                  coef  exp(coef)   se(coef)      z Pr(>|z|)   
step_length -2.952e-02  9.709e-01  1.445e-01 -0.204  0.83816   
turn_angle  -8.511e-02  9.184e-01  2.670e-02 -3.188  0.00143 **
env1         2.327e+05        Inf  1.889e+05  1.232  0.21796   
env2         1.597e+05        Inf  2.264e+05  0.705  0.48076   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

            exp(coef) exp(-coef) lower .95 upper .95
step_length    0.9709      1.030    0.7314    1.2888
turn_angle     0.9184      1.089    0.8716    0.9677
env1              Inf      0.000    0.0000       Inf
env2              Inf      0.000    0.0000       Inf

Concordance= 0.567  (se = 0.162 )
Likelihood ratio test= 8.83  on 4 df,   p=0.07
Wald test            = 1.82  on 4 df,   p=0.8
Score (logrank) test = 31.14  on 4 df,   p=3e-06

iSSA_2 <- clogit(used ~ step_length + step_length*env1 + turn_angle + env1 + env2 + strata(id), data = all_locs_df)
summary(iSSA_2)

Call:
coxph(formula = Surv(rep(1, 1361L), used) ~ step_length + step_length * 
    env1 + turn_angle + env1 + env2 + strata(id), data = all_locs_df, 
    method = "exact")

  n= 1359, number of events= 9 
   (2 observations deleted due to missingness)

                       coef  exp(coef)   se(coef)      z Pr(>|z|)   
step_length      -3.574e-01  6.995e-01  1.028e+00 -0.348  0.72804   
env1              9.628e+04        Inf  4.588e+05  0.210  0.83379   
turn_angle       -8.471e-02  9.188e-01  2.655e-02 -3.190  0.00142 **
env2              1.593e+05        Inf  2.256e+05  0.706  0.48020   
step_length:env1  1.896e+04        Inf  5.848e+04  0.324  0.74581   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                 exp(coef) exp(-coef) lower .95 upper .95
step_length         0.6995      1.430   0.09331    5.2438
env1                   Inf      0.000   0.00000       Inf
turn_angle          0.9188      1.088   0.87218    0.9679
env2                   Inf      0.000   0.00000       Inf
step_length:env1       Inf      0.000   0.00000       Inf

Concordance= 0.564  (se = 0.163 )
Likelihood ratio test= 8.94  on 5 df,   p=0.1
Wald test            = 1.92  on 5 df,   p=0.9
Score (logrank) test = 31.29  on 5 df,   p=8e-06