Files
forested/R/functions.R
2026-02-10 04:52:37 -05:00

2288 lines
79 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#' Helper: Save Plot for Quarto Slide
#'
#' @description Saves a ggplot object as a PNG, sized to fit comfortably
#' below a standard slide title, with robust font handling.
#'
#' @param plot_obj The ggplot object to save.
#' @param name A short descriptive name (e.g., "wa_ecoregions").
#' @param type Either "map" or "plot". Adds this prefix to the filename.
#' @param width Width in inches (default: 10).
#' @param height Height in inches (default: 6.18).
#' @param dpi Resolution (default: 300).
#'
#' @return The full file path (invisibly).
#' @export
helper_save_fig <- function(plot_obj, name, type = c("map", "plot"),
width = 10, height = 6.18, dpi = 300) {
# 1. Input Validation & Directory Setup
type <- match.arg(type)
if (!dir.exists("figs")) dir.create("figs")
# Construct filename: figs/map_washington.png
filename <- paste0("figs/", type, "_", name, ".png")
# 2. Force Fonts (The "Amnesia" Fix)
# Critical: We must turn the engine on immediately before saving
# to prevent the new graphics device from reverting to Arial.
sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson")
showtext::showtext_auto()
showtext::showtext_opts(dpi = dpi)
# 3. Save the File
ggplot2::ggsave(
filename = filename,
plot = plot_obj,
width = width,
height = height,
dpi = dpi,
bg = "white" # Prevents transparent backgrounds
)
message(paste("Saved:", filename))
return(invisible(filename))
}
#' Register Project Fonts
#'
#' Registers 'Atkinson Hyperlegible Next' (Sans) and 'Atkinson Hyperlegible Mono'
#' (Monospace) with the sysfonts package for use in R graphics.
#'
#' @return NULL (called for side effects)
#' @export
setup_forestry_fonts <- function() {
font_dir <- "assets/fonts"
if (!dir.exists(font_dir)) {
warning(paste("Font directory not found at:", font_dir))
return(NULL)
}
# 1. Main Text Font (Sans)
sysfonts::font_add(
family = "atkinson",
regular = file.path(font_dir, "AtkinsonHyperlegibleNext-Regular.ttf"),
bold = file.path(font_dir, "AtkinsonHyperlegibleNext-Bold.ttf"),
italic = file.path(font_dir, "AtkinsonHyperlegibleNext-Italic.ttf"),
bolditalic = file.path(font_dir, "AtkinsonHyperlegibleNext-BoldItalic.ttf")
)
# 2. Code/Number Font (Mono)
sysfonts::font_add(
family = "atkinson_mono",
regular = file.path(font_dir, "AtkinsonHyperlegibleMono-Regular.ttf"),
bold = file.path(font_dir, "AtkinsonHyperlegibleMono-Bold.ttf")
)
# 3. Enable showtext rendering
showtext::showtext_auto()
showtext::showtext_opts(dpi = 300)
}
#' Standardized Void Theme (Maximal Data Ink)
#' @description Removes axes/grids for shape-focused maps, but keeps project fonts.
#' @export
theme_forestry_void <- function(base_size = 16) {
# 1. Start with the void (empty) theme
ggplot2::theme_void(base_family = "atkinson", base_size = base_size) +
# 2. Add back the specific text elements we need
ggplot2::theme(
# Keep the Legend styled correctly
legend.position = "bottom",
legend.title = ggplot2::element_text(
family = "atkinson",
face = "bold",
size = ggplot2::rel(1)
),
legend.text = ggplot2::element_text(
family = "atkinson",
size = ggplot2::rel(0.9)
),
# Keep Titles (if you use them inside the plot)
plot.title = ggplot2::element_text(
family = "atkinson",
face = "bold",
hjust = 0.5,
margin = ggplot2::margin(b = 5)
),
# Zero margins to force edges to touch
plot.margin = ggplot2::margin(5, 5, 5, 5)
)
}
#' Standard Forestry Plot Theme (Cowplot + Atkinson)
#'
#' @description A standardized theme for non-spatial plots (scatter, bar, line).
#' Based on cowplot::theme_cowplot(), it includes clean axes and a minimalist look.
#' Uses 'Atkinson Hyperlegible Next' for all text.
#'
#' @param font_size Integer. Base font size. Default is 14 (good for slides).
#' @param grid Logical. If TRUE, adds a light gray grid (useful for presentations).
#'
#' @importFrom cowplot theme_cowplot background_grid
#' @importFrom ggplot2 theme element_text element_rect margin
#' @export
theme_forestry_plot <- function(font_size = 14, grid = TRUE) {
# 1. Ensure Fonts are Registered
setup_forestry_fonts()
# 2. Base Theme (Cowplot with Atkinson)
# cowplot::theme_cowplot has a 'font_family' argument we can leverage directly
base <- cowplot::theme_cowplot(font_size = font_size, font_family = "atkinson") +
ggplot2::theme(
# Fonts: We force family="atkinson" here just to be safe for overrides
plot.title = ggplot2::element_text(family = "atkinson", face = "bold", hjust = 0.5, size = font_size + 2),
axis.title = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size),
legend.title = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size),
legend.text = ggplot2::element_text(family = "atkinson", size = font_size - 1),
# Optional: Use Monospace for axis numbers? (Uncomment if you want the "technical" look)
# axis.text = ggplot2::element_text(family = "atkinson_mono", size = font_size - 1),
# Backgrounds (Transparent for slides)
plot.background = ggplot2::element_rect(fill = "transparent", color = NA),
panel.background = ggplot2::element_rect(fill = "transparent", color = NA),
# Margins
legend.box.margin = ggplot2::margin(10, 10, 10, 10)
)
# 3. Optional Grid
if (grid) {
base <- base + cowplot::background_grid(major = "y", minor = "none", color.major = "grey92")
}
return(base)
}
#' Standardized Spatial Theme (Atkinson)
#' @description High-visibility map theme for presentations using Atkinson fonts.
#' @importFrom ggplot2 theme_minimal theme element_line element_blank element_text element_rect margin unit
#' @export
theme_forestry_spatial <- function(base_size = 16) {
# 1. Ensure Fonts are Registered
setup_forestry_fonts()
# 2. Base Theme with Relative Sizing
ggplot2::theme_minimal(base_family = "atkinson", base_size = base_size) +
ggplot2::theme(
# Minimalist grid lines (Graticules)
panel.grid.major = ggplot2::element_line(color = "grey90", linewidth = 0.2),
panel.grid.minor = ggplot2::element_blank(),
# Titles: Scale relative to base_size (12 * 1.5 = 18pt)
plot.title = ggplot2::element_text(
family = "atkinson",
face = "bold",
size = ggplot2::rel(1.5),
color = "#2c3e50"
),
# Axis Labels: Slightly smaller than base (12 * 0.8 = 9.6pt)
axis.text = ggplot2::element_text(
family = "atkinson",
size = ggplot2::rel(0.8),
color = "#5d6d7e"
),
# Legend: Scale relative to base
legend.position = "right",
legend.title = ggplot2::element_text(
family = "atkinson",
size = ggplot2::rel(1.1), # ~13.2pt
face = "bold"
),
legend.text = ggplot2::element_text(
family = "atkinson",
size = ggplot2::rel(0.9) # ~10.8pt
),
# Clean borders
panel.border = ggplot2::element_rect(color = "grey80", fill = NA, linewidth = 0.5),
plot.margin = ggplot2::margin(5, 5, 5, 5)
)
}
#' Simplified Theme Diagnostic
#' @description Uses built-in NC data to verify theme_forestry_spatial.
#' @importFrom sf st_read st_crs
#' @importFrom ggplot2 ggplot geom_sf labs coord_sf scale_x_continuous scale_y_continuous
#' @export
plot_theme_diagnostic <- function() {
# Pull NC data included with the sf package
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
# Render with your spatial theme
ggplot2::ggplot(nc) +
ggplot2::geom_sf(fill = "#27ae60", alpha = 0.3, color = "#2c3e50") +
# Force WGS84 graticules (Decimal Degrees)
ggplot2::coord_sf(datum = sf::st_crs(4326)) +
theme_forestry_spatial() +
ggplot2::labs(
title = "Theme Diagnostic: North Carolina",
subtitle = "Testing 24pt Title and 14pt Graticule Labels"
)
}
#' Combine Washington and Georgia Forest Data
#'
#' Merges the Washington and Georgia datasets into a single data frame, adding a
#' column to identify the source state.
#'
#' @param wa_data A data frame containing the Washington forest inventory data.
#' @param ga_data A data frame containing the Georgia forest inventory data.
#'
#' @return A single combined data frame with an additional column `.id`
#' (renamed to "state") indicating the source ("WA" or "GA").
#'
#' @examples
#' \dontrun{
#' combined <- combine_forest(wa_raw, ga_raw)
#' table(combined$state)
#' }
#'
#' @export
combine_forest <- function(wa_data, ga_data){
dplyr::bind_rows(
list(WA = wa_data, GA = ga_data),
.id = "state"
)
}
#' Format Statistical Summary Table
#'
#' @description Calculates descriptive statistics and returns a formatted `gt` table
#' ready for slide presentation.
#'
#' @param data A data frame or sf object.
#' @return A `gt` table object.
#'
#' @importFrom dplyr select arrange desc any_of where
#' @importFrom sf st_drop_geometry
#' @importFrom psych describe
#' @importFrom tibble rownames_to_column
#' @importFrom gt gt fmt_number cols_label tab_options google_font
#' @export
format_summary_table <- function(data) {
# 1. Calculate Stats (The Logic)
stats_df <- data %>%
sf::st_drop_geometry() %>%
dplyr::select(dplyr::where(is.numeric)) %>%
dplyr::select(-dplyr::any_of(c("year", "lon", "lat"))) %>%
psych::describe() %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "Variable") %>%
dplyr::select(-vars, -range, -se, -mad) %>%
dplyr::arrange(dplyr::desc(abs(kurtosis)))
# 2. Format Table (The Presentation)
stats_df %>%
gt::gt() %>%
gt::opt_table_font(
font = gt::google_font("Atkinson Hyperlegible")
) %>%
# Clean up column labels
gt::cols_label(
Variable = "Variable",
n = "N",
mean = "Mean",
sd = "SD",
median = "Median",
trimmed = "Trimmed",
min = "Min",
max = "Max",
skew = "Skew",
kurtosis = "Kurtosis"
) %>%
# Format all numbers to 2 decimal places
gt::fmt_number(
columns = dplyr::where(is.numeric),
decimals = 2
) %>%
# Format 'N' to 0 decimals
gt::fmt_number(
columns = "n",
decimals = 0
) %>%
# Styling: Increase font size and use your project font
gt::tab_options(
table.font.names = "Atkinson Hyperlegible",
table.font.size = "22px", # <--- Increases size to fill the slide
data_row.padding = "8px" # Adds breathing room
) %>%
gt::text_transform(
locations = gt::cells_body(columns = dplyr::where(is.numeric)),
fn = function(x) {
# \u2212 is the unicode minus that breaks things. Replace with standard "-".
gsub("\u2212", "-", x)
}
)
}
#' Plot US Map with Forestry Theme
#' @description Highlights Washington and Georgia using standardized presentation fonts.
#' @importFrom tigris states shift_geometry
#' @importFrom rmapshaper ms_simplify ms_filter_islands
#' @importFrom dplyr filter mutate
#' @importFrom ggplot2 ggplot geom_sf aes scale_fill_manual labs
#' @export
plot_us_map <- function() {
# 1. Fetch and filter for the Lower 48
us_map <- tigris::states(cb = TRUE, resolution = "20m", year = 2022, progress_bar = FALSE) %>%
dplyr::filter(!STUSPS %in% c("AK", "HI", "PR")) %>%
rmapshaper::ms_simplify(keep = 0.2) %>%
rmapshaper::ms_filter_islands(min_area = 1e9) %>%
tigris::shift_geometry() %>%
dplyr::mutate(
highlight = dplyr::case_when(
STUSPS == "WA" ~ "Washington",
STUSPS == "GA" ~ "Georgia",
TRUE ~ NA_character_
)
)
# 2. Define Colors
state_colors <- c("Washington" = "#E16A86", "Georgia" = "#00AD9A")
# 3. Plot
ggplot2::ggplot() +
# Layer 1: Base Map
ggplot2::geom_sf(data = us_map, fill = "grey98", color = "grey50", linewidth = 0.25) +
# Layer 2: Highlight Layer
ggplot2::geom_sf(
data = dplyr::filter(us_map, !is.na(highlight)),
ggplot2::aes(fill = highlight),
color = "grey50",
linewidth = 0.25
) +
# Custom Scales
ggplot2::scale_fill_manual(values = state_colors, name = "Study Sites") +
# Standardized Forestry Theme
theme_forestry_spatial()
}
#' Create Single State Topo Plot
#' @description Generates a topo map with manually tuned label placement for Georgia.
#' @export
plot_state_topo <- function(data, boundary_sf, raster_path, state_name) {
# 1. Load & Process Raster
elev_terra <- terra::rast(raster_path)
slope <- terra::terrain(elev_terra, "slope", unit = "radians")
aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
# 2. Prep Vectors
boundary_sf <- sf::st_transform(boundary_sf, 4326)
peaks_sf <- get_state_peaks(state_name)
# --- FIX: Manual Nudge Logic ---
# Define default nudges (0 means "let ggrepel decide")
nx <- 0
ny <- 0
if (state_name == "Georgia" && !is.null(peaks_sf)) {
# Prepare nudge vectors specific to the Georgia peaks to uncross the lines
# Units are in Decimal Degrees (roughly: 0.1 deg ~ 11km)
peaks_sf <- peaks_sf %>%
dplyr::mutate(
nudge_x = dplyr::case_when(
peak == "Rabun Bald" ~ 0.3, # Push Hard Right (East)
peak == "Blood Mtn" ~ -0.3, # Push Hard Left (West)
peak == "Brasstown Bald" ~ 0, # Keep Center horizontally
TRUE ~ 0
),
nudge_y = dplyr::case_when(
peak == "Brasstown Bald" ~ 0.2, # Push Up (North)
peak == "Rabun Bald" ~ -0.1, # Push Down slightly
TRUE ~ 0
)
)
# Extract as vectors for ggrepel
nx <- peaks_sf$nudge_x
ny <- peaks_sf$nudge_y
}
# 3. Plot
ggplot2::ggplot() +
tidyterra::geom_spatraster(data = elev_terra) +
tidyterra::scale_fill_hypso_c(
palette = "usgs-gswa2",
name = "Elevation (m)",
na.value = "transparent"
) +
tidyterra::geom_spatraster(
data = hill_terra,
ggplot2::aes(alpha = ggplot2::after_stat(value)),
fill = "black",
show.legend = FALSE
) +
ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
# REPEL SETTINGS
{if(!is.null(peaks_sf)) ggrepel::geom_label_repel(
data = peaks_sf, ggplot2::aes(label = peak, geometry = geometry),
stat = "sf_coordinates",
size = 6,
family = "atkinson",
fontface = "bold",
# Inject the manual nudges here
nudge_x = nx,
nudge_y = ny,
box.padding = 0.8,
point.padding = 0.5,
force = 10,
min.segment.length = 0
)} +
theme_forestry_void(base_size = 16) +
ggplot2::coord_sf(expand = FALSE) +
ggplot2::guides(
fill = ggplot2::guide_colorbar(
title.position = "top",
title.hjust = 0.5,
barwidth = ggplot2::unit(5, "cm"),
barheight = ggplot2::unit(0.3, "cm"),
frame.colour = "black",
ticks.colour = "black"
)
) +
ggplot2::theme(
legend.position = "bottom",
legend.margin = ggplot2::margin(t = 5, b = 0)
)
}
#' Save Combined Side-by-Side Topo Plot
#' @export
save_combined_topo <- function(wa_data, ga_data, wa_boundary, ga_boundary,
wa_raster_path, ga_raster_path, output_path) {
# [FIX 2] Ensure fonts are registered & showtext is ON for this worker process
setup_forestry_fonts()
p_wa <- plot_state_topo(wa_data, wa_boundary, wa_raster_path, "Washington")
p_ga <- plot_state_topo(ga_data, ga_boundary, ga_raster_path, "Georgia")
combined_plot <- patchwork::wrap_plots(p_wa, p_ga, ncol = 2) +
patchwork::plot_annotation(
tag_levels = 'a',
tag_prefix = '(',
tag_suffix = ')',
theme = theme_forestry_void(base_size = 20)
)
dir.create(dirname(output_path), showWarnings = FALSE)
# Save
ggplot2::ggsave(output_path, plot = combined_plot, width = 12, height = 6, dpi = 300, bg = "white")
return(output_path)
}
#' Plot Regional Comparison of Forested Data (WA vs GA)
#'
#' @description Generates a side-by-side comparison of forest cover for Washington
#' and Georgia. It handles font registration (Atkinson Hyperlegible), spatial
#' transformations, and creates a combined plot with a shared legend.
#'
#' @param data A data frame containing the forest point data. Must contain
#' columns `lon`, `lat`, `state`, and `forested`.
#' @param boundaries An `sf` object containing state boundaries. Must contain
#' a `NAME` column.
#'
#' @return A `patchwork` object containing the combined side-by-side maps.
#'
#' @importFrom sysfonts font_add_google
#' @importFrom showtext showtext_auto
#' @importFrom dplyr filter
#' @importFrom sf st_as_sf st_transform st_crs
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual coord_sf guides guide_legend theme_void theme element_text
#' @importFrom patchwork wrap_plots plot_layout plot_annotation
#'
#' @export
plot_regional_comparison <- function(data, boundaries) {
# Load the font from Google Fonts (or a local file)
font_add_google("Atkinson Hyperlegible", family = "atkinson")
# Turn on showtext to render the font
showtext_auto()
# Spatial Prep (Fixing the NAD83/WGS84 mismatch)
forest_sf <- data %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
sf::st_transform(sf::st_crs(boundaries))
state_lookup <- c("Washington" = "WA", "Georgia" = "GA")
# Washington Plot
abbr_wa <- state_lookup["Washington"]
p_wa <- ggplot2::ggplot() +
# State Boundary
ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Washington"),
fill = "grey98", color = "grey80") +
# Forest Points
ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_wa),
ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) +
# Scales
ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"),
name = "Forested") +
ggplot2::coord_sf(expand = TRUE) +
# Legend Tweaks
ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) +
theme_void()
# Georgia Plot
abbr_ga <- state_lookup["Georgia"]
p_ga <- ggplot2::ggplot() +
# State Boundary
ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Georgia"),
fill = "grey98", color = "grey80") +
# Forest Points
ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_ga),
ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) +
# Scales
ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"),
name = "Forested") +
ggplot2::coord_sf(expand = TRUE) +
# Legend Tweaks
ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) +
theme_void()
# Combine
final_plot <- patchwork::wrap_plots(p_wa, p_ga) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(
tag_levels = 'a',
tag_prefix = '(',
tag_suffix = ')'
) &
ggplot2::theme(
text = element_text(family = "atkinson", size = 16),
legend.position = "bottom"
)
return(final_plot)
}
#' Download EPA Level III Ecoregions Data
#'
#' @description Downloads the EPA Level III Ecoregions shapefile (zip format)
#' to a local directory. Implements a caching check to avoid re-downloading
#' if the file already exists.
#'
#' @param url Character string. The direct URL to the EPA Ecoregions zip file.
#' @param dest_dir Character string. The local directory where the data should
#' be saved. Defaults to "data/epa".
#'
#' @return A character string containing the full file path to the downloaded zip file.
#' This return value is designed to be tracked by \code{targets}.
#'
#' @importFrom utils download.file
#' @export
get_epa_ecoregions <- function(url, dest_dir = "data/epa") {
# Ensure directory exists
if (!dir.exists(dest_dir)) dir.create(dest_dir, recursive = TRUE)
# Define destination
dest_file <- file.path(dest_dir, "us_eco_l3.zip")
# Download only if missing
if (!file.exists(dest_file)) {
message("Downloading EPA Ecoregion Data...")
download.file(url, dest_file, mode = "wb")
}
# Return the file path for the next target to track
return(dest_file)
}
#' Process and Clip EPA Ecoregions
#'
#' @description Extracts EPA Level III ecoregion data from a zipped shapefile,
#' standardizes column names, and clips the geometry to specified state boundaries.
#' It includes robust steps for geometry repair (handling spherical validity),
#' small island removal, and simplification for optimized plotting.
#'
#' @param zip_path Character string. The file path to the zipped EPA shapefile.
#' @param target_states Character vector. The names of the states to clip the
#' ecoregions to. Defaults to \code{c("Washington", "Georgia")}.
#' @param simplify_tol Numeric. The simplification tolerance passed to
#' \code{rmapshaper::ms_simplify}. Range is 0-1, where higher numbers remove more detail.
#' Defaults to 0.05.
#'
#' @return An \code{sf} object containing the processed ecoregions with standardized
#' columns \code{US_L3NAME} and \code{STATE_NAME}.
#'
#' @importFrom tigris states
#' @importFrom dplyr filter rename select %>%
#' @importFrom rmapshaper ms_simplify ms_filter_islands
#' @importFrom sf read_sf st_transform st_crs st_make_valid st_intersection sf_use_s2
#' @importFrom utils unzip
#' @export
process_ecoregions <- function(zip_path, target_states = c("Washington", "Georgia"), simplify_tol = 0.05) {
message("Processing shapefiles for: ", paste(target_states, collapse = ", "))
# A. Fetch State Boundaries (The "Cookie Cutter")
boundaries <- tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
filter(NAME %in% target_states) %>%
# Simplify boundaries to match the ecoregion resolution
rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE)
# B. Unzip and Read EPA Data
clean_dir <- tempfile()
dir.create(clean_dir)
unzip(zip_path, exdir = clean_dir)
shp_file <- list.files(clean_dir, pattern = "\\.shp$", full.names = TRUE, recursive = TRUE)[1]
eco_raw <- sf::read_sf(shp_file)
# C. Standardize Columns (Handle variations in column names)
if (!"US_L3NAME" %in% names(eco_raw)) {
if ("NA_L3NAME" %in% names(eco_raw)) {
eco_raw <- rename(eco_raw, US_L3NAME = NA_L3NAME)
} else if ("LEVEL3_NAM" %in% names(eco_raw)) {
eco_raw <- rename(eco_raw, US_L3NAME = LEVEL3_NAM)
}
}
# D. Clip, Clean, and Simplify
suppressMessages(sf::sf_use_s2(FALSE)) # Turn off spherical geometry for robust clipping
eco_clean <- eco_raw %>%
st_transform(st_crs(boundaries)) %>%
st_make_valid() %>%
st_intersection(boundaries) %>%
select(US_L3NAME, STATE_NAME = NAME) %>%
rmapshaper::ms_filter_islands(min_area = 200000000) %>%
rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE)
suppressMessages(sf::sf_use_s2(TRUE))
return(eco_clean)
}
#' Plot Ecoregion Complexity Comparison (WA vs GA)
#'
#' @description Generates a side-by-side comparison of Level III ecoregions for Washington
#' and Georgia. It uses a "void" theme, qualitative colors, and carefully tuned
#' label repulsion settings to avoid overlapping text.
#'
#' @param eco_data An \code{sf} object containing ecoregion polygons. Must contain
#' columns \code{STATE_NAME} and \code{US_L3NAME}.
#'
#' @return A \code{patchwork} object containing the combined plot.
#'
#' @importFrom dplyr filter group_by summarize mutate
#' @importFrom sf st_transform st_union st_point_on_surface st_coordinates st_drop_geometry
#' @importFrom stringr str_wrap
#' @importFrom ggplot2 ggplot geom_sf aes coord_sf theme_void scale_fill_discrete guide_none element_text margin element_rect labs
#' @importFrom ggrepel geom_label_repel
#' @importFrom colorspace scale_fill_discrete_qualitative
#' @importFrom patchwork plot_annotation
#'
#' @export
plot_ecoregion_comparison <- function(eco_data) {
# 1. WASHINGTON PLOT
# A. Filter & Transform
wa_e <- eco_data %>%
dplyr::filter(STATE_NAME == "Washington") %>%
sf::st_transform(4326)
# B. Calculate Centroids for Labels
wa_labels <- wa_e %>%
dplyr::group_by(US_L3NAME) %>%
dplyr::summarize(geometry = sf::st_union(geometry)) %>%
dplyr::mutate(
lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1],
lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2],
clean_label = stringr::str_wrap(US_L3NAME, width = 35)
) %>%
sf::st_drop_geometry()
# C. Generate Plot
p_wa <- ggplot2::ggplot() +
# Geometries
ggplot2::geom_sf(data = wa_e, ggplot2::aes(fill = US_L3NAME),
alpha = 1, color = "white", linewidth = 0.2) +
ggplot2::coord_sf(datum = NA, expand = TRUE) +
# Repelled Labels
ggrepel::geom_label_repel(
data = wa_labels,
ggplot2::aes(x = lon, y = lat, label = clean_label),
inherit.aes = FALSE,
size = 5,
family = "atkinson",
lineheight = 1.6,
box.padding = 0.75,
min.segment.length = 0,
force = 30,
max.overlaps = Inf,
alpha = 0.95,
segment.color = "grey30",
segment.size = 1,
seed = 42
) +
# Colors
colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") +
# Manual Theme
ggplot2::theme_void(base_family = "atkinson")
# 2. GEORGIA PLOT
# A. Filter & Transform
ga_e <- eco_data %>%
dplyr::filter(STATE_NAME == "Georgia") %>%
sf::st_transform(4326)
# B. Calculate Centroids for Labels
ga_labels <- ga_e %>%
dplyr::group_by(US_L3NAME) %>%
dplyr::summarize(geometry = sf::st_union(geometry)) %>%
dplyr::mutate(
lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1],
lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2],
clean_label = stringr::str_wrap(US_L3NAME, width = 35)
) %>%
sf::st_drop_geometry()
# C. Generate Plot
p_ga <- ggplot2::ggplot() +
# Geometries
ggplot2::geom_sf(data = ga_e, ggplot2::aes(fill = US_L3NAME),
alpha = 1, color = "white", linewidth = 0.2) +
ggplot2::coord_sf(datum = NA, expand = TRUE) +
# Repelled Labels
ggrepel::geom_label_repel(
data = ga_labels,
ggplot2::aes(x = lon, y = lat, label = clean_label),
inherit.aes = FALSE,
size = 5,
family = "atkinson",
lineheight = 1.6,
box.padding = 0.75,
min.segment.length = 0,
force = 40,
max.overlaps = Inf,
alpha = 0.95,
segment.color = "grey30",
segment.size = 1,
seed = 4
) +
# Colors
colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") +
# Manual Theme
ggplot2::theme_void(base_family = "atkinson") +
ggplot2::theme(
plot.title = ggplot2::element_text(face = "bold", margin = ggplot2::margin(b = 10)),
plot.tag = ggplot2::element_text(face = "bold"),
legend.position = "bottom"
) +
ggplot2::labs(x = NULL, y = NULL)
# 3. COMBINE & ANNOTATE
combined_plot <- (p_wa + p_ga) +
patchwork::plot_annotation(
tag_levels = 'a',
tag_prefix = '(',
tag_suffix = ')'
) &
# Apply theme settings globally to the patchwork
ggplot2::theme_void(base_family = "atkinson")
return(combined_plot)
}
#' Plot Univariate Distributions by Forest Cover
#'
#' @description Generates a faceted density plot comparing numeric environmental
#' variables between forested and non-forested areas. It uses the "Dark 3"
#' qualitative palette and applys the Wilke cowplot theme.
#'
#' @param data A data frame or sf object containing the forest data.
#' Must include `forested` column and numeric environmental variables.
#'
#' @return A `ggplot` object.
#'
#' @importFrom dplyr select starts_with
#' @importFrom tidyr pivot_longer
#' @importFrom ggplot2 ggplot aes geom_density facet_wrap scale_fill_manual labs
#' @importFrom cowplot theme_cowplot
#' @importFrom colorspace qualitative_hcl
#' @importFrom sysfonts font_add_google
#' @importFrom showtext showtext_auto
#'
#' @export
plot_forest_distributions <- function(data) {
# 1. Typography Setup
sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson")
showtext::showtext_auto()
# 2. Color Palette (Dark 3 from colorspace)
# Generating the hex codes specifically for 2 levels
pal_colors <- colorspace::qualitative_hcl(palette = "Dark 3", n = 2)
# 3. Plotting
data %>%
dplyr::select(
forested, elevation, eastness, northness, roughness,
dew_temp, precip_annual,
dplyr::starts_with("temp_"),
dplyr::starts_with("vapor_")
) %>%
tidyr::pivot_longer(
cols = -forested,
names_to = "variable",
values_to = "value"
) %>%
ggplot2::ggplot(ggplot2::aes(x = value, fill = forested)) +
# color = NA removes the outline for a cleaner "Wilke-style" look
ggplot2::geom_density(alpha = 0.6, color = NA) +
ggplot2::facet_wrap(~ variable, scales = "free") +
# Scales & Labels
ggplot2::scale_fill_manual(values = pal_colors) +
ggplot2::labs(
x = NULL,
y = "Density",
fill = "Forested"
) +
ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.05))) +
ggplot2::scale_x_continuous(
expand = ggplot2::expansion(mult = c(0, 0)),
labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
# Theme
cowplot::theme_cowplot(font_family = "atkinson", font_size = 22) +
# Minor tweaks to spacing
ggplot2::theme(
legend.position = "bottom",
strip.background = ggplot2::element_blank(), # Clean facet headers
strip.text = ggplot2::element_text(face = "bold")
)
}
identify_outliers <- function(data){
# 1. Prepare the data (calculate Z-scores first)
plot_data <- data %>%
sf::st_drop_geometry() %>%
dplyr::select(forested, where(is.numeric)) %>%
dplyr::select(-any_of(c("lon", "lat", "year"))) %>%
tidyr::pivot_longer(cols = -forested, names_to = "variable", values_to = "value") %>%
dplyr::group_by(variable) %>%
# Wrap scale in as.numeric to fix the matrix warning
dplyr::mutate(scaled_value = as.numeric(scale(value))) %>%
dplyr::ungroup()
# 2. Plot
ggplot2::ggplot(plot_data, aes(x = forested, y = scaled_value, fill = forested)) +
# Draw the main distribution (suppress default outliers)
ggplot2::geom_boxplot(outlier.shape = NA, alpha = 0.4) +
# CRITICAL FIX: Only pass the extreme values to the points layer
ggplot2::geom_jitter(
data = dplyr::filter(plot_data, abs(scaled_value) > 3),
width = 0.2,
alpha = 0.6,
size = 1.5 # Make the outliers pop
) +
ggplot2::facet_wrap(~variable, scales = "free_y") +
ggplot2::theme_minimal() +
ggplot2::scale_fill_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) +
ggplot2::labs(
title = "Standardized Distribution & Extremes",
subtitle = "Points indicate observations > 3 Standard Deviations from mean",
y = "Z-Score (σ)"
)
}
fetch_study_area <- function(target_states) {
tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
dplyr::filter(NAME %in% target_states) %>%
rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE)
}
fetch_state_boundary <- function(state){
tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
dplyr::filter(NAME == state) %>%
rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE)
}
create_elevation_raster <- function(boundary_sf, path) {
elev_raster <- elevatr::get_elev_raster(locations = boundary_sf, z = 7, clip = "locations")
elev_terra <- terra::rast(elev_raster) %>% terra::mask(terra::vect(boundary_sf))
terra::writeRaster(elev_terra, path, overwrite = TRUE)
return(path)
}
#' Save Outlier Diagnostic Map
#'
#' @description Generates a diagnostic map highlighting multivariate outliers (Z > 3)
#' overlaid on a hillshaded elevation raster. Uses the standardized forestry theme.
#'
#' @param data A data frame containing the analysis dataset (must include numeric columns and 'forested' factor).
#' @param boundary_sf An \code{sf} object representing the state boundary (e.g., Washington).
#' @param raster_path Character string. File path to the elevation raster (.tif).
#' @param output_path Character string. File path where the PNG will be saved.
#'
#' @importFrom terra rast terrain shade
#' @importFrom sf st_transform st_as_sf st_filter st_drop_geometry
#' @importFrom dplyr select filter if_any all_of mutate
#' @importFrom tibble tibble
#' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_manual labs ggsave guides guide_legend
#' @importFrom tidyterra geom_spatraster scale_fill_hypso_c
#' @importFrom ggrepel geom_label_repel
#'
#' @return The \code{output_path} (invisible), for integration with \code{targets}.
#' @export
save_outlier_map_png <- function(data, boundary_sf, raster_path, output_path) {
# 1. Load & Process Raster
elev_terra <- terra::rast(raster_path)
slope <- terra::terrain(elev_terra, "slope", unit = "radians")
aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
# 2. Prep Vectors
wa_boundary_sf <- sf::st_transform(boundary_sf, 4326)
data_sf <- data %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
sf::st_filter(wa_boundary_sf)
# Identify numeric columns automatically
numeric_cols <- data_sf %>%
sf::st_drop_geometry() %>%
dplyr::select(where(is.numeric)) %>%
names()
# Filter for >3 Z-score
outliers_sf <- data_sf %>%
dplyr::filter(dplyr::if_any(dplyr::all_of(numeric_cols), ~ abs(scale(.)) > 3))
# WA Peak Labels
wa_peaks_sf <- tibble::tibble(
peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"),
lat = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013),
lon = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111)
) %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
# 3. Plot
plt <- ggplot2::ggplot() +
# Layer A: Elevation Color
tidyterra::geom_spatraster(data = elev_terra) +
tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") +
# Layer B: Hillshade Shadow Mask
tidyterra::geom_spatraster(
data = hill_terra,
ggplot2::aes(alpha = ggplot2::after_stat(value)),
fill = "black",
show.legend = FALSE
) +
ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
# Layer C: Vectors (Boundary + Outlier Points)
ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
ggplot2::geom_sf(data = outliers_sf, ggplot2::aes(color = forested), size = 2.5) +
ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) +
# Layer D: Labels
ggrepel::geom_label_repel(
data = wa_peaks_sf,
ggplot2::aes(label = peak, geometry = geometry),
stat = "sf_coordinates",
size = 5,
fontface = "bold",
box.padding = 0.6,
min.segment.length = 0
) +
# Theme Application
theme_forestry_spatial() +
labs(x = "", y = "")
# Save
ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white")
return(output_path)
}
plot_wa_pca <- function(data) {
# REVISED: Keep Lat/Lon in the numeric matrix
numeric_mat <- data %>%
sf::st_drop_geometry() %>%
dplyr::select(where(is.numeric)) %>%
dplyr::select(-any_of(c("year")))
pca_res <- stats::prcomp(numeric_mat, scale. = TRUE)
# Calculate variance
var_exp <- round(100 * pca_res$sdev^2 / sum(pca_res$sdev^2), 1)
# Calculate the sum for the reader
total_var <- var_exp[1] + var_exp[2]
as.data.frame(pca_res$x) %>%
dplyr::mutate(forested = data$forested) %>%
ggplot2::ggplot(aes(x = PC1, y = PC2, color = forested)) +
ggplot2::geom_point(alpha = 0.5) +
ggplot2::scale_color_manual(values = c("Yes" = "#2D5A27", "No" = "#A0522D")) +
ggplot2::theme_minimal(base_size = 14) +
ggplot2::labs(
title = "PCA: Biophysical + Geography",
# REVISED SUBTITLE: Sums it up cleanly
subtitle = paste0("First two components capture ", total_var, "% of total variance"),
x = paste0("PC1 (", var_exp[1], "%)"),
y = paste0("PC2 (", var_exp[2], "%)")
) +
ggplot2::scale_x_continuous(limits = c(-10, 5))
}
plot_correlations <- function(data) {
# 1. Data Prep (Same as before)
cor_data <- data %>%
sf::st_drop_geometry() %>%
dplyr::mutate(Forested_Binary = dplyr::if_else(forested == "Yes", 1, 0)) %>%
dplyr::select(Forested_Binary, dplyr::where(is.numeric)) %>%
dplyr::select(-any_of(c("year")))
cor_df <- cor(cor_data, use = "pairwise.complete.obs") %>%
as.data.frame() %>%
dplyr::select(correlation = Forested_Binary) %>%
tibble::rownames_to_column("variable") %>%
dplyr::filter(variable != "Forested_Binary") %>%
dplyr::mutate(
sign = dplyr::if_else(correlation > 0, "Positive", "Negative"),
is_spatial = dplyr::if_else(variable %in% c("lat", "lon"), "Spatial", "Biophysical")
)
# 2. Plot with SPACE
ggplot2::ggplot(cor_df, aes(x = reorder(variable, correlation), y = correlation)) +
ggplot2::geom_segment(aes(xend = variable, yend = 0), color = "gray50") +
ggplot2::geom_point(aes(color = sign, shape = is_spatial), size = 5) +
ggplot2::coord_flip() +
ggplot2::theme_minimal(base_size = 18) +
ggplot2::scale_color_manual(values = c("Positive" = "#228B22", "Negative" = "#D2691E")) +
ggplot2::labs(
title = "What drives the 'Forested' classification?",
subtitle = "Correlations with Forested (Yes=1). Note the impact of Latitude.",
x = NULL,
y = "Correlation Coefficient",
color = "Direction",
shape = "Variable Type"
)
}
#' Plot Random Forest Variable Importance
#'
#' @description Fits a ranger Random Forest model to the provided data, calculates
#' permutation importance, and generates a lollipop chart. It distinguishes
#' between spatial (lat/lon) and biophysical predictors.
#' Uses the project's 'Atkinson' font theme via theme_forestry_plot().
#'
#' @param data An sf object or data frame containing the 'forested' target and predictors.
#'
#' @importFrom ranger ranger
#' @importFrom vip vi
#' @importFrom sf st_drop_geometry
#' @importFrom dplyr select mutate if_else any_of where
#' @importFrom ggplot2 ggplot aes geom_segment geom_point scale_shape_manual scale_color_manual labs
#' @export
plot_rf_importance <- function(data) {
# 1. Clean Data & Fit Model
model_data <- data %>%
sf::st_drop_geometry() %>%
dplyr::select(forested, dplyr::where(is.numeric)) %>%
dplyr::select(-dplyr::any_of(c("year"))) %>%
dplyr::mutate(forested = as.factor(forested))
rf_model <- ranger::ranger(
formula = forested ~ .,
data = model_data,
importance = "permutation",
seed = 123
)
# 2. Extract importance & Add Spatial Flag
imp_df <- vip::vi(rf_model) %>%
dplyr::mutate(
Type = dplyr::if_else(Variable %in% c("lat", "lon"), "Spatial", "Biophysical")
)
# 3. Tufte-style Lollipop with Shapes
ggplot2::ggplot(imp_df, ggplot2::aes(x = Importance, y = reorder(Variable, Importance))) +
# Stems
ggplot2::geom_segment(ggplot2::aes(xend = 0, yend = Variable), color = "gray60", linewidth = 0.8) +
# Dots (Mapped to Shape)
ggplot2::geom_point(
ggplot2::aes(shape = Type, color = Type),
size = 5
) +
# --- VISUAL MAPPING ---
ggplot2::scale_shape_manual(values = c("Biophysical" = 16, "Spatial" = 17)) +
ggplot2::scale_color_manual(values = c("Biophysical" = "#228B22", "Spatial" = "black")) +
# --- THEME FIX ---
# We use your custom theme, which now enforces Atkinson fonts.
# turning off grid because lollipops look cleaner without it
theme_forestry_plot(font_size = 16, grid = FALSE) +
ggplot2::labs(
title = "Random Forest Variable Importance",
subtitle = "Permutation Importance. Note the high rank of spatial variables.",
y = NULL,
x = "Importance (Loss in Accuracy if scrambled)",
shape = "Variable Type",
color = "Variable Type"
)
}
plot_umap_forested <- function(data) {
data_clean <- data %>%
drop_na()
numeric_data <- data_clean %>%
select(where(is.numeric)) %>%
scale()
set.seed(42)
umap_fit <- umap::umap(numeric_data)
# Format for plotting
umap_df <- umap_fit$layout %>%
as.data.frame() %>%
dplyr::rename(UMAP1 = V1, UMAP2 = V2) %>%
dplyr::mutate(forested = as.factor(data_clean$forested))
# Generate Plot
ggplot(umap_df, aes(x = UMAP1, y = UMAP2, color = forested)) +
geom_point(alpha = 0.6, size = 1.5) +
#scale_color_viridis_d(name = "Forested Status", begin = 0.2, end = 0.8) +
scale_color_manual(
name = "Forested?",
# Assumes factor order is (No, Yes). Swap colors if order differs.
values = c("#228B22", "#D2691E")
) +
theme_minimal() +
labs(
title = "UMAP Projection of Forested Areas",
subtitle = "Clustering based on numeric landscape features",
x = "UMAP Dimension 1",
y = "UMAP Dimension 2"
)
}
plot_cv_strategies <- function(data) {
# 1. SETUP DATA & ADD ID
# We MUST add an ID column to track rows across splits
forested_sf <- st_as_sf(
data,
coords = c("lon", "lat"),
crs = 4326,
remove = FALSE
) %>%
mutate(id = row_number()) # <--- Critical: Adds the ID we need later
# 2. DEFINE CUSTOM ECOLOGICAL DISTANCE
dist_env <- function(x) {
x %>%
st_drop_geometry() %>%
select(elevation, precip_annual, temp_annual_mean, roughness) %>%
scale() %>%
dist()
}
# 3. CREATE THE SPLITS
set.seed(42)
# A. Random
random_folds <- vfold_cv(forested_sf, v = 5)
# B. Spatial Block
spatial_folds <- spatial_block_cv(forested_sf, v = 5)
# C. Environmental Clustering
cluster_folds <- spatial_clustering_cv(
forested_sf,
v = 5,
cluster_function = "hclust",
distance_function = dist_env
)
# 4. HELPER: EXTRACT FOLD IDs
# This assigns a Fold Number (1-5) to every single point
get_fold_ids <- function(splits, original_data) {
results <- original_data %>%
mutate(fold = NA_character_)
for(i in seq_along(splits$splits)) {
test_ids <- assessment(splits$splits[[i]]) %>% pull(id)
results <- results %>%
mutate(fold = ifelse(id %in% test_ids, as.character(i), fold))
}
return(results)
}
# 5. PREPARE PLOTTING DATA
df_random <- get_fold_ids(random_folds, forested_sf)
df_spatial <- get_fold_ids(spatial_folds, forested_sf)
df_cluster <- get_fold_ids(cluster_folds, forested_sf)
# 6. COMMON PLOT FUNCTION
plot_folds <- function(df, title) {
ggplot(df) +
geom_sf(aes(color = fold), size = 0.5, alpha = 0.6) +
scale_color_brewer(palette = "Set1", name = "Fold") + # <--- The Rainbow Colors
#scale_color_discrete_qualitative(palette = "Harmonic", name = "Fold") +
#scale_color_discrete_qualitative(palette = "Dark 3", name = "Fold") +
#scale_color_discrete_qualitative(palette = "Dynamic", name = "Fold") +
ggtitle(title) +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
legend.position = "none"
)
}
# 7. GENERATE & COMBINE
p1 <- plot_folds(df_random, "Random\n(Confetti)")
p2 <- plot_folds(df_spatial, "Spatial Block\n(Checkerboard)")
p3 <- plot_folds(df_cluster, "Env. Clustering\n(Ecoregions)")
p1 + p2 + p3 + plot_layout(ncol = 3)
}
plot_spatial_cv_comparison <- function(res_random, res_block, res_cluster) {
# 1. Extract & Clean Data
all_metrics <- bind_rows(
res_random %>% collect_metrics() %>% mutate(strategy = "Random CV"),
res_block %>% collect_metrics() %>% mutate(strategy = "Block CV"),
res_cluster %>% collect_metrics() %>% mutate(strategy = "Cluster CV")
) %>%
filter(.metric == "roc_auc") %>%
mutate(
recipe_label = case_when(
str_detect(wflow_id, "^base") ~ "Coords",
str_detect(wflow_id, "^non_spatial") ~ "No Coords",
str_detect(wflow_id, "^extensible") ~ "Extensible",
TRUE ~ "Unknown"
),
model = str_remove(wflow_id, "(base_|non_spatial_|extensible_)"),
strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")),
recipe_label = factor(recipe_label, levels = c("Coords", "No Coords", "Extensible"))
)
# 2. Generate Plot
ggplot(all_metrics, aes(x = model, y = mean, color = model)) +
geom_hline(yintercept = 0.96, linetype = "dashed", color = "gray50", alpha = 0.5) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), width = 0.2) +
scale_color_discrete_qualitative(palette = "Dark 3") +
facet_grid(recipe_label ~ strategy) +
scale_y_continuous(labels = scales::number_format(accuracy = 0.01),
limits = c(.65, 1)) +
labs(
y = "ROC AUC Score",
x = "Model Type",
color = "Model"
) +
theme_bw(base_size = 14) +
theme(
legend.position = "none",
panel.grid.minor = element_blank(),
strip.text = element_text(face = "bold", size = 10)
)
}
plot_spatial_vip_comparison <- function(res_random, res_block, res_cluster, model_id = "base_rf") {
# Internal helper to extract VIP from the best fold
get_best_fold_vip <- function(wflow_obj, strategy_name) {
# 1. Extract results for the specific model ID (e.g., "base_rf")
resample_results <- wflow_obj %>% extract_workflow_set_result(model_id)
# 2. Find the best fold
best_fold_id <- resample_results %>%
collect_metrics(summarize = FALSE) %>%
filter(.metric == "roc_auc") %>%
slice_max(.estimate, n = 1) %>%
slice(1) %>%
pull(id)
# 3. Get the split object for that fold
best_split <- resample_results$splits[[which(resample_results$id == best_fold_id)]]
# 4. Refit and calculate Importance
wflow_obj %>%
extract_workflow(model_id) %>%
finalize_workflow(select_best(resample_results, metric = "roc_auc")) %>%
fit(data = analysis(best_split)) %>%
extract_fit_parsnip() %>%
vi() %>%
mutate(strategy = strategy_name)
}
# Consolidate and clean
vip_data <- bind_rows(
get_best_fold_vip(res_random, "Random CV"),
get_best_fold_vip(res_block, "Block CV"),
get_best_fold_vip(res_cluster, "Cluster CV")
) %>%
# Filter ONLY for Lat/Lon (The interaction term doesn't exist in your new recipe)
filter(Variable %in% c("lat", "lon")) %>%
mutate(
Variable = case_when(
Variable == "lon" ~ "Longitude",
Variable == "lat" ~ "Latitude",
TRUE ~ Variable
),
strategy = factor(strategy, levels = c("Cluster CV", "Block CV", "Random CV"))
)
# Generate Plot
ggplot(vip_data, aes(x = Importance, y = strategy)) +
geom_segment(aes(x = 0, xend = Importance, y = strategy, yend = strategy),
color = "gray20", linewidth = .65) +
geom_point(aes(color = strategy), size = 5, show.legend = FALSE) +
facet_grid(Variable ~ ., scales = "free_y", space = "free_y", switch = "y") +
scale_color_discrete_qualitative(palette = "Dark 3") +
scale_x_continuous(expand = c(0, 0)) + # Removed fixed limit (105) to let it scale automatically
labs(
title = paste("Spatial Overfitting Check:", model_id),
subtitle = "Does the model rely on coordinates?",
x = "Importance Score (Impurity)",
y = NULL
) +
theme_classic(base_size = 14) +
theme(
panel.grid.minor = element_blank()
)
}
# vvv ADD THIS vvv
plot_model_stability <- function(res_random, res_block, res_cluster, model_id) {
# Internal helper to extract per-fold metrics
extract_fold_metrics <- function(wflow_obj, strategy_name) {
wflow_obj %>%
extract_workflow_set_result(id = model_id) %>% # <--- Now this works
collect_metrics(summarize = FALSE) %>%
filter(.metric == "roc_auc") %>%
mutate(strategy = strategy_name)
}
# Combine and factorize
all_fold_metrics <- bind_rows(
extract_fold_metrics(res_random, "Random CV"),
extract_fold_metrics(res_block, "Block CV"),
extract_fold_metrics(res_cluster, "Cluster CV")
) %>%
mutate(strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")))
# Generate Plot
ggplot(all_fold_metrics, aes(x = strategy, y = .estimate, color = strategy)) +
geom_violin(alpha = 0.4, outlier.shape = NA, width = 0.5) +
geom_jitter(width = 0.1, size = 3, alpha = 0.8) +
scale_fill_discrete_qualitative(palette = "Dark 3") +
scale_color_discrete_qualitative(palette = "Dark 3") +
scale_y_continuous(limits = c(0.80, 1.0), breaks = seq(0.80, 1.0, 0.05)) +
labs(
title = paste("Stability Analysis:", model_id), # <--- Helpful title
x = NULL,
y = "ROC AUC (per fold)",
caption = ""
) +
theme_classic(base_size = 14) +
theme(
legend.position = "none"
)
}
plot_final_test_results <- function(final_fit_obj) {
final_metrics <- final_fit_obj %>%
collect_metrics() %>%
filter(.metric %in% c("roc_auc", "accuracy")) %>%
mutate(
.metric = case_when(
.metric == "roc_auc" ~ "ROC AUC",
.metric == "accuracy" ~ "Accuracy",
TRUE ~ .metric
)
)
ggplot(final_metrics, aes(x = .metric, y = .estimate, fill = .metric)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = round(.estimate, 3)), vjust = -0.5, size = 5, fontface = "bold") +
scale_fill_manual(values = c("ROC AUC" = "#2c3e50", "Accuracy" = "#18bc9c")) +
scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, 0.2), expand = c(0, 0)) +
labs(
x = NULL,
y = "Performance Score",
caption = "Evaluation based on the 20% held-out Washington Test Set."
) +
theme_classic(base_size = 14) +
theme(axis.text.x = element_text(face = "bold", size = 12))
}
plot_final_confusion_matrix <- function(final_fit_obj) {
# Extract the predictions from the last_fit object
preds <- final_fit_obj %>% collect_predictions()
# Generate the confusion matrix
cm <- preds %>%
conf_mat(truth = forested, estimate = .pred_class)
# Convert to a tidy format for plotting
autoplot(cm, type = "heatmap") +
scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") +
theme_minimal(base_size = 14) +
theme(
panel.grid = element_blank(),
axis.text = element_text(face = "bold"),
plot.title = element_text(face = "bold"),
legend.position = "none"
)
}
plot_fold_mechanics <- function(wa_sf, boundary_wa_sf) {
# 1. SETUP
set.seed(42)
# Ensure unique_id exists to prevent the "object not found" error
if(!"unique_id" %in% names(wa_sf)) {
wa_sf <- wa_sf %>% mutate(unique_id = row_number())
}
# 2. DEFINE CUSTOM DISTANCE
dist_env <- function(x) {
x %>%
st_drop_geometry() %>%
select(elevation, precip_annual, temp_annual_mean, roughness) %>%
scale() %>%
dist()
}
# 3. CREATE SPLITS
random_folds <- vfold_cv(wa_sf, v = 5)
spatial_folds <- spatial_block_cv(wa_sf, v = 5)
cluster_folds <- spatial_clustering_cv(wa_sf, v = 5, cluster_function = "hclust", distance_function = dist_env)
# 4. HELPER: EXTRACT STATUS
get_status <- function(split_obj) {
first_split <- split_obj$splits[[1]]
assessment_ids <- assessment(first_split) %>% pull(unique_id)
wa_sf %>%
mutate(status = if_else(unique_id %in% assessment_ids, "Assessment (Test)", "Analysis (Train)"))
}
df_r <- get_status(random_folds)
df_s <- get_status(spatial_folds)
df_c <- get_status(cluster_folds)
# 5. PLOTTER WITH BOUNDARY LAYER
plot_one <- function(df, title) {
ggplot() +
# Layer 1: The state boundary provides the spatial context
geom_sf(data = boundary_wa_sf, fill = "gray98", color = "gray85") +
# Layer 2: The actual data points
geom_sf(data = df, aes(color = status), size = 0.4, alpha = 0.7) +
scale_color_manual(values = c("Assessment (Test)" = "#E16A86", "Analysis (Train)" = "#606060")) +
ggtitle(title) +
theme_void() +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", size = 10)
)
}
# 6. COMBINE
(plot_one(df_r, "Random\n(Confetti)") +
plot_one(df_s, "Spatial\n(Checkerboard)") +
plot_one(df_c, "Env. Clustering\n(Ecoregions)")) +
plot_layout(ncol = 3, guides = "collect") &
theme(legend.position = 'bottom', legend.title = element_blank())
}
plot_classic_kfold_diagram <- function(){
library(tidyverse)
# 1. Setup Parameters
N_obs <- 100
K_folds <- 5
# 2. Create Mock Data and Assign Folds
set.seed(42)
base_data <- tibble(
obs_id = 1:N_obs,
assigned_fold_group = sample(rep(1:K_folds, length.out = N_obs))
)
# 3. Expand Data for Plotting (Cross-Join)
plot_data <- expand_grid(
obs_id = 1:N_obs,
iteration = 1:K_folds
) %>%
left_join(base_data, by = "obs_id") %>%
# Define Status: If the iteration matches the assigned fold group, it's Test data.
mutate(
status = case_when(
iteration == assigned_fold_group ~ "Assessment (20%)",
TRUE ~ "Analysis (80%)"
),
# Convert iteration to factor and reverse levels so Fold 1 is at the top of plot
iteration_fct = factor(iteration, levels = rev(1:K_folds))
)
# 4. Define Colors
cv_colors <- c(
"Analysis (80%)" = "#A6CEE3",
"Assessment (20%)" = "#FDB462"
)
# 5. Generate the Plot
ggplot(plot_data, aes(x = obs_id, y = iteration_fct, fill = status)) +
geom_tile(color = "white", linewidth = 0.2) +
scale_fill_manual(values = cv_colors, name = "Data Role") +
labs(
x = "Observations",
y = "Folds"
) +
# Theme adjustments
theme_minimal(base_size = 16) +
theme(
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "bottom"
)
}
create_performance_table <- function(results_cluster, final_fit_results) {
require(dplyr)
require(gt)
require(tidymodels)
# 1. Extract the WINNING Model Info (Name & Score)
best_model_info <- results_cluster %>%
rank_results(rank_metric = "roc_auc", select_best = TRUE) %>%
filter(.metric == "roc_auc") %>%
slice(1) # Grab the #1 spot
# Get the dynamic name and score
best_model_name <- best_model_info$wflow_id
cv_score <- best_model_info$mean
# 2. Extract the Final Test Set Scores
test_scores <- final_fit_results %>%
collect_metrics() %>%
select(.metric, .estimate) %>%
tidyr::pivot_wider(names_from = .metric, values_from = .estimate)
# 3. Create the Combined Data Frame
summary_data <- tibble(
# FIXED: Uses the actual winner's name instead of hardcoded text
Model = best_model_name,
`Validation AUC` = cv_score,
`Test AUC` = test_scores$roc_auc,
`Test Accuracy` = test_scores$accuracy
)
# 4. Render with gt
summary_data %>%
gt() %>%
tab_header(
title = md("**Final Model Performance**"),
subtitle = paste("Winner:", best_model_name)
) %>%
fmt_number(
columns = where(is.numeric),
decimals = 3
) %>%
cols_align(
align = "center",
columns = where(is.numeric)
) %>%
tab_footnote(
footnote = "Mean ROC AUC across spatial cross-validation folds.",
locations = cells_column_labels(columns = `Validation AUC`)
) %>%
tab_footnote(
footnote = "Evaluated on the 20% independent Washington holdout set.",
locations = cells_column_labels(columns = contains("Test"))
) %>%
tab_options(
table.border.top.color = "white",
table.border.bottom.color = "black",
column_labels.font.weight = "bold"
)
}
plot_yeo_johnson <- function(data) {
# 1. Prepare Recipe & Extract Lambda
rec_yeo <- recipe(~ elevation, data = data) %>%
step_YeoJohnson(elevation) %>%
prep()
lambda_val <- tidy(rec_yeo, number = 1) %>%
filter(terms == "elevation") %>%
pull(value) %>%
round(2)
# 2. Create Plotting Data
plot_data <- data %>%
select(elevation) %>%
mutate(type = "Original") %>%
bind_rows(
bake(rec_yeo, new_data = data) %>%
mutate(type = "Transformed") %>%
rename(elevation_trans = elevation)
)
# 3. Setup Colors
dark3_cols <- colorspace::qualitative_hcl(2, palette = "Dark 3")
# 4. Plot A: Original
p1 <- ggplot(filter(plot_data, type == "Original"), aes(x = elevation)) +
geom_density(fill = dark3_cols[1], color = dark3_cols[1], alpha = 0.6, linewidth = 1) +
scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = "Elevation (m)", y = "Density") +
theme_minimal(base_size = 14) +
theme(
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
)
# 5. Plot B: Transformed
p2 <- ggplot(filter(plot_data, type == "Transformed"), aes(x = elevation_trans)) +
geom_density(fill = dark3_cols[2], color = dark3_cols[2], alpha = 0.6, linewidth = 1) +
scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = paste0("Yeo-Johnson (Lambda: ", lambda_val, ")"), y = "Density") +
theme_minimal(base_size = 14) +
theme(
panel.grid.minor = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
)
# 6. Combine
p1 + p2
}
#' Save Model Error Diagnostic Map
#'
#' @description Generates a diagnostic map highlighting prediction errors. It plots
#' misclassified points colored by the magnitude of the error (confidence in the wrong answer)
#' over a hillshaded elevation background.
#'
#' @param data A data frame containing model predictions (must include '.pred_class',
#' 'forested', '.pred_Yes', 'lon', and 'lat').
#' @param boundary_sf An \code{sf} object representing the state boundary.
#' @param raster_path Character string. File path to the elevation raster (.tif).
#' @param output_path Character string. File path where the PNG will be saved.
#'
#' @importFrom terra rast terrain shade
#' @importFrom sf st_transform st_as_sf st_filter
#' @importFrom dplyr filter mutate
#' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_viridis_c labs ggsave guides guide_colorbar unit margin
#' @importFrom tidyterra geom_spatraster scale_fill_hypso_c
#'
#' @return The \code{output_path} (invisible), for integration with \code{targets}.
#' @export
save_error_map_png <- function(data, boundary_sf, raster_path, output_path) {
# 1. Load & Process Hillshade (Standard Setup)
elev_terra <- terra::rast(raster_path)
slope <- terra::terrain(elev_terra, "slope", unit = "radians")
aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
# 2. Prep Vectors & Calculate Errors
wa_boundary_sf <- sf::st_transform(boundary_sf, 4326)
# Filter to ONLY the mistakes and calculate how "wrong" they were
error_sf <- data %>%
# Keep only rows where the Hard Class Prediction was wrong
dplyr::filter(.pred_class != forested) %>%
# Calculate Magnitude: How far was the probability from the truth?
dplyr::mutate(
truth_num = ifelse(forested == "Yes", 1, 0),
error_magnitude = abs(truth_num - .pred_Yes)
) %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
sf::st_filter(wa_boundary_sf)
# 3. Plot
plt <- ggplot2::ggplot() +
# --- Background Layers ---
tidyterra::geom_spatraster(data = elev_terra) +
tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") +
tidyterra::geom_spatraster(data = hill_terra, ggplot2::aes(alpha = ggplot2::after_stat(value)), fill = "black", show.legend = FALSE) +
ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
# --- The Error Layer ---
ggplot2::geom_sf(
data = error_sf,
ggplot2::aes(color = error_magnitude),
size = 3,
alpha = 0.9
) +
ggplot2::scale_color_viridis_c(
option = "plasma",
name = "Error\nConfidence",
limits = c(0.5, 1.0),
direction = -1
) +
# --- Theme Application ---
theme_forestry_spatial() +
ggplot2::labs(x = NULL, y = NULL) +
# --- Legend Controls (Right Side Vertical) ---
ggplot2::guides(
fill = "none",
color = ggplot2::guide_colorbar(
title.position = "top",
barwidth = ggplot2::unit(0.5, "cm"), # Narrow width
barheight = ggplot2::unit(6, "cm") # Tall height for right side
)
) +
ggplot2::theme(legend.position = "right")
# Save
ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white")
return(output_path)
}
#' Calculate Area of Applicability Data
#'
#' @description Generates the Area of Applicability (AOA) scores (Dissimilarity Index)
#' for the Georgia extrapolation dataset based on the Washington training data.
#'
#' @param train_data Dataframe. The training data from Washington.
#' @param test_data Dataframe. The extrapolation data from Georgia.
#' @param predictors Character vector. The list of predictor variable names.
#'
#' @importFrom dplyr select all_of bind_cols
#' @importFrom tibble tibble
#' @importFrom waywiser ww_area_of_applicability
#' @importFrom stats predict
#' @importFrom sf st_as_sf
#'
#' @return An \code{sf} object containing the Georgia data with an added 'di' (Dissimilarity Index) column.
#' @export
calculate_ga_aoa <- function(train_data, test_data, predictors) {
# 1. Prepare Environments
train_env <- train_data %>% dplyr::select(dplyr::all_of(predictors))
test_env <- test_data %>% dplyr::select(dplyr::all_of(predictors))
# 2. Define "Flat" Importance
importance <- tibble::tibble(term = predictors, estimate = 1)
# 3. Fit AOA
aoa_model <- waywiser::ww_area_of_applicability(
x = train_env,
importance = importance
)
# 4. Predict
aoa_results <- stats::predict(aoa_model, test_env)
# 5. Return Spatial Object
test_data %>%
dplyr::bind_cols(aoa_results) %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
}
#' Plot Georgia Area of Applicability (AOA)
#'
#' @description Plots the pre-calculated Dissimilarity Index (DI) for Georgia.
#'
#' @param aoa_sf An \code{sf} object containing the 'di' column (output of \code{calculate_ga_aoa}).
#'
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_viridis_c labs guides guide_colorbar unit theme
#'
#' @return A \code{ggplot} object showing the Dissimilarity Index map.
#' @export
plot_georgia_aoa <- function(aoa_sf) {
ggplot2::ggplot(aoa_sf) +
# Points colored by Dissimilarity Index (DI)
ggplot2::geom_sf(ggplot2::aes(color = di), size = 1, alpha = 0.8) +
# Color Scale
ggplot2::scale_color_viridis_c(
option = "magma",
direction = 1,
name = "Dissimilarity\nIndex (DI)"
) +
# Theme
theme_forestry_spatial() +
ggplot2::labs(x = NULL, y = NULL) +
# Legend Control
ggplot2::guides(
color = ggplot2::guide_colorbar(
title.position = "top",
barwidth = ggplot2::unit(0.5, "cm"),
barheight = ggplot2::unit(6, "cm")
)
) +
ggplot2::theme(legend.position = "right")
}
# Predict on a new region (External Validation)
predict_external_region <- function(final_fit, new_data) {
wa_model <- final_fit$.workflow[[1]]
# FIX: Mock ALL columns that were in the training data
# The model expects 'geometry' because training data was an sf object
clean_data <- new_data %>%
dplyr::mutate(
northness = NA_real_,
county = NA_character_,
geometry = NA # Tidymodels will see this exists, then ignore it (as it's not a predictor)
)
# Predict Class and Probability
preds <- predict(wa_model, clean_data) %>%
dplyr::bind_cols(predict(wa_model, clean_data, type = "prob")) %>%
dplyr::bind_cols(new_data %>% dplyr::select(lat, lon, forested))
return(preds)
}
#' Plot Georgia Forest Comparison
#'
#' @description Creates a side-by-side comparison of forest cover for Georgia.
#' The left plot (a) is the Model Prediction, and the right plot (b) is the Actual Data.
#' Features a shared right-side legend and standardized spatial styling.
#'
#' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).
#' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia").
#'
#' @importFrom dplyr filter
#' @importFrom sf st_as_sf st_transform st_crs
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual labs theme guides guide_legend element_text
#' @importFrom patchwork plot_layout wrap_plots plot_annotation
#'
#' @return A \code{patchwork} object containing the labeled comparison plot.
#' @export
plot_ga_comparison_map <- function(pred_data, boundaries) {
# 1. Prepare Data & Boundaries
ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
plot_data <- pred_data %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
sf::st_transform(sf::st_crs(ga_boundary))
# 2. Helper Function (No Title Argument)
create_map <- function(data, fill_col) {
ggplot2::ggplot() +
# Background
ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
# Points
ggplot2::geom_sf(data = data,
ggplot2::aes(color = {{ fill_col }}),
size = 0.5, alpha = 0.6) +
# Colors
ggplot2::scale_color_manual(
name = "Forest Cover",
values = c("Yes" = "#228B22", "No" = "#D2691E"),
labels = c("Forest", "Non-Forest")
) +
# Labels (No Title)
ggplot2::labs(title = NULL, x = NULL, y = NULL) +
# Theme
theme_forestry_spatial() +
# Define Legend Here to avoid Patchwork "Order of Ops" Error
ggplot2::guides(
color = ggplot2::guide_legend(
override.aes = list(size = 4, alpha = 1),
title.position = "top",
ncol = 1
)
) +
ggplot2::theme(
legend.title = ggplot2::element_text(face = "bold", size = 12),
legend.text = ggplot2::element_text(size = 10)
)
}
# 3. Create Subplots
p_pred <- create_map(plot_data, .pred_class) # Left: Predicted
p_actual <- create_map(plot_data, forested) # Right: Actual
# 4. Patchwork Layout with (a)/(b) Tags
combined_plot <- (p_pred + p_actual) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(
tag_levels = 'a',
tag_prefix = '(',
tag_suffix = ')',
theme = theme_forestry_spatial() # Ensure tags match font
) &
ggplot2::theme(
legend.position = "right",
plot.tag = ggplot2::element_text(size = 20, face = "bold") # Make tags prominent
)
return(combined_plot)
}
plot_ga_confusion_matrix <- function(pred_data) {
# 1. Generate the confusion matrix object
cm <- pred_data %>%
yardstick::conf_mat(truth = forested, estimate = .pred_class)
# 2. Plot using autoplot to match the previous style exactly
plt <- ggplot2::autoplot(cm, type = "heatmap") +
# MATCHED: The exact blue gradient from your previous slide
ggplot2::scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") +
ggplot2::theme_minimal(base_size = 14) +
ggplot2::theme(
panel.grid = ggplot2::element_blank(),
axis.text = ggplot2::element_text(face = "bold"),
plot.title = ggplot2::element_text(face = "bold", hjust = 0.5), # Added hjust for center alignment
legend.position = "none"
)
return(plt)
}
#' Plot Failure Mechanism Comparison
#'
#' @description Creates a side-by-side diagnostic plot returning a patchwork object.
#' (a) The Area of Applicability (Dissimilarity Index) showing where the model is extrapolating.
#' (b) The spatial distribution of actual classification errors.
#'
#' @param aoa_data Dataframe containing the AOA results (must have 'di', 'lon', 'lat').
#' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).
#' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia").
#'
#' @importFrom dplyr filter mutate case_when
#' @importFrom sf st_as_sf st_transform st_crs
#' @importFrom ggplot2 ggplot geom_sf aes labs theme guides guide_legend guide_colorbar element_rect element_text unit
#' @importFrom colorspace scale_color_discrete_qualitative
#' @importFrom patchwork plot_layout plot_annotation wrap_plots
#'
#' @return A \code{patchwork} object containing the side-by-side comparison.
#' @export
plot_failure_mechanism <- function(aoa_data, pred_data, boundaries) {
# 1. Prep Boundaries
ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
target_crs <- sf::st_crs(ga_boundary)
# 2. Prep Map A: Dissimilarity (Risk)
aoa_sf <- aoa_data %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
sf::st_transform(target_crs)
p_risk <- ggplot2::ggplot() +
ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
ggplot2::geom_sf(data = aoa_sf, ggplot2::aes(color = di), size = 0.5, alpha = 0.8) +
ggplot2::scale_color_viridis_c(
option = "magma",
direction = 1,
name = "Dissimilarity Idx."
) +
ggplot2::labs(x = NULL, y = NULL) +
theme_forestry_spatial() +
ggplot2::guides(
color = ggplot2::guide_colorbar(
title.position = "top",
title.hjust = 0.5,
barwidth = ggplot2::unit(4.5, "cm"),
barheight = ggplot2::unit(.3, "cm")
)
) +
ggplot2::theme(legend.position ="bottom")
# 3. Prep Map B: Failures (Effect)
errors_sf <- pred_data %>%
dplyr::filter(.pred_class != forested) %>%
dplyr::mutate(
error_type = dplyr::case_when(
.pred_class == "Yes" & forested == "No" ~ "False Positive",
.pred_class == "No" & forested == "Yes" ~ "False Negative"
)
) %>%
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
sf::st_transform(target_crs)
p_fail <- ggplot2::ggplot() +
ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
ggplot2::geom_sf(data = errors_sf, ggplot2::aes(color = error_type), size = 0.6, alpha = 0.6) +
colorspace::scale_color_discrete_qualitative(
palette = "Dark 3",
name = "Error Type"
) +
ggplot2::labs(x = NULL, y = NULL) +
theme_forestry_spatial() +
ggplot2::guides(
color = ggplot2::guide_legend(
override.aes = list(size = 4, alpha = 1),
title.position = "top",
title.hjust = 0.5
)
) +
ggplot2::theme(legend.position = "bottom")
# 4. Patchwork Assembly
combined_plot <- (p_risk + p_fail) +
patchwork::plot_layout(guides = "keep") +
patchwork::plot_annotation(
tag_levels = 'a',
tag_prefix = '(',
tag_suffix = ')',
theme = theme_forestry_spatial()
) &
ggplot2::theme(
plot.tag = ggplot2::element_text(size = 20, face = "bold"),
plot.background = ggplot2::element_rect(fill = "transparent", color = NA)
)
return(combined_plot)
}
#' Plot Annual Rainfall Comparison (Clipped Hexes)
#'
#' @description Creates a polished side-by-side comparison of annual precipitation.
#' Hexagons are spatially generated and clipped to the exact state boundaries
#' to eliminate "bleeding" edges. Uses \code{theme_forestry_void} with explicit
#' font sizing to match topographic maps.
#'
#' @param wa_data Dataframe containing Washington data (requires 'precip_annual', 'lat', 'lon').
#' @param ga_data Dataframe containing Georgia data (requires 'precip_annual', 'lat', 'lon').
#' @param boundaries An \code{sf} object containing state boundaries.
#' @param bins Integer. Number of hexes across the state width. Default is 30.
#' @param max_limit Numeric. The visual cap for rainfall (mm) to ensure comparable scales. Default is 2500.
#'
#' @importFrom dplyr filter mutate group_by summarize inner_join row_number
#' @importFrom sf st_as_sf st_transform st_crs st_make_grid st_intersection st_join st_drop_geometry
#' @importFrom ggplot2 ggplot geom_sf aes scale_fill_distiller guide_colorbar unit theme element_text element_rect margin coord_sf rel
#' @importFrom scales squish
#' @importFrom patchwork plot_layout plot_annotation
#'
#' @return A \code{patchwork} object containing the side-by-side comparison.
#' @export
plot_precip_hex_comparison <- function(wa_data, ga_data, boundaries, bins = 30, max_limit = 2500) {
# 1. Clean Data
wa_clean <- wa_data %>%
dplyr::filter(!is.na(precip_annual)) %>%
dplyr::mutate(precip_annual = as.numeric(precip_annual))
ga_clean <- ga_data %>%
dplyr::filter(!is.na(precip_annual)) %>%
dplyr::mutate(precip_annual = as.numeric(precip_annual))
# 2. Helper: Generate Clipped Hex Grid
get_clipped_hex_layer <- function(point_data, boundary_sf, n_bins) {
target_crs <- sf::st_crs(boundary_sf)
points_sf <- sf::st_as_sf(point_data, coords = c("lon", "lat"), crs = 4326) %>%
sf::st_transform(target_crs)
grid <- sf::st_make_grid(boundary_sf, n = n_bins, square = FALSE, flat_topped = TRUE) %>%
sf::st_as_sf() %>%
dplyr::mutate(hex_id = dplyr::row_number())
grid_clipped <- sf::st_intersection(grid, boundary_sf)
joined <- sf::st_join(points_sf, grid_clipped, join = sf::st_within)
hex_summary <- joined %>%
dplyr::filter(!is.na(hex_id)) %>%
dplyr::group_by(hex_id) %>%
dplyr::summarize(precip_mean = mean(precip_annual, na.rm = TRUE))
final_layer <- grid_clipped %>%
dplyr::inner_join(sf::st_drop_geometry(hex_summary), by = "hex_id")
return(final_layer)
}
# 3. Process States
wa_bnd <- boundaries %>% dplyr::filter(NAME == "Washington" | NAME == "WA")
ga_bnd <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
wa_hex_sf <- get_clipped_hex_layer(wa_clean, wa_bnd, bins)
ga_hex_sf <- get_clipped_hex_layer(ga_clean, ga_bnd, bins)
# 4. Plotting Helper
plot_hex_sf <- function(hex_data, boundary_sf) {
ggplot2::ggplot() +
ggplot2::geom_sf(data = hex_data, ggplot2::aes(fill = precip_mean), color = NA) +
ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
# [CRITICAL] Removes grid lines (datum=NA) and removes padding (expand=FALSE)
ggplot2::coord_sf(datum = NA, expand = FALSE) +
ggplot2::scale_fill_distiller(
palette = "Blues",
direction = 1,
limits = c(0, max_limit),
oob = scales::squish,
name = "Mean Annual Precip (mm)",
guide = ggplot2::guide_colorbar(
title.position = "top",
title.hjust = 0.5,
barwidth = ggplot2::unit(8, "cm"),
barheight = ggplot2::unit(0.4, "cm"),
frame.colour = "black",
ticks.colour = "black"
)
) +
# Use Base Size 16 to match Topo Plot
theme_forestry_void(base_size = 16) +
ggplot2::theme(legend.position = "none")
}
p_wa <- plot_hex_sf(wa_hex_sf, wa_bnd)
p_ga <- plot_hex_sf(ga_hex_sf, ga_bnd)
# 5. Patchwork Assembly
combined_plot <- (p_wa + p_ga) +
patchwork::plot_layout(guides = "collect") +
patchwork::plot_annotation(
tag_levels = 'a', tag_prefix = '(', tag_suffix = ')'
) &
# Ensure the combined plot also uses base_size 16
theme_forestry_void(base_size = 28)
return(combined_plot)
}
#' Get Major Peaks for Plotting
#'
#' Internal helper to return peak locations for WA and GA.
#'
#' @param state_name String. Either "Washington" or "Georgia".
#' @return An sf object containing peak names and locations.
#'
#' @importFrom tibble tibble
#' @importFrom sf st_as_sf
#' @noRd
get_state_peaks <- function(state_name) {
if (state_name == "Washington") {
tibble::tibble(
peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"),
lat = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013),
lon = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111)
) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
} else if (state_name == "Georgia") {
tibble::tibble(
peak = c("Brasstown Bald", "Rabun Bald", "Blood Mtn"),
lat = c(34.8743, 34.9660, 34.7398),
lon = c(-83.8111, -83.2996, -83.9367)
) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
} else { NULL }
}
#' Plot Spatial Autocorrelation Exploration
#'
#' @description Generates a Moran Scatterplot to visualize spatial
#' autocorrelation in elevation data using a 5km neighborhood.
#'
#' @param wa_data A dataframe or tibble containing elevation, lat, and lon columns.
#'
#' @return A ggplot object showing standardized elevation vs. spatially lagged elevation.
#'
#' @importFrom sf st_as_sf st_transform st_coordinates st_drop_geometry
#' @importFrom spdep dnearneigh nb2listw lag.listw card
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth geom_hline geom_vline labs theme_minimal
#' @importFrom dplyr mutate
#'
#' @export
plot_spatial_exploration <- function(wa_data) {
# 1. Convert to sf and project to meters (Washington State Plane North)
# This ensures the 5000 distance in dnearneigh is 5,000 meters.
wa_sf <- wa_data |>
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) |>
sf::st_transform(2285)
# 2. Extract coordinates and build weights (5km radius)
coords <- sf::st_coordinates(wa_sf)
nb <- spdep::dnearneigh(coords, 0, 5000)
# Remove points with zero neighbors to prevent divide-by-zero errors
ids_with_nb <- which(spdep::card(nb) > 0)
nb <- subset(nb, spdep::card(nb) > 0)
wa_sf <- wa_sf[ids_with_nb, ]
# Row-standardized weights matrix
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)
# 3. Prep data for plotting
plot_df <- wa_sf |>
sf::st_drop_geometry() |>
dplyr::mutate(
# Scale elevation to get z-scores
elev_scaled = as.vector(scale(elevation)),
# Calculate the spatial lag (average elevation of neighbors)
elev_lag = spdep::lag.listw(lw, elev_scaled, zero.policy = TRUE)
)
# 4. Build Plot Object
p <- ggplot2::ggplot(plot_df, ggplot2::aes(x = elev_scaled, y = elev_lag)) +
ggplot2::geom_point(alpha = 0.2, color = "#2c3e50") +
ggplot2::geom_smooth(
method = "lm",
color = "#e74c3c",
linetype = "dashed",
formula = y ~ x
) +
ggplot2::geom_hline(yintercept = 0, alpha = 0.3) +
ggplot2::geom_vline(xintercept = 0, alpha = 0.3) +
ggplot2::labs(
title = "Spatial Autocorrelation: Moran Scatterplot",
subtitle = "Washington Elevation (5km neighborhood)",
x = "Standardized Elevation (Z-score)",
y = "Spatially Lagged Elevation (Neighbors)"
) +
ggplot2::theme_minimal()
return(p)
}