2288 lines
79 KiB
R
2288 lines
79 KiB
R
#' Helper: Save Plot for Quarto Slide
|
||
#'
|
||
#' @description Saves a ggplot object as a PNG, sized to fit comfortably
|
||
#' below a standard slide title, with robust font handling.
|
||
#'
|
||
#' @param plot_obj The ggplot object to save.
|
||
#' @param name A short descriptive name (e.g., "wa_ecoregions").
|
||
#' @param type Either "map" or "plot". Adds this prefix to the filename.
|
||
#' @param width Width in inches (default: 10).
|
||
#' @param height Height in inches (default: 6.18).
|
||
#' @param dpi Resolution (default: 300).
|
||
#'
|
||
#' @return The full file path (invisibly).
|
||
#' @export
|
||
helper_save_fig <- function(plot_obj, name, type = c("map", "plot"),
|
||
width = 10, height = 6.18, dpi = 300) {
|
||
|
||
# 1. Input Validation & Directory Setup
|
||
type <- match.arg(type)
|
||
if (!dir.exists("figs")) dir.create("figs")
|
||
|
||
# Construct filename: figs/map_washington.png
|
||
filename <- paste0("figs/", type, "_", name, ".png")
|
||
|
||
# 2. Force Fonts (The "Amnesia" Fix)
|
||
# Critical: We must turn the engine on immediately before saving
|
||
# to prevent the new graphics device from reverting to Arial.
|
||
sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson")
|
||
showtext::showtext_auto()
|
||
showtext::showtext_opts(dpi = dpi)
|
||
|
||
# 3. Save the File
|
||
ggplot2::ggsave(
|
||
filename = filename,
|
||
plot = plot_obj,
|
||
width = width,
|
||
height = height,
|
||
dpi = dpi,
|
||
bg = "white" # Prevents transparent backgrounds
|
||
)
|
||
|
||
message(paste("Saved:", filename))
|
||
return(invisible(filename))
|
||
}
|
||
|
||
#' Register Project Fonts
|
||
#'
|
||
#' Registers 'Atkinson Hyperlegible Next' (Sans) and 'Atkinson Hyperlegible Mono'
|
||
#' (Monospace) with the sysfonts package for use in R graphics.
|
||
#'
|
||
#' @return NULL (called for side effects)
|
||
#' @export
|
||
setup_forestry_fonts <- function() {
|
||
|
||
font_dir <- "assets/fonts"
|
||
|
||
if (!dir.exists(font_dir)) {
|
||
warning(paste("Font directory not found at:", font_dir))
|
||
return(NULL)
|
||
}
|
||
|
||
# 1. Main Text Font (Sans)
|
||
sysfonts::font_add(
|
||
family = "atkinson",
|
||
regular = file.path(font_dir, "AtkinsonHyperlegibleNext-Regular.ttf"),
|
||
bold = file.path(font_dir, "AtkinsonHyperlegibleNext-Bold.ttf"),
|
||
italic = file.path(font_dir, "AtkinsonHyperlegibleNext-Italic.ttf"),
|
||
bolditalic = file.path(font_dir, "AtkinsonHyperlegibleNext-BoldItalic.ttf")
|
||
)
|
||
|
||
# 2. Code/Number Font (Mono)
|
||
sysfonts::font_add(
|
||
family = "atkinson_mono",
|
||
regular = file.path(font_dir, "AtkinsonHyperlegibleMono-Regular.ttf"),
|
||
bold = file.path(font_dir, "AtkinsonHyperlegibleMono-Bold.ttf")
|
||
)
|
||
|
||
# 3. Enable showtext rendering
|
||
showtext::showtext_auto()
|
||
|
||
showtext::showtext_opts(dpi = 300)
|
||
}
|
||
|
||
|
||
#' Standardized Void Theme (Maximal Data Ink)
|
||
#' @description Removes axes/grids for shape-focused maps, but keeps project fonts.
|
||
#' @export
|
||
theme_forestry_void <- function(base_size = 16) {
|
||
# 1. Start with the void (empty) theme
|
||
ggplot2::theme_void(base_family = "atkinson", base_size = base_size) +
|
||
|
||
# 2. Add back the specific text elements we need
|
||
ggplot2::theme(
|
||
# Keep the Legend styled correctly
|
||
legend.position = "bottom",
|
||
legend.title = ggplot2::element_text(
|
||
family = "atkinson",
|
||
face = "bold",
|
||
size = ggplot2::rel(1)
|
||
),
|
||
legend.text = ggplot2::element_text(
|
||
family = "atkinson",
|
||
size = ggplot2::rel(0.9)
|
||
),
|
||
|
||
# Keep Titles (if you use them inside the plot)
|
||
plot.title = ggplot2::element_text(
|
||
family = "atkinson",
|
||
face = "bold",
|
||
hjust = 0.5,
|
||
margin = ggplot2::margin(b = 5)
|
||
),
|
||
|
||
# Zero margins to force edges to touch
|
||
plot.margin = ggplot2::margin(5, 5, 5, 5)
|
||
)
|
||
}
|
||
|
||
#' Standard Forestry Plot Theme (Cowplot + Atkinson)
|
||
#'
|
||
#' @description A standardized theme for non-spatial plots (scatter, bar, line).
|
||
#' Based on cowplot::theme_cowplot(), it includes clean axes and a minimalist look.
|
||
#' Uses 'Atkinson Hyperlegible Next' for all text.
|
||
#'
|
||
#' @param font_size Integer. Base font size. Default is 14 (good for slides).
|
||
#' @param grid Logical. If TRUE, adds a light gray grid (useful for presentations).
|
||
#'
|
||
#' @importFrom cowplot theme_cowplot background_grid
|
||
#' @importFrom ggplot2 theme element_text element_rect margin
|
||
#' @export
|
||
theme_forestry_plot <- function(font_size = 14, grid = TRUE) {
|
||
|
||
# 1. Ensure Fonts are Registered
|
||
setup_forestry_fonts()
|
||
|
||
# 2. Base Theme (Cowplot with Atkinson)
|
||
# cowplot::theme_cowplot has a 'font_family' argument we can leverage directly
|
||
base <- cowplot::theme_cowplot(font_size = font_size, font_family = "atkinson") +
|
||
ggplot2::theme(
|
||
|
||
# Fonts: We force family="atkinson" here just to be safe for overrides
|
||
plot.title = ggplot2::element_text(family = "atkinson", face = "bold", hjust = 0.5, size = font_size + 2),
|
||
axis.title = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size),
|
||
legend.title = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size),
|
||
legend.text = ggplot2::element_text(family = "atkinson", size = font_size - 1),
|
||
|
||
# Optional: Use Monospace for axis numbers? (Uncomment if you want the "technical" look)
|
||
# axis.text = ggplot2::element_text(family = "atkinson_mono", size = font_size - 1),
|
||
|
||
# Backgrounds (Transparent for slides)
|
||
plot.background = ggplot2::element_rect(fill = "transparent", color = NA),
|
||
panel.background = ggplot2::element_rect(fill = "transparent", color = NA),
|
||
|
||
# Margins
|
||
legend.box.margin = ggplot2::margin(10, 10, 10, 10)
|
||
)
|
||
|
||
# 3. Optional Grid
|
||
if (grid) {
|
||
base <- base + cowplot::background_grid(major = "y", minor = "none", color.major = "grey92")
|
||
}
|
||
|
||
return(base)
|
||
}
|
||
|
||
#' Standardized Spatial Theme (Atkinson)
|
||
#' @description High-visibility map theme for presentations using Atkinson fonts.
|
||
#' @importFrom ggplot2 theme_minimal theme element_line element_blank element_text element_rect margin unit
|
||
#' @export
|
||
theme_forestry_spatial <- function(base_size = 16) {
|
||
|
||
# 1. Ensure Fonts are Registered
|
||
setup_forestry_fonts()
|
||
|
||
# 2. Base Theme with Relative Sizing
|
||
ggplot2::theme_minimal(base_family = "atkinson", base_size = base_size) +
|
||
ggplot2::theme(
|
||
# Minimalist grid lines (Graticules)
|
||
panel.grid.major = ggplot2::element_line(color = "grey90", linewidth = 0.2),
|
||
panel.grid.minor = ggplot2::element_blank(),
|
||
|
||
# Titles: Scale relative to base_size (12 * 1.5 = 18pt)
|
||
plot.title = ggplot2::element_text(
|
||
family = "atkinson",
|
||
face = "bold",
|
||
size = ggplot2::rel(1.5),
|
||
color = "#2c3e50"
|
||
),
|
||
|
||
# Axis Labels: Slightly smaller than base (12 * 0.8 = 9.6pt)
|
||
axis.text = ggplot2::element_text(
|
||
family = "atkinson",
|
||
size = ggplot2::rel(0.8),
|
||
color = "#5d6d7e"
|
||
),
|
||
|
||
# Legend: Scale relative to base
|
||
legend.position = "right",
|
||
legend.title = ggplot2::element_text(
|
||
family = "atkinson",
|
||
size = ggplot2::rel(1.1), # ~13.2pt
|
||
face = "bold"
|
||
),
|
||
legend.text = ggplot2::element_text(
|
||
family = "atkinson",
|
||
size = ggplot2::rel(0.9) # ~10.8pt
|
||
),
|
||
|
||
# Clean borders
|
||
panel.border = ggplot2::element_rect(color = "grey80", fill = NA, linewidth = 0.5),
|
||
plot.margin = ggplot2::margin(5, 5, 5, 5)
|
||
)
|
||
}
|
||
#' Simplified Theme Diagnostic
|
||
#' @description Uses built-in NC data to verify theme_forestry_spatial.
|
||
#' @importFrom sf st_read st_crs
|
||
#' @importFrom ggplot2 ggplot geom_sf labs coord_sf scale_x_continuous scale_y_continuous
|
||
#' @export
|
||
plot_theme_diagnostic <- function() {
|
||
# Pull NC data included with the sf package
|
||
nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
|
||
|
||
# Render with your spatial theme
|
||
ggplot2::ggplot(nc) +
|
||
ggplot2::geom_sf(fill = "#27ae60", alpha = 0.3, color = "#2c3e50") +
|
||
# Force WGS84 graticules (Decimal Degrees)
|
||
ggplot2::coord_sf(datum = sf::st_crs(4326)) +
|
||
theme_forestry_spatial() +
|
||
ggplot2::labs(
|
||
title = "Theme Diagnostic: North Carolina",
|
||
subtitle = "Testing 24pt Title and 14pt Graticule Labels"
|
||
)
|
||
}
|
||
|
||
#' Combine Washington and Georgia Forest Data
|
||
#'
|
||
#' Merges the Washington and Georgia datasets into a single data frame, adding a
|
||
#' column to identify the source state.
|
||
#'
|
||
#' @param wa_data A data frame containing the Washington forest inventory data.
|
||
#' @param ga_data A data frame containing the Georgia forest inventory data.
|
||
#'
|
||
#' @return A single combined data frame with an additional column `.id`
|
||
#' (renamed to "state") indicating the source ("WA" or "GA").
|
||
#'
|
||
#' @examples
|
||
#' \dontrun{
|
||
#' combined <- combine_forest(wa_raw, ga_raw)
|
||
#' table(combined$state)
|
||
#' }
|
||
#'
|
||
#' @export
|
||
combine_forest <- function(wa_data, ga_data){
|
||
dplyr::bind_rows(
|
||
list(WA = wa_data, GA = ga_data),
|
||
.id = "state"
|
||
)
|
||
}
|
||
|
||
#' Format Statistical Summary Table
|
||
#'
|
||
#' @description Calculates descriptive statistics and returns a formatted `gt` table
|
||
#' ready for slide presentation.
|
||
#'
|
||
#' @param data A data frame or sf object.
|
||
#' @return A `gt` table object.
|
||
#'
|
||
#' @importFrom dplyr select arrange desc any_of where
|
||
#' @importFrom sf st_drop_geometry
|
||
#' @importFrom psych describe
|
||
#' @importFrom tibble rownames_to_column
|
||
#' @importFrom gt gt fmt_number cols_label tab_options google_font
|
||
#' @export
|
||
format_summary_table <- function(data) {
|
||
|
||
# 1. Calculate Stats (The Logic)
|
||
stats_df <- data %>%
|
||
sf::st_drop_geometry() %>%
|
||
dplyr::select(dplyr::where(is.numeric)) %>%
|
||
dplyr::select(-dplyr::any_of(c("year", "lon", "lat"))) %>%
|
||
psych::describe() %>%
|
||
as.data.frame() %>%
|
||
tibble::rownames_to_column(var = "Variable") %>%
|
||
dplyr::select(-vars, -range, -se, -mad) %>%
|
||
dplyr::arrange(dplyr::desc(abs(kurtosis)))
|
||
|
||
# 2. Format Table (The Presentation)
|
||
stats_df %>%
|
||
gt::gt() %>%
|
||
gt::opt_table_font(
|
||
font = gt::google_font("Atkinson Hyperlegible")
|
||
) %>%
|
||
# Clean up column labels
|
||
gt::cols_label(
|
||
Variable = "Variable",
|
||
n = "N",
|
||
mean = "Mean",
|
||
sd = "SD",
|
||
median = "Median",
|
||
trimmed = "Trimmed",
|
||
min = "Min",
|
||
max = "Max",
|
||
skew = "Skew",
|
||
kurtosis = "Kurtosis"
|
||
) %>%
|
||
# Format all numbers to 2 decimal places
|
||
gt::fmt_number(
|
||
columns = dplyr::where(is.numeric),
|
||
decimals = 2
|
||
) %>%
|
||
# Format 'N' to 0 decimals
|
||
gt::fmt_number(
|
||
columns = "n",
|
||
decimals = 0
|
||
) %>%
|
||
# Styling: Increase font size and use your project font
|
||
gt::tab_options(
|
||
table.font.names = "Atkinson Hyperlegible",
|
||
table.font.size = "22px", # <--- Increases size to fill the slide
|
||
data_row.padding = "8px" # Adds breathing room
|
||
) %>%
|
||
gt::text_transform(
|
||
locations = gt::cells_body(columns = dplyr::where(is.numeric)),
|
||
fn = function(x) {
|
||
# \u2212 is the unicode minus that breaks things. Replace with standard "-".
|
||
gsub("\u2212", "-", x)
|
||
}
|
||
)
|
||
}
|
||
|
||
#' Plot US Map with Forestry Theme
|
||
#' @description Highlights Washington and Georgia using standardized presentation fonts.
|
||
#' @importFrom tigris states shift_geometry
|
||
#' @importFrom rmapshaper ms_simplify ms_filter_islands
|
||
#' @importFrom dplyr filter mutate
|
||
#' @importFrom ggplot2 ggplot geom_sf aes scale_fill_manual labs
|
||
#' @export
|
||
plot_us_map <- function() {
|
||
|
||
# 1. Fetch and filter for the Lower 48
|
||
us_map <- tigris::states(cb = TRUE, resolution = "20m", year = 2022, progress_bar = FALSE) %>%
|
||
dplyr::filter(!STUSPS %in% c("AK", "HI", "PR")) %>%
|
||
rmapshaper::ms_simplify(keep = 0.2) %>%
|
||
rmapshaper::ms_filter_islands(min_area = 1e9) %>%
|
||
tigris::shift_geometry() %>%
|
||
dplyr::mutate(
|
||
highlight = dplyr::case_when(
|
||
STUSPS == "WA" ~ "Washington",
|
||
STUSPS == "GA" ~ "Georgia",
|
||
TRUE ~ NA_character_
|
||
)
|
||
)
|
||
|
||
# 2. Define Colors
|
||
state_colors <- c("Washington" = "#E16A86", "Georgia" = "#00AD9A")
|
||
|
||
# 3. Plot
|
||
ggplot2::ggplot() +
|
||
# Layer 1: Base Map
|
||
ggplot2::geom_sf(data = us_map, fill = "grey98", color = "grey50", linewidth = 0.25) +
|
||
|
||
# Layer 2: Highlight Layer
|
||
ggplot2::geom_sf(
|
||
data = dplyr::filter(us_map, !is.na(highlight)),
|
||
ggplot2::aes(fill = highlight),
|
||
color = "grey50",
|
||
linewidth = 0.25
|
||
) +
|
||
|
||
# Custom Scales
|
||
ggplot2::scale_fill_manual(values = state_colors, name = "Study Sites") +
|
||
|
||
# Standardized Forestry Theme
|
||
theme_forestry_spatial()
|
||
}
|
||
|
||
|
||
#' Create Single State Topo Plot
|
||
#' @description Generates a topo map with manually tuned label placement for Georgia.
|
||
#' @export
|
||
plot_state_topo <- function(data, boundary_sf, raster_path, state_name) {
|
||
|
||
# 1. Load & Process Raster
|
||
elev_terra <- terra::rast(raster_path)
|
||
slope <- terra::terrain(elev_terra, "slope", unit = "radians")
|
||
aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
|
||
hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
|
||
|
||
# 2. Prep Vectors
|
||
boundary_sf <- sf::st_transform(boundary_sf, 4326)
|
||
peaks_sf <- get_state_peaks(state_name)
|
||
|
||
# --- FIX: Manual Nudge Logic ---
|
||
# Define default nudges (0 means "let ggrepel decide")
|
||
nx <- 0
|
||
ny <- 0
|
||
|
||
if (state_name == "Georgia" && !is.null(peaks_sf)) {
|
||
# Prepare nudge vectors specific to the Georgia peaks to uncross the lines
|
||
# Units are in Decimal Degrees (roughly: 0.1 deg ~ 11km)
|
||
peaks_sf <- peaks_sf %>%
|
||
dplyr::mutate(
|
||
nudge_x = dplyr::case_when(
|
||
peak == "Rabun Bald" ~ 0.3, # Push Hard Right (East)
|
||
peak == "Blood Mtn" ~ -0.3, # Push Hard Left (West)
|
||
peak == "Brasstown Bald" ~ 0, # Keep Center horizontally
|
||
TRUE ~ 0
|
||
),
|
||
nudge_y = dplyr::case_when(
|
||
peak == "Brasstown Bald" ~ 0.2, # Push Up (North)
|
||
peak == "Rabun Bald" ~ -0.1, # Push Down slightly
|
||
TRUE ~ 0
|
||
)
|
||
)
|
||
|
||
# Extract as vectors for ggrepel
|
||
nx <- peaks_sf$nudge_x
|
||
ny <- peaks_sf$nudge_y
|
||
}
|
||
|
||
# 3. Plot
|
||
ggplot2::ggplot() +
|
||
tidyterra::geom_spatraster(data = elev_terra) +
|
||
tidyterra::scale_fill_hypso_c(
|
||
palette = "usgs-gswa2",
|
||
name = "Elevation (m)",
|
||
na.value = "transparent"
|
||
) +
|
||
tidyterra::geom_spatraster(
|
||
data = hill_terra,
|
||
ggplot2::aes(alpha = ggplot2::after_stat(value)),
|
||
fill = "black",
|
||
show.legend = FALSE
|
||
) +
|
||
ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
|
||
ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
|
||
|
||
# REPEL SETTINGS
|
||
{if(!is.null(peaks_sf)) ggrepel::geom_label_repel(
|
||
data = peaks_sf, ggplot2::aes(label = peak, geometry = geometry),
|
||
stat = "sf_coordinates",
|
||
size = 6,
|
||
family = "atkinson",
|
||
fontface = "bold",
|
||
|
||
# Inject the manual nudges here
|
||
nudge_x = nx,
|
||
nudge_y = ny,
|
||
|
||
box.padding = 0.8,
|
||
point.padding = 0.5,
|
||
force = 10,
|
||
min.segment.length = 0
|
||
)} +
|
||
|
||
theme_forestry_void(base_size = 16) +
|
||
ggplot2::coord_sf(expand = FALSE) +
|
||
|
||
ggplot2::guides(
|
||
fill = ggplot2::guide_colorbar(
|
||
title.position = "top",
|
||
title.hjust = 0.5,
|
||
barwidth = ggplot2::unit(5, "cm"),
|
||
barheight = ggplot2::unit(0.3, "cm"),
|
||
frame.colour = "black",
|
||
ticks.colour = "black"
|
||
)
|
||
) +
|
||
ggplot2::theme(
|
||
legend.position = "bottom",
|
||
legend.margin = ggplot2::margin(t = 5, b = 0)
|
||
)
|
||
}
|
||
|
||
|
||
#' Save Combined Side-by-Side Topo Plot
|
||
#' @export
|
||
save_combined_topo <- function(wa_data, ga_data, wa_boundary, ga_boundary,
|
||
wa_raster_path, ga_raster_path, output_path) {
|
||
|
||
# [FIX 2] Ensure fonts are registered & showtext is ON for this worker process
|
||
setup_forestry_fonts()
|
||
|
||
p_wa <- plot_state_topo(wa_data, wa_boundary, wa_raster_path, "Washington")
|
||
p_ga <- plot_state_topo(ga_data, ga_boundary, ga_raster_path, "Georgia")
|
||
|
||
combined_plot <- patchwork::wrap_plots(p_wa, p_ga, ncol = 2) +
|
||
patchwork::plot_annotation(
|
||
tag_levels = 'a',
|
||
tag_prefix = '(',
|
||
tag_suffix = ')',
|
||
theme = theme_forestry_void(base_size = 20)
|
||
)
|
||
|
||
dir.create(dirname(output_path), showWarnings = FALSE)
|
||
|
||
# Save
|
||
ggplot2::ggsave(output_path, plot = combined_plot, width = 12, height = 6, dpi = 300, bg = "white")
|
||
return(output_path)
|
||
}
|
||
|
||
#' Plot Regional Comparison of Forested Data (WA vs GA)
|
||
#'
|
||
#' @description Generates a side-by-side comparison of forest cover for Washington
|
||
#' and Georgia. It handles font registration (Atkinson Hyperlegible), spatial
|
||
#' transformations, and creates a combined plot with a shared legend.
|
||
#'
|
||
#' @param data A data frame containing the forest point data. Must contain
|
||
#' columns `lon`, `lat`, `state`, and `forested`.
|
||
#' @param boundaries An `sf` object containing state boundaries. Must contain
|
||
#' a `NAME` column.
|
||
#'
|
||
#' @return A `patchwork` object containing the combined side-by-side maps.
|
||
#'
|
||
#' @importFrom sysfonts font_add_google
|
||
#' @importFrom showtext showtext_auto
|
||
#' @importFrom dplyr filter
|
||
#' @importFrom sf st_as_sf st_transform st_crs
|
||
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual coord_sf guides guide_legend theme_void theme element_text
|
||
#' @importFrom patchwork wrap_plots plot_layout plot_annotation
|
||
#'
|
||
#' @export
|
||
plot_regional_comparison <- function(data, boundaries) {
|
||
# Load the font from Google Fonts (or a local file)
|
||
font_add_google("Atkinson Hyperlegible", family = "atkinson")
|
||
|
||
# Turn on showtext to render the font
|
||
showtext_auto()
|
||
|
||
# Spatial Prep (Fixing the NAD83/WGS84 mismatch)
|
||
forest_sf <- data %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_transform(sf::st_crs(boundaries))
|
||
|
||
state_lookup <- c("Washington" = "WA", "Georgia" = "GA")
|
||
# Washington Plot
|
||
abbr_wa <- state_lookup["Washington"]
|
||
p_wa <- ggplot2::ggplot() +
|
||
# State Boundary
|
||
ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Washington"),
|
||
fill = "grey98", color = "grey80") +
|
||
# Forest Points
|
||
ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_wa),
|
||
ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) +
|
||
# Scales
|
||
ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"),
|
||
name = "Forested") +
|
||
ggplot2::coord_sf(expand = TRUE) +
|
||
# Legend Tweaks
|
||
ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) +
|
||
theme_void()
|
||
# Georgia Plot
|
||
abbr_ga <- state_lookup["Georgia"]
|
||
p_ga <- ggplot2::ggplot() +
|
||
# State Boundary
|
||
ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Georgia"),
|
||
fill = "grey98", color = "grey80") +
|
||
# Forest Points
|
||
ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_ga),
|
||
ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) +
|
||
# Scales
|
||
ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"),
|
||
name = "Forested") +
|
||
ggplot2::coord_sf(expand = TRUE) +
|
||
# Legend Tweaks
|
||
ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) +
|
||
theme_void()
|
||
|
||
# Combine
|
||
final_plot <- patchwork::wrap_plots(p_wa, p_ga) +
|
||
patchwork::plot_layout(guides = "collect") +
|
||
patchwork::plot_annotation(
|
||
tag_levels = 'a',
|
||
tag_prefix = '(',
|
||
tag_suffix = ')'
|
||
) &
|
||
ggplot2::theme(
|
||
text = element_text(family = "atkinson", size = 16),
|
||
legend.position = "bottom"
|
||
)
|
||
return(final_plot)
|
||
}
|
||
#' Download EPA Level III Ecoregions Data
|
||
#'
|
||
#' @description Downloads the EPA Level III Ecoregions shapefile (zip format)
|
||
#' to a local directory. Implements a caching check to avoid re-downloading
|
||
#' if the file already exists.
|
||
#'
|
||
#' @param url Character string. The direct URL to the EPA Ecoregions zip file.
|
||
#' @param dest_dir Character string. The local directory where the data should
|
||
#' be saved. Defaults to "data/epa".
|
||
#'
|
||
#' @return A character string containing the full file path to the downloaded zip file.
|
||
#' This return value is designed to be tracked by \code{targets}.
|
||
#'
|
||
#' @importFrom utils download.file
|
||
#' @export
|
||
get_epa_ecoregions <- function(url, dest_dir = "data/epa") {
|
||
# Ensure directory exists
|
||
if (!dir.exists(dest_dir)) dir.create(dest_dir, recursive = TRUE)
|
||
|
||
# Define destination
|
||
dest_file <- file.path(dest_dir, "us_eco_l3.zip")
|
||
|
||
# Download only if missing
|
||
if (!file.exists(dest_file)) {
|
||
message("Downloading EPA Ecoregion Data...")
|
||
download.file(url, dest_file, mode = "wb")
|
||
}
|
||
|
||
# Return the file path for the next target to track
|
||
return(dest_file)
|
||
}
|
||
|
||
#' Process and Clip EPA Ecoregions
|
||
#'
|
||
#' @description Extracts EPA Level III ecoregion data from a zipped shapefile,
|
||
#' standardizes column names, and clips the geometry to specified state boundaries.
|
||
#' It includes robust steps for geometry repair (handling spherical validity),
|
||
#' small island removal, and simplification for optimized plotting.
|
||
#'
|
||
#' @param zip_path Character string. The file path to the zipped EPA shapefile.
|
||
#' @param target_states Character vector. The names of the states to clip the
|
||
#' ecoregions to. Defaults to \code{c("Washington", "Georgia")}.
|
||
#' @param simplify_tol Numeric. The simplification tolerance passed to
|
||
#' \code{rmapshaper::ms_simplify}. Range is 0-1, where higher numbers remove more detail.
|
||
#' Defaults to 0.05.
|
||
#'
|
||
#' @return An \code{sf} object containing the processed ecoregions with standardized
|
||
#' columns \code{US_L3NAME} and \code{STATE_NAME}.
|
||
#'
|
||
#' @importFrom tigris states
|
||
#' @importFrom dplyr filter rename select %>%
|
||
#' @importFrom rmapshaper ms_simplify ms_filter_islands
|
||
#' @importFrom sf read_sf st_transform st_crs st_make_valid st_intersection sf_use_s2
|
||
#' @importFrom utils unzip
|
||
#' @export
|
||
process_ecoregions <- function(zip_path, target_states = c("Washington", "Georgia"), simplify_tol = 0.05) {
|
||
message("Processing shapefiles for: ", paste(target_states, collapse = ", "))
|
||
|
||
# A. Fetch State Boundaries (The "Cookie Cutter")
|
||
boundaries <- tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
|
||
filter(NAME %in% target_states) %>%
|
||
# Simplify boundaries to match the ecoregion resolution
|
||
rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE)
|
||
|
||
# B. Unzip and Read EPA Data
|
||
clean_dir <- tempfile()
|
||
dir.create(clean_dir)
|
||
unzip(zip_path, exdir = clean_dir)
|
||
|
||
shp_file <- list.files(clean_dir, pattern = "\\.shp$", full.names = TRUE, recursive = TRUE)[1]
|
||
eco_raw <- sf::read_sf(shp_file)
|
||
|
||
# C. Standardize Columns (Handle variations in column names)
|
||
if (!"US_L3NAME" %in% names(eco_raw)) {
|
||
if ("NA_L3NAME" %in% names(eco_raw)) {
|
||
eco_raw <- rename(eco_raw, US_L3NAME = NA_L3NAME)
|
||
} else if ("LEVEL3_NAM" %in% names(eco_raw)) {
|
||
eco_raw <- rename(eco_raw, US_L3NAME = LEVEL3_NAM)
|
||
}
|
||
}
|
||
|
||
# D. Clip, Clean, and Simplify
|
||
suppressMessages(sf::sf_use_s2(FALSE)) # Turn off spherical geometry for robust clipping
|
||
|
||
eco_clean <- eco_raw %>%
|
||
st_transform(st_crs(boundaries)) %>%
|
||
st_make_valid() %>%
|
||
st_intersection(boundaries) %>%
|
||
select(US_L3NAME, STATE_NAME = NAME) %>%
|
||
rmapshaper::ms_filter_islands(min_area = 200000000) %>%
|
||
rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE)
|
||
|
||
suppressMessages(sf::sf_use_s2(TRUE))
|
||
|
||
return(eco_clean)
|
||
}
|
||
|
||
#' Plot Ecoregion Complexity Comparison (WA vs GA)
|
||
#'
|
||
#' @description Generates a side-by-side comparison of Level III ecoregions for Washington
|
||
#' and Georgia. It uses a "void" theme, qualitative colors, and carefully tuned
|
||
#' label repulsion settings to avoid overlapping text.
|
||
#'
|
||
#' @param eco_data An \code{sf} object containing ecoregion polygons. Must contain
|
||
#' columns \code{STATE_NAME} and \code{US_L3NAME}.
|
||
#'
|
||
#' @return A \code{patchwork} object containing the combined plot.
|
||
#'
|
||
#' @importFrom dplyr filter group_by summarize mutate
|
||
#' @importFrom sf st_transform st_union st_point_on_surface st_coordinates st_drop_geometry
|
||
#' @importFrom stringr str_wrap
|
||
#' @importFrom ggplot2 ggplot geom_sf aes coord_sf theme_void scale_fill_discrete guide_none element_text margin element_rect labs
|
||
#' @importFrom ggrepel geom_label_repel
|
||
#' @importFrom colorspace scale_fill_discrete_qualitative
|
||
#' @importFrom patchwork plot_annotation
|
||
#'
|
||
#' @export
|
||
plot_ecoregion_comparison <- function(eco_data) {
|
||
|
||
# 1. WASHINGTON PLOT
|
||
# A. Filter & Transform
|
||
wa_e <- eco_data %>%
|
||
dplyr::filter(STATE_NAME == "Washington") %>%
|
||
sf::st_transform(4326)
|
||
|
||
# B. Calculate Centroids for Labels
|
||
wa_labels <- wa_e %>%
|
||
dplyr::group_by(US_L3NAME) %>%
|
||
dplyr::summarize(geometry = sf::st_union(geometry)) %>%
|
||
dplyr::mutate(
|
||
lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1],
|
||
lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2],
|
||
clean_label = stringr::str_wrap(US_L3NAME, width = 35)
|
||
) %>%
|
||
sf::st_drop_geometry()
|
||
|
||
# C. Generate Plot
|
||
p_wa <- ggplot2::ggplot() +
|
||
# Geometries
|
||
ggplot2::geom_sf(data = wa_e, ggplot2::aes(fill = US_L3NAME),
|
||
alpha = 1, color = "white", linewidth = 0.2) +
|
||
|
||
ggplot2::coord_sf(datum = NA, expand = TRUE) +
|
||
|
||
# Repelled Labels
|
||
ggrepel::geom_label_repel(
|
||
data = wa_labels,
|
||
ggplot2::aes(x = lon, y = lat, label = clean_label),
|
||
inherit.aes = FALSE,
|
||
size = 5,
|
||
family = "atkinson",
|
||
lineheight = 1.6,
|
||
box.padding = 0.75,
|
||
min.segment.length = 0,
|
||
force = 30,
|
||
max.overlaps = Inf,
|
||
alpha = 0.95,
|
||
segment.color = "grey30",
|
||
segment.size = 1,
|
||
seed = 42
|
||
) +
|
||
|
||
# Colors
|
||
colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") +
|
||
|
||
# Manual Theme
|
||
ggplot2::theme_void(base_family = "atkinson")
|
||
|
||
|
||
# 2. GEORGIA PLOT
|
||
# A. Filter & Transform
|
||
ga_e <- eco_data %>%
|
||
dplyr::filter(STATE_NAME == "Georgia") %>%
|
||
sf::st_transform(4326)
|
||
|
||
# B. Calculate Centroids for Labels
|
||
ga_labels <- ga_e %>%
|
||
dplyr::group_by(US_L3NAME) %>%
|
||
dplyr::summarize(geometry = sf::st_union(geometry)) %>%
|
||
dplyr::mutate(
|
||
lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1],
|
||
lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2],
|
||
clean_label = stringr::str_wrap(US_L3NAME, width = 35)
|
||
) %>%
|
||
sf::st_drop_geometry()
|
||
|
||
# C. Generate Plot
|
||
p_ga <- ggplot2::ggplot() +
|
||
# Geometries
|
||
ggplot2::geom_sf(data = ga_e, ggplot2::aes(fill = US_L3NAME),
|
||
alpha = 1, color = "white", linewidth = 0.2) +
|
||
|
||
ggplot2::coord_sf(datum = NA, expand = TRUE) +
|
||
|
||
# Repelled Labels
|
||
ggrepel::geom_label_repel(
|
||
data = ga_labels,
|
||
ggplot2::aes(x = lon, y = lat, label = clean_label),
|
||
inherit.aes = FALSE,
|
||
size = 5,
|
||
family = "atkinson",
|
||
lineheight = 1.6,
|
||
box.padding = 0.75,
|
||
min.segment.length = 0,
|
||
force = 40,
|
||
max.overlaps = Inf,
|
||
alpha = 0.95,
|
||
segment.color = "grey30",
|
||
segment.size = 1,
|
||
seed = 4
|
||
) +
|
||
|
||
# Colors
|
||
colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") +
|
||
|
||
# Manual Theme
|
||
ggplot2::theme_void(base_family = "atkinson") +
|
||
ggplot2::theme(
|
||
plot.title = ggplot2::element_text(face = "bold", margin = ggplot2::margin(b = 10)),
|
||
plot.tag = ggplot2::element_text(face = "bold"),
|
||
legend.position = "bottom"
|
||
) +
|
||
ggplot2::labs(x = NULL, y = NULL)
|
||
|
||
|
||
# 3. COMBINE & ANNOTATE
|
||
combined_plot <- (p_wa + p_ga) +
|
||
patchwork::plot_annotation(
|
||
tag_levels = 'a',
|
||
tag_prefix = '(',
|
||
tag_suffix = ')'
|
||
) &
|
||
# Apply theme settings globally to the patchwork
|
||
ggplot2::theme_void(base_family = "atkinson")
|
||
|
||
return(combined_plot)
|
||
}
|
||
|
||
|
||
#' Plot Univariate Distributions by Forest Cover
|
||
#'
|
||
#' @description Generates a faceted density plot comparing numeric environmental
|
||
#' variables between forested and non-forested areas. It uses the "Dark 3"
|
||
#' qualitative palette and applys the Wilke cowplot theme.
|
||
#'
|
||
#' @param data A data frame or sf object containing the forest data.
|
||
#' Must include `forested` column and numeric environmental variables.
|
||
#'
|
||
#' @return A `ggplot` object.
|
||
#'
|
||
#' @importFrom dplyr select starts_with
|
||
#' @importFrom tidyr pivot_longer
|
||
#' @importFrom ggplot2 ggplot aes geom_density facet_wrap scale_fill_manual labs
|
||
#' @importFrom cowplot theme_cowplot
|
||
#' @importFrom colorspace qualitative_hcl
|
||
#' @importFrom sysfonts font_add_google
|
||
#' @importFrom showtext showtext_auto
|
||
#'
|
||
#' @export
|
||
plot_forest_distributions <- function(data) {
|
||
|
||
# 1. Typography Setup
|
||
sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson")
|
||
showtext::showtext_auto()
|
||
|
||
# 2. Color Palette (Dark 3 from colorspace)
|
||
# Generating the hex codes specifically for 2 levels
|
||
pal_colors <- colorspace::qualitative_hcl(palette = "Dark 3", n = 2)
|
||
|
||
# 3. Plotting
|
||
data %>%
|
||
dplyr::select(
|
||
forested, elevation, eastness, northness, roughness,
|
||
dew_temp, precip_annual,
|
||
dplyr::starts_with("temp_"),
|
||
dplyr::starts_with("vapor_")
|
||
) %>%
|
||
tidyr::pivot_longer(
|
||
cols = -forested,
|
||
names_to = "variable",
|
||
values_to = "value"
|
||
) %>%
|
||
ggplot2::ggplot(ggplot2::aes(x = value, fill = forested)) +
|
||
# color = NA removes the outline for a cleaner "Wilke-style" look
|
||
ggplot2::geom_density(alpha = 0.6, color = NA) +
|
||
ggplot2::facet_wrap(~ variable, scales = "free") +
|
||
|
||
# Scales & Labels
|
||
ggplot2::scale_fill_manual(values = pal_colors) +
|
||
ggplot2::labs(
|
||
x = NULL,
|
||
y = "Density",
|
||
fill = "Forested"
|
||
) +
|
||
ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.05))) +
|
||
ggplot2::scale_x_continuous(
|
||
expand = ggplot2::expansion(mult = c(0, 0)),
|
||
labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
|
||
|
||
# Theme
|
||
cowplot::theme_cowplot(font_family = "atkinson", font_size = 22) +
|
||
# Minor tweaks to spacing
|
||
ggplot2::theme(
|
||
legend.position = "bottom",
|
||
strip.background = ggplot2::element_blank(), # Clean facet headers
|
||
strip.text = ggplot2::element_text(face = "bold")
|
||
)
|
||
}
|
||
identify_outliers <- function(data){
|
||
# 1. Prepare the data (calculate Z-scores first)
|
||
plot_data <- data %>%
|
||
sf::st_drop_geometry() %>%
|
||
dplyr::select(forested, where(is.numeric)) %>%
|
||
dplyr::select(-any_of(c("lon", "lat", "year"))) %>%
|
||
tidyr::pivot_longer(cols = -forested, names_to = "variable", values_to = "value") %>%
|
||
dplyr::group_by(variable) %>%
|
||
# Wrap scale in as.numeric to fix the matrix warning
|
||
dplyr::mutate(scaled_value = as.numeric(scale(value))) %>%
|
||
dplyr::ungroup()
|
||
|
||
# 2. Plot
|
||
ggplot2::ggplot(plot_data, aes(x = forested, y = scaled_value, fill = forested)) +
|
||
# Draw the main distribution (suppress default outliers)
|
||
ggplot2::geom_boxplot(outlier.shape = NA, alpha = 0.4) +
|
||
|
||
# CRITICAL FIX: Only pass the extreme values to the points layer
|
||
ggplot2::geom_jitter(
|
||
data = dplyr::filter(plot_data, abs(scaled_value) > 3),
|
||
width = 0.2,
|
||
alpha = 0.6,
|
||
size = 1.5 # Make the outliers pop
|
||
) +
|
||
|
||
ggplot2::facet_wrap(~variable, scales = "free_y") +
|
||
ggplot2::theme_minimal() +
|
||
ggplot2::scale_fill_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) +
|
||
ggplot2::labs(
|
||
title = "Standardized Distribution & Extremes",
|
||
subtitle = "Points indicate observations > 3 Standard Deviations from mean",
|
||
y = "Z-Score (σ)"
|
||
)
|
||
}
|
||
|
||
fetch_study_area <- function(target_states) {
|
||
tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
|
||
dplyr::filter(NAME %in% target_states) %>%
|
||
rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE)
|
||
}
|
||
|
||
fetch_state_boundary <- function(state){
|
||
tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
|
||
dplyr::filter(NAME == state) %>%
|
||
rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE)
|
||
}
|
||
|
||
create_elevation_raster <- function(boundary_sf, path) {
|
||
elev_raster <- elevatr::get_elev_raster(locations = boundary_sf, z = 7, clip = "locations")
|
||
elev_terra <- terra::rast(elev_raster) %>% terra::mask(terra::vect(boundary_sf))
|
||
terra::writeRaster(elev_terra, path, overwrite = TRUE)
|
||
return(path)
|
||
}
|
||
#' Save Outlier Diagnostic Map
|
||
#'
|
||
#' @description Generates a diagnostic map highlighting multivariate outliers (Z > 3)
|
||
#' overlaid on a hillshaded elevation raster. Uses the standardized forestry theme.
|
||
#'
|
||
#' @param data A data frame containing the analysis dataset (must include numeric columns and 'forested' factor).
|
||
#' @param boundary_sf An \code{sf} object representing the state boundary (e.g., Washington).
|
||
#' @param raster_path Character string. File path to the elevation raster (.tif).
|
||
#' @param output_path Character string. File path where the PNG will be saved.
|
||
#'
|
||
#' @importFrom terra rast terrain shade
|
||
#' @importFrom sf st_transform st_as_sf st_filter st_drop_geometry
|
||
#' @importFrom dplyr select filter if_any all_of mutate
|
||
#' @importFrom tibble tibble
|
||
#' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_manual labs ggsave guides guide_legend
|
||
#' @importFrom tidyterra geom_spatraster scale_fill_hypso_c
|
||
#' @importFrom ggrepel geom_label_repel
|
||
#'
|
||
#' @return The \code{output_path} (invisible), for integration with \code{targets}.
|
||
#' @export
|
||
save_outlier_map_png <- function(data, boundary_sf, raster_path, output_path) {
|
||
|
||
# 1. Load & Process Raster
|
||
elev_terra <- terra::rast(raster_path)
|
||
slope <- terra::terrain(elev_terra, "slope", unit = "radians")
|
||
aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
|
||
hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
|
||
|
||
# 2. Prep Vectors
|
||
wa_boundary_sf <- sf::st_transform(boundary_sf, 4326)
|
||
|
||
data_sf <- data %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_filter(wa_boundary_sf)
|
||
|
||
# Identify numeric columns automatically
|
||
numeric_cols <- data_sf %>%
|
||
sf::st_drop_geometry() %>%
|
||
dplyr::select(where(is.numeric)) %>%
|
||
names()
|
||
|
||
# Filter for >3 Z-score
|
||
outliers_sf <- data_sf %>%
|
||
dplyr::filter(dplyr::if_any(dplyr::all_of(numeric_cols), ~ abs(scale(.)) > 3))
|
||
|
||
# WA Peak Labels
|
||
wa_peaks_sf <- tibble::tibble(
|
||
peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"),
|
||
lat = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013),
|
||
lon = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111)
|
||
) %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
|
||
|
||
# 3. Plot
|
||
plt <- ggplot2::ggplot() +
|
||
# Layer A: Elevation Color
|
||
tidyterra::geom_spatraster(data = elev_terra) +
|
||
tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") +
|
||
|
||
# Layer B: Hillshade Shadow Mask
|
||
tidyterra::geom_spatraster(
|
||
data = hill_terra,
|
||
ggplot2::aes(alpha = ggplot2::after_stat(value)),
|
||
fill = "black",
|
||
show.legend = FALSE
|
||
) +
|
||
ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
|
||
|
||
# Layer C: Vectors (Boundary + Outlier Points)
|
||
ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
|
||
ggplot2::geom_sf(data = outliers_sf, ggplot2::aes(color = forested), size = 2.5) +
|
||
ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) +
|
||
|
||
# Layer D: Labels
|
||
ggrepel::geom_label_repel(
|
||
data = wa_peaks_sf,
|
||
ggplot2::aes(label = peak, geometry = geometry),
|
||
stat = "sf_coordinates",
|
||
size = 5,
|
||
fontface = "bold",
|
||
box.padding = 0.6,
|
||
min.segment.length = 0
|
||
) +
|
||
|
||
# Theme Application
|
||
theme_forestry_spatial() +
|
||
labs(x = "", y = "")
|
||
|
||
# Save
|
||
ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white")
|
||
return(output_path)
|
||
}
|
||
|
||
plot_wa_pca <- function(data) {
|
||
# REVISED: Keep Lat/Lon in the numeric matrix
|
||
numeric_mat <- data %>%
|
||
sf::st_drop_geometry() %>%
|
||
dplyr::select(where(is.numeric)) %>%
|
||
dplyr::select(-any_of(c("year")))
|
||
|
||
pca_res <- stats::prcomp(numeric_mat, scale. = TRUE)
|
||
|
||
# Calculate variance
|
||
var_exp <- round(100 * pca_res$sdev^2 / sum(pca_res$sdev^2), 1)
|
||
|
||
# Calculate the sum for the reader
|
||
total_var <- var_exp[1] + var_exp[2]
|
||
|
||
as.data.frame(pca_res$x) %>%
|
||
dplyr::mutate(forested = data$forested) %>%
|
||
ggplot2::ggplot(aes(x = PC1, y = PC2, color = forested)) +
|
||
ggplot2::geom_point(alpha = 0.5) +
|
||
ggplot2::scale_color_manual(values = c("Yes" = "#2D5A27", "No" = "#A0522D")) +
|
||
ggplot2::theme_minimal(base_size = 14) +
|
||
ggplot2::labs(
|
||
title = "PCA: Biophysical + Geography",
|
||
# REVISED SUBTITLE: Sums it up cleanly
|
||
subtitle = paste0("First two components capture ", total_var, "% of total variance"),
|
||
x = paste0("PC1 (", var_exp[1], "%)"),
|
||
y = paste0("PC2 (", var_exp[2], "%)")
|
||
) +
|
||
ggplot2::scale_x_continuous(limits = c(-10, 5))
|
||
}
|
||
|
||
plot_correlations <- function(data) {
|
||
# 1. Data Prep (Same as before)
|
||
cor_data <- data %>%
|
||
sf::st_drop_geometry() %>%
|
||
dplyr::mutate(Forested_Binary = dplyr::if_else(forested == "Yes", 1, 0)) %>%
|
||
dplyr::select(Forested_Binary, dplyr::where(is.numeric)) %>%
|
||
dplyr::select(-any_of(c("year")))
|
||
|
||
cor_df <- cor(cor_data, use = "pairwise.complete.obs") %>%
|
||
as.data.frame() %>%
|
||
dplyr::select(correlation = Forested_Binary) %>%
|
||
tibble::rownames_to_column("variable") %>%
|
||
dplyr::filter(variable != "Forested_Binary") %>%
|
||
dplyr::mutate(
|
||
sign = dplyr::if_else(correlation > 0, "Positive", "Negative"),
|
||
is_spatial = dplyr::if_else(variable %in% c("lat", "lon"), "Spatial", "Biophysical")
|
||
)
|
||
|
||
# 2. Plot with SPACE
|
||
ggplot2::ggplot(cor_df, aes(x = reorder(variable, correlation), y = correlation)) +
|
||
ggplot2::geom_segment(aes(xend = variable, yend = 0), color = "gray50") +
|
||
ggplot2::geom_point(aes(color = sign, shape = is_spatial), size = 5) +
|
||
|
||
ggplot2::coord_flip() +
|
||
ggplot2::theme_minimal(base_size = 18) +
|
||
|
||
ggplot2::scale_color_manual(values = c("Positive" = "#228B22", "Negative" = "#D2691E")) +
|
||
|
||
ggplot2::labs(
|
||
title = "What drives the 'Forested' classification?",
|
||
subtitle = "Correlations with Forested (Yes=1). Note the impact of Latitude.",
|
||
x = NULL,
|
||
y = "Correlation Coefficient",
|
||
color = "Direction",
|
||
shape = "Variable Type"
|
||
)
|
||
}
|
||
|
||
#' Plot Random Forest Variable Importance
|
||
#'
|
||
#' @description Fits a ranger Random Forest model to the provided data, calculates
|
||
#' permutation importance, and generates a lollipop chart. It distinguishes
|
||
#' between spatial (lat/lon) and biophysical predictors.
|
||
#' Uses the project's 'Atkinson' font theme via theme_forestry_plot().
|
||
#'
|
||
#' @param data An sf object or data frame containing the 'forested' target and predictors.
|
||
#'
|
||
#' @importFrom ranger ranger
|
||
#' @importFrom vip vi
|
||
#' @importFrom sf st_drop_geometry
|
||
#' @importFrom dplyr select mutate if_else any_of where
|
||
#' @importFrom ggplot2 ggplot aes geom_segment geom_point scale_shape_manual scale_color_manual labs
|
||
#' @export
|
||
plot_rf_importance <- function(data) {
|
||
|
||
# 1. Clean Data & Fit Model
|
||
model_data <- data %>%
|
||
sf::st_drop_geometry() %>%
|
||
dplyr::select(forested, dplyr::where(is.numeric)) %>%
|
||
dplyr::select(-dplyr::any_of(c("year"))) %>%
|
||
dplyr::mutate(forested = as.factor(forested))
|
||
|
||
rf_model <- ranger::ranger(
|
||
formula = forested ~ .,
|
||
data = model_data,
|
||
importance = "permutation",
|
||
seed = 123
|
||
)
|
||
|
||
# 2. Extract importance & Add Spatial Flag
|
||
imp_df <- vip::vi(rf_model) %>%
|
||
dplyr::mutate(
|
||
Type = dplyr::if_else(Variable %in% c("lat", "lon"), "Spatial", "Biophysical")
|
||
)
|
||
|
||
# 3. Tufte-style Lollipop with Shapes
|
||
ggplot2::ggplot(imp_df, ggplot2::aes(x = Importance, y = reorder(Variable, Importance))) +
|
||
# Stems
|
||
ggplot2::geom_segment(ggplot2::aes(xend = 0, yend = Variable), color = "gray60", linewidth = 0.8) +
|
||
|
||
# Dots (Mapped to Shape)
|
||
ggplot2::geom_point(
|
||
ggplot2::aes(shape = Type, color = Type),
|
||
size = 5
|
||
) +
|
||
|
||
# --- VISUAL MAPPING ---
|
||
ggplot2::scale_shape_manual(values = c("Biophysical" = 16, "Spatial" = 17)) +
|
||
ggplot2::scale_color_manual(values = c("Biophysical" = "#228B22", "Spatial" = "black")) +
|
||
|
||
# --- THEME FIX ---
|
||
# We use your custom theme, which now enforces Atkinson fonts.
|
||
# turning off grid because lollipops look cleaner without it
|
||
theme_forestry_plot(font_size = 16, grid = FALSE) +
|
||
|
||
ggplot2::labs(
|
||
title = "Random Forest Variable Importance",
|
||
subtitle = "Permutation Importance. Note the high rank of spatial variables.",
|
||
y = NULL,
|
||
x = "Importance (Loss in Accuracy if scrambled)",
|
||
shape = "Variable Type",
|
||
color = "Variable Type"
|
||
)
|
||
}
|
||
plot_umap_forested <- function(data) {
|
||
|
||
data_clean <- data %>%
|
||
drop_na()
|
||
|
||
numeric_data <- data_clean %>%
|
||
select(where(is.numeric)) %>%
|
||
scale()
|
||
|
||
set.seed(42)
|
||
umap_fit <- umap::umap(numeric_data)
|
||
|
||
# Format for plotting
|
||
umap_df <- umap_fit$layout %>%
|
||
as.data.frame() %>%
|
||
dplyr::rename(UMAP1 = V1, UMAP2 = V2) %>%
|
||
dplyr::mutate(forested = as.factor(data_clean$forested))
|
||
|
||
# Generate Plot
|
||
ggplot(umap_df, aes(x = UMAP1, y = UMAP2, color = forested)) +
|
||
geom_point(alpha = 0.6, size = 1.5) +
|
||
#scale_color_viridis_d(name = "Forested Status", begin = 0.2, end = 0.8) +
|
||
scale_color_manual(
|
||
name = "Forested?",
|
||
# Assumes factor order is (No, Yes). Swap colors if order differs.
|
||
values = c("#228B22", "#D2691E")
|
||
) +
|
||
theme_minimal() +
|
||
labs(
|
||
title = "UMAP Projection of Forested Areas",
|
||
subtitle = "Clustering based on numeric landscape features",
|
||
x = "UMAP Dimension 1",
|
||
y = "UMAP Dimension 2"
|
||
)
|
||
}
|
||
|
||
plot_cv_strategies <- function(data) {
|
||
|
||
# 1. SETUP DATA & ADD ID
|
||
# We MUST add an ID column to track rows across splits
|
||
forested_sf <- st_as_sf(
|
||
data,
|
||
coords = c("lon", "lat"),
|
||
crs = 4326,
|
||
remove = FALSE
|
||
) %>%
|
||
mutate(id = row_number()) # <--- Critical: Adds the ID we need later
|
||
|
||
# 2. DEFINE CUSTOM ECOLOGICAL DISTANCE
|
||
dist_env <- function(x) {
|
||
x %>%
|
||
st_drop_geometry() %>%
|
||
select(elevation, precip_annual, temp_annual_mean, roughness) %>%
|
||
scale() %>%
|
||
dist()
|
||
}
|
||
|
||
# 3. CREATE THE SPLITS
|
||
set.seed(42)
|
||
|
||
# A. Random
|
||
random_folds <- vfold_cv(forested_sf, v = 5)
|
||
|
||
# B. Spatial Block
|
||
spatial_folds <- spatial_block_cv(forested_sf, v = 5)
|
||
|
||
# C. Environmental Clustering
|
||
cluster_folds <- spatial_clustering_cv(
|
||
forested_sf,
|
||
v = 5,
|
||
cluster_function = "hclust",
|
||
distance_function = dist_env
|
||
)
|
||
|
||
# 4. HELPER: EXTRACT FOLD IDs
|
||
# This assigns a Fold Number (1-5) to every single point
|
||
get_fold_ids <- function(splits, original_data) {
|
||
results <- original_data %>%
|
||
mutate(fold = NA_character_)
|
||
|
||
for(i in seq_along(splits$splits)) {
|
||
test_ids <- assessment(splits$splits[[i]]) %>% pull(id)
|
||
results <- results %>%
|
||
mutate(fold = ifelse(id %in% test_ids, as.character(i), fold))
|
||
}
|
||
return(results)
|
||
}
|
||
|
||
# 5. PREPARE PLOTTING DATA
|
||
df_random <- get_fold_ids(random_folds, forested_sf)
|
||
df_spatial <- get_fold_ids(spatial_folds, forested_sf)
|
||
df_cluster <- get_fold_ids(cluster_folds, forested_sf)
|
||
|
||
# 6. COMMON PLOT FUNCTION
|
||
plot_folds <- function(df, title) {
|
||
ggplot(df) +
|
||
geom_sf(aes(color = fold), size = 0.5, alpha = 0.6) +
|
||
scale_color_brewer(palette = "Set1", name = "Fold") + # <--- The Rainbow Colors
|
||
#scale_color_discrete_qualitative(palette = "Harmonic", name = "Fold") +
|
||
#scale_color_discrete_qualitative(palette = "Dark 3", name = "Fold") +
|
||
#scale_color_discrete_qualitative(palette = "Dynamic", name = "Fold") +
|
||
ggtitle(title) +
|
||
theme_void() +
|
||
theme(
|
||
plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
|
||
legend.position = "none"
|
||
)
|
||
}
|
||
|
||
# 7. GENERATE & COMBINE
|
||
p1 <- plot_folds(df_random, "Random\n(Confetti)")
|
||
p2 <- plot_folds(df_spatial, "Spatial Block\n(Checkerboard)")
|
||
p3 <- plot_folds(df_cluster, "Env. Clustering\n(Ecoregions)")
|
||
|
||
p1 + p2 + p3 + plot_layout(ncol = 3)
|
||
}
|
||
|
||
plot_spatial_cv_comparison <- function(res_random, res_block, res_cluster) {
|
||
|
||
# 1. Extract & Clean Data
|
||
all_metrics <- bind_rows(
|
||
res_random %>% collect_metrics() %>% mutate(strategy = "Random CV"),
|
||
res_block %>% collect_metrics() %>% mutate(strategy = "Block CV"),
|
||
res_cluster %>% collect_metrics() %>% mutate(strategy = "Cluster CV")
|
||
) %>%
|
||
filter(.metric == "roc_auc") %>%
|
||
mutate(
|
||
recipe_label = case_when(
|
||
str_detect(wflow_id, "^base") ~ "Coords",
|
||
str_detect(wflow_id, "^non_spatial") ~ "No Coords",
|
||
str_detect(wflow_id, "^extensible") ~ "Extensible",
|
||
TRUE ~ "Unknown"
|
||
),
|
||
model = str_remove(wflow_id, "(base_|non_spatial_|extensible_)"),
|
||
strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")),
|
||
recipe_label = factor(recipe_label, levels = c("Coords", "No Coords", "Extensible"))
|
||
)
|
||
|
||
# 2. Generate Plot
|
||
ggplot(all_metrics, aes(x = model, y = mean, color = model)) +
|
||
geom_hline(yintercept = 0.96, linetype = "dashed", color = "gray50", alpha = 0.5) +
|
||
geom_point(size = 4) +
|
||
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), width = 0.2) +
|
||
scale_color_discrete_qualitative(palette = "Dark 3") +
|
||
facet_grid(recipe_label ~ strategy) +
|
||
scale_y_continuous(labels = scales::number_format(accuracy = 0.01),
|
||
limits = c(.65, 1)) +
|
||
labs(
|
||
y = "ROC AUC Score",
|
||
x = "Model Type",
|
||
color = "Model"
|
||
) +
|
||
theme_bw(base_size = 14) +
|
||
theme(
|
||
legend.position = "none",
|
||
panel.grid.minor = element_blank(),
|
||
strip.text = element_text(face = "bold", size = 10)
|
||
)
|
||
}
|
||
plot_spatial_vip_comparison <- function(res_random, res_block, res_cluster, model_id = "base_rf") {
|
||
|
||
# Internal helper to extract VIP from the best fold
|
||
get_best_fold_vip <- function(wflow_obj, strategy_name) {
|
||
|
||
# 1. Extract results for the specific model ID (e.g., "base_rf")
|
||
resample_results <- wflow_obj %>% extract_workflow_set_result(model_id)
|
||
|
||
# 2. Find the best fold
|
||
best_fold_id <- resample_results %>%
|
||
collect_metrics(summarize = FALSE) %>%
|
||
filter(.metric == "roc_auc") %>%
|
||
slice_max(.estimate, n = 1) %>%
|
||
slice(1) %>%
|
||
pull(id)
|
||
|
||
# 3. Get the split object for that fold
|
||
best_split <- resample_results$splits[[which(resample_results$id == best_fold_id)]]
|
||
|
||
# 4. Refit and calculate Importance
|
||
wflow_obj %>%
|
||
extract_workflow(model_id) %>%
|
||
finalize_workflow(select_best(resample_results, metric = "roc_auc")) %>%
|
||
fit(data = analysis(best_split)) %>%
|
||
extract_fit_parsnip() %>%
|
||
vi() %>%
|
||
mutate(strategy = strategy_name)
|
||
}
|
||
|
||
# Consolidate and clean
|
||
vip_data <- bind_rows(
|
||
get_best_fold_vip(res_random, "Random CV"),
|
||
get_best_fold_vip(res_block, "Block CV"),
|
||
get_best_fold_vip(res_cluster, "Cluster CV")
|
||
) %>%
|
||
# Filter ONLY for Lat/Lon (The interaction term doesn't exist in your new recipe)
|
||
filter(Variable %in% c("lat", "lon")) %>%
|
||
mutate(
|
||
Variable = case_when(
|
||
Variable == "lon" ~ "Longitude",
|
||
Variable == "lat" ~ "Latitude",
|
||
TRUE ~ Variable
|
||
),
|
||
strategy = factor(strategy, levels = c("Cluster CV", "Block CV", "Random CV"))
|
||
)
|
||
|
||
# Generate Plot
|
||
ggplot(vip_data, aes(x = Importance, y = strategy)) +
|
||
geom_segment(aes(x = 0, xend = Importance, y = strategy, yend = strategy),
|
||
color = "gray20", linewidth = .65) +
|
||
geom_point(aes(color = strategy), size = 5, show.legend = FALSE) +
|
||
facet_grid(Variable ~ ., scales = "free_y", space = "free_y", switch = "y") +
|
||
scale_color_discrete_qualitative(palette = "Dark 3") +
|
||
scale_x_continuous(expand = c(0, 0)) + # Removed fixed limit (105) to let it scale automatically
|
||
labs(
|
||
title = paste("Spatial Overfitting Check:", model_id),
|
||
subtitle = "Does the model rely on coordinates?",
|
||
x = "Importance Score (Impurity)",
|
||
y = NULL
|
||
) +
|
||
theme_classic(base_size = 14) +
|
||
theme(
|
||
panel.grid.minor = element_blank()
|
||
)
|
||
}
|
||
# vvv ADD THIS vvv
|
||
plot_model_stability <- function(res_random, res_block, res_cluster, model_id) {
|
||
|
||
# Internal helper to extract per-fold metrics
|
||
extract_fold_metrics <- function(wflow_obj, strategy_name) {
|
||
wflow_obj %>%
|
||
extract_workflow_set_result(id = model_id) %>% # <--- Now this works
|
||
collect_metrics(summarize = FALSE) %>%
|
||
filter(.metric == "roc_auc") %>%
|
||
mutate(strategy = strategy_name)
|
||
}
|
||
|
||
# Combine and factorize
|
||
all_fold_metrics <- bind_rows(
|
||
extract_fold_metrics(res_random, "Random CV"),
|
||
extract_fold_metrics(res_block, "Block CV"),
|
||
extract_fold_metrics(res_cluster, "Cluster CV")
|
||
) %>%
|
||
mutate(strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")))
|
||
|
||
# Generate Plot
|
||
ggplot(all_fold_metrics, aes(x = strategy, y = .estimate, color = strategy)) +
|
||
geom_violin(alpha = 0.4, outlier.shape = NA, width = 0.5) +
|
||
geom_jitter(width = 0.1, size = 3, alpha = 0.8) +
|
||
scale_fill_discrete_qualitative(palette = "Dark 3") +
|
||
scale_color_discrete_qualitative(palette = "Dark 3") +
|
||
scale_y_continuous(limits = c(0.80, 1.0), breaks = seq(0.80, 1.0, 0.05)) +
|
||
labs(
|
||
title = paste("Stability Analysis:", model_id), # <--- Helpful title
|
||
x = NULL,
|
||
y = "ROC AUC (per fold)",
|
||
caption = ""
|
||
) +
|
||
theme_classic(base_size = 14) +
|
||
theme(
|
||
legend.position = "none"
|
||
)
|
||
}
|
||
plot_final_test_results <- function(final_fit_obj) {
|
||
final_metrics <- final_fit_obj %>%
|
||
collect_metrics() %>%
|
||
filter(.metric %in% c("roc_auc", "accuracy")) %>%
|
||
mutate(
|
||
.metric = case_when(
|
||
.metric == "roc_auc" ~ "ROC AUC",
|
||
.metric == "accuracy" ~ "Accuracy",
|
||
TRUE ~ .metric
|
||
)
|
||
)
|
||
|
||
ggplot(final_metrics, aes(x = .metric, y = .estimate, fill = .metric)) +
|
||
geom_col(width = 0.6, show.legend = FALSE) +
|
||
geom_text(aes(label = round(.estimate, 3)), vjust = -0.5, size = 5, fontface = "bold") +
|
||
scale_fill_manual(values = c("ROC AUC" = "#2c3e50", "Accuracy" = "#18bc9c")) +
|
||
scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, 0.2), expand = c(0, 0)) +
|
||
labs(
|
||
x = NULL,
|
||
y = "Performance Score",
|
||
caption = "Evaluation based on the 20% held-out Washington Test Set."
|
||
) +
|
||
theme_classic(base_size = 14) +
|
||
theme(axis.text.x = element_text(face = "bold", size = 12))
|
||
}
|
||
|
||
plot_final_confusion_matrix <- function(final_fit_obj) {
|
||
# Extract the predictions from the last_fit object
|
||
preds <- final_fit_obj %>% collect_predictions()
|
||
|
||
# Generate the confusion matrix
|
||
cm <- preds %>%
|
||
conf_mat(truth = forested, estimate = .pred_class)
|
||
|
||
# Convert to a tidy format for plotting
|
||
autoplot(cm, type = "heatmap") +
|
||
scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") +
|
||
theme_minimal(base_size = 14) +
|
||
theme(
|
||
panel.grid = element_blank(),
|
||
axis.text = element_text(face = "bold"),
|
||
plot.title = element_text(face = "bold"),
|
||
legend.position = "none"
|
||
)
|
||
}
|
||
|
||
plot_fold_mechanics <- function(wa_sf, boundary_wa_sf) {
|
||
# 1. SETUP
|
||
set.seed(42)
|
||
|
||
# Ensure unique_id exists to prevent the "object not found" error
|
||
if(!"unique_id" %in% names(wa_sf)) {
|
||
wa_sf <- wa_sf %>% mutate(unique_id = row_number())
|
||
}
|
||
|
||
# 2. DEFINE CUSTOM DISTANCE
|
||
dist_env <- function(x) {
|
||
x %>%
|
||
st_drop_geometry() %>%
|
||
select(elevation, precip_annual, temp_annual_mean, roughness) %>%
|
||
scale() %>%
|
||
dist()
|
||
}
|
||
|
||
# 3. CREATE SPLITS
|
||
random_folds <- vfold_cv(wa_sf, v = 5)
|
||
spatial_folds <- spatial_block_cv(wa_sf, v = 5)
|
||
cluster_folds <- spatial_clustering_cv(wa_sf, v = 5, cluster_function = "hclust", distance_function = dist_env)
|
||
|
||
# 4. HELPER: EXTRACT STATUS
|
||
get_status <- function(split_obj) {
|
||
first_split <- split_obj$splits[[1]]
|
||
assessment_ids <- assessment(first_split) %>% pull(unique_id)
|
||
wa_sf %>%
|
||
mutate(status = if_else(unique_id %in% assessment_ids, "Assessment (Test)", "Analysis (Train)"))
|
||
}
|
||
|
||
df_r <- get_status(random_folds)
|
||
df_s <- get_status(spatial_folds)
|
||
df_c <- get_status(cluster_folds)
|
||
|
||
# 5. PLOTTER WITH BOUNDARY LAYER
|
||
plot_one <- function(df, title) {
|
||
ggplot() +
|
||
# Layer 1: The state boundary provides the spatial context
|
||
geom_sf(data = boundary_wa_sf, fill = "gray98", color = "gray85") +
|
||
# Layer 2: The actual data points
|
||
geom_sf(data = df, aes(color = status), size = 0.4, alpha = 0.7) +
|
||
scale_color_manual(values = c("Assessment (Test)" = "#E16A86", "Analysis (Train)" = "#606060")) +
|
||
ggtitle(title) +
|
||
theme_void() +
|
||
theme(
|
||
legend.position = "none",
|
||
plot.title = element_text(hjust = 0.5, face = "bold", size = 10)
|
||
)
|
||
}
|
||
|
||
# 6. COMBINE
|
||
(plot_one(df_r, "Random\n(Confetti)") +
|
||
plot_one(df_s, "Spatial\n(Checkerboard)") +
|
||
plot_one(df_c, "Env. Clustering\n(Ecoregions)")) +
|
||
plot_layout(ncol = 3, guides = "collect") &
|
||
theme(legend.position = 'bottom', legend.title = element_blank())
|
||
}
|
||
|
||
plot_classic_kfold_diagram <- function(){
|
||
library(tidyverse)
|
||
|
||
# 1. Setup Parameters
|
||
N_obs <- 100
|
||
K_folds <- 5
|
||
|
||
# 2. Create Mock Data and Assign Folds
|
||
set.seed(42)
|
||
base_data <- tibble(
|
||
obs_id = 1:N_obs,
|
||
assigned_fold_group = sample(rep(1:K_folds, length.out = N_obs))
|
||
)
|
||
|
||
# 3. Expand Data for Plotting (Cross-Join)
|
||
plot_data <- expand_grid(
|
||
obs_id = 1:N_obs,
|
||
iteration = 1:K_folds
|
||
) %>%
|
||
left_join(base_data, by = "obs_id") %>%
|
||
# Define Status: If the iteration matches the assigned fold group, it's Test data.
|
||
mutate(
|
||
status = case_when(
|
||
iteration == assigned_fold_group ~ "Assessment (20%)",
|
||
TRUE ~ "Analysis (80%)"
|
||
),
|
||
# Convert iteration to factor and reverse levels so Fold 1 is at the top of plot
|
||
iteration_fct = factor(iteration, levels = rev(1:K_folds))
|
||
)
|
||
|
||
# 4. Define Colors
|
||
cv_colors <- c(
|
||
"Analysis (80%)" = "#A6CEE3",
|
||
"Assessment (20%)" = "#FDB462"
|
||
)
|
||
|
||
# 5. Generate the Plot
|
||
ggplot(plot_data, aes(x = obs_id, y = iteration_fct, fill = status)) +
|
||
geom_tile(color = "white", linewidth = 0.2) +
|
||
scale_fill_manual(values = cv_colors, name = "Data Role") +
|
||
labs(
|
||
x = "Observations",
|
||
y = "Folds"
|
||
) +
|
||
|
||
# Theme adjustments
|
||
theme_minimal(base_size = 16) +
|
||
theme(
|
||
panel.grid = element_blank(),
|
||
axis.text.x = element_blank(),
|
||
axis.ticks.x = element_blank(),
|
||
legend.position = "bottom"
|
||
)
|
||
}
|
||
create_performance_table <- function(results_cluster, final_fit_results) {
|
||
require(dplyr)
|
||
require(gt)
|
||
require(tidymodels)
|
||
|
||
# 1. Extract the WINNING Model Info (Name & Score)
|
||
best_model_info <- results_cluster %>%
|
||
rank_results(rank_metric = "roc_auc", select_best = TRUE) %>%
|
||
filter(.metric == "roc_auc") %>%
|
||
slice(1) # Grab the #1 spot
|
||
|
||
# Get the dynamic name and score
|
||
best_model_name <- best_model_info$wflow_id
|
||
cv_score <- best_model_info$mean
|
||
|
||
# 2. Extract the Final Test Set Scores
|
||
test_scores <- final_fit_results %>%
|
||
collect_metrics() %>%
|
||
select(.metric, .estimate) %>%
|
||
tidyr::pivot_wider(names_from = .metric, values_from = .estimate)
|
||
|
||
# 3. Create the Combined Data Frame
|
||
summary_data <- tibble(
|
||
# FIXED: Uses the actual winner's name instead of hardcoded text
|
||
Model = best_model_name,
|
||
`Validation AUC` = cv_score,
|
||
`Test AUC` = test_scores$roc_auc,
|
||
`Test Accuracy` = test_scores$accuracy
|
||
)
|
||
|
||
# 4. Render with gt
|
||
summary_data %>%
|
||
gt() %>%
|
||
tab_header(
|
||
title = md("**Final Model Performance**"),
|
||
subtitle = paste("Winner:", best_model_name)
|
||
) %>%
|
||
fmt_number(
|
||
columns = where(is.numeric),
|
||
decimals = 3
|
||
) %>%
|
||
cols_align(
|
||
align = "center",
|
||
columns = where(is.numeric)
|
||
) %>%
|
||
tab_footnote(
|
||
footnote = "Mean ROC AUC across spatial cross-validation folds.",
|
||
locations = cells_column_labels(columns = `Validation AUC`)
|
||
) %>%
|
||
tab_footnote(
|
||
footnote = "Evaluated on the 20% independent Washington holdout set.",
|
||
locations = cells_column_labels(columns = contains("Test"))
|
||
) %>%
|
||
tab_options(
|
||
table.border.top.color = "white",
|
||
table.border.bottom.color = "black",
|
||
column_labels.font.weight = "bold"
|
||
)
|
||
}
|
||
|
||
plot_yeo_johnson <- function(data) {
|
||
# 1. Prepare Recipe & Extract Lambda
|
||
rec_yeo <- recipe(~ elevation, data = data) %>%
|
||
step_YeoJohnson(elevation) %>%
|
||
prep()
|
||
|
||
lambda_val <- tidy(rec_yeo, number = 1) %>%
|
||
filter(terms == "elevation") %>%
|
||
pull(value) %>%
|
||
round(2)
|
||
|
||
# 2. Create Plotting Data
|
||
plot_data <- data %>%
|
||
select(elevation) %>%
|
||
mutate(type = "Original") %>%
|
||
bind_rows(
|
||
bake(rec_yeo, new_data = data) %>%
|
||
mutate(type = "Transformed") %>%
|
||
rename(elevation_trans = elevation)
|
||
)
|
||
|
||
# 3. Setup Colors
|
||
dark3_cols <- colorspace::qualitative_hcl(2, palette = "Dark 3")
|
||
|
||
# 4. Plot A: Original
|
||
p1 <- ggplot(filter(plot_data, type == "Original"), aes(x = elevation)) +
|
||
geom_density(fill = dark3_cols[1], color = dark3_cols[1], alpha = 0.6, linewidth = 1) +
|
||
scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
|
||
scale_y_continuous(expand = c(0, 0)) +
|
||
labs(x = "Elevation (m)", y = "Density") +
|
||
theme_minimal(base_size = 14) +
|
||
theme(
|
||
panel.grid.minor = element_blank(),
|
||
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
|
||
)
|
||
|
||
# 5. Plot B: Transformed
|
||
p2 <- ggplot(filter(plot_data, type == "Transformed"), aes(x = elevation_trans)) +
|
||
geom_density(fill = dark3_cols[2], color = dark3_cols[2], alpha = 0.6, linewidth = 1) +
|
||
scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
|
||
scale_y_continuous(expand = c(0, 0)) +
|
||
labs(x = paste0("Yeo-Johnson (Lambda: ", lambda_val, ")"), y = "Density") +
|
||
theme_minimal(base_size = 14) +
|
||
theme(
|
||
panel.grid.minor = element_blank(),
|
||
axis.title.y = element_blank(),
|
||
axis.text.y = element_blank(),
|
||
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
|
||
)
|
||
|
||
# 6. Combine
|
||
p1 + p2
|
||
}
|
||
|
||
#' Save Model Error Diagnostic Map
|
||
#'
|
||
#' @description Generates a diagnostic map highlighting prediction errors. It plots
|
||
#' misclassified points colored by the magnitude of the error (confidence in the wrong answer)
|
||
#' over a hillshaded elevation background.
|
||
#'
|
||
#' @param data A data frame containing model predictions (must include '.pred_class',
|
||
#' 'forested', '.pred_Yes', 'lon', and 'lat').
|
||
#' @param boundary_sf An \code{sf} object representing the state boundary.
|
||
#' @param raster_path Character string. File path to the elevation raster (.tif).
|
||
#' @param output_path Character string. File path where the PNG will be saved.
|
||
#'
|
||
#' @importFrom terra rast terrain shade
|
||
#' @importFrom sf st_transform st_as_sf st_filter
|
||
#' @importFrom dplyr filter mutate
|
||
#' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_viridis_c labs ggsave guides guide_colorbar unit margin
|
||
#' @importFrom tidyterra geom_spatraster scale_fill_hypso_c
|
||
#'
|
||
#' @return The \code{output_path} (invisible), for integration with \code{targets}.
|
||
#' @export
|
||
save_error_map_png <- function(data, boundary_sf, raster_path, output_path) {
|
||
|
||
# 1. Load & Process Hillshade (Standard Setup)
|
||
elev_terra <- terra::rast(raster_path)
|
||
slope <- terra::terrain(elev_terra, "slope", unit = "radians")
|
||
aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
|
||
hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
|
||
|
||
# 2. Prep Vectors & Calculate Errors
|
||
wa_boundary_sf <- sf::st_transform(boundary_sf, 4326)
|
||
|
||
# Filter to ONLY the mistakes and calculate how "wrong" they were
|
||
error_sf <- data %>%
|
||
# Keep only rows where the Hard Class Prediction was wrong
|
||
dplyr::filter(.pred_class != forested) %>%
|
||
# Calculate Magnitude: How far was the probability from the truth?
|
||
dplyr::mutate(
|
||
truth_num = ifelse(forested == "Yes", 1, 0),
|
||
error_magnitude = abs(truth_num - .pred_Yes)
|
||
) %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_filter(wa_boundary_sf)
|
||
|
||
# 3. Plot
|
||
plt <- ggplot2::ggplot() +
|
||
# --- Background Layers ---
|
||
tidyterra::geom_spatraster(data = elev_terra) +
|
||
tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") +
|
||
tidyterra::geom_spatraster(data = hill_terra, ggplot2::aes(alpha = ggplot2::after_stat(value)), fill = "black", show.legend = FALSE) +
|
||
ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
|
||
ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
|
||
|
||
# --- The Error Layer ---
|
||
ggplot2::geom_sf(
|
||
data = error_sf,
|
||
ggplot2::aes(color = error_magnitude),
|
||
size = 3,
|
||
alpha = 0.9
|
||
) +
|
||
ggplot2::scale_color_viridis_c(
|
||
option = "plasma",
|
||
name = "Error\nConfidence",
|
||
limits = c(0.5, 1.0),
|
||
direction = -1
|
||
) +
|
||
|
||
# --- Theme Application ---
|
||
theme_forestry_spatial() +
|
||
ggplot2::labs(x = NULL, y = NULL) +
|
||
|
||
# --- Legend Controls (Right Side Vertical) ---
|
||
ggplot2::guides(
|
||
fill = "none",
|
||
color = ggplot2::guide_colorbar(
|
||
title.position = "top",
|
||
barwidth = ggplot2::unit(0.5, "cm"), # Narrow width
|
||
barheight = ggplot2::unit(6, "cm") # Tall height for right side
|
||
)
|
||
) +
|
||
ggplot2::theme(legend.position = "right")
|
||
|
||
# Save
|
||
ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white")
|
||
return(output_path)
|
||
}
|
||
|
||
#' Calculate Area of Applicability Data
|
||
#'
|
||
#' @description Generates the Area of Applicability (AOA) scores (Dissimilarity Index)
|
||
#' for the Georgia extrapolation dataset based on the Washington training data.
|
||
#'
|
||
#' @param train_data Dataframe. The training data from Washington.
|
||
#' @param test_data Dataframe. The extrapolation data from Georgia.
|
||
#' @param predictors Character vector. The list of predictor variable names.
|
||
#'
|
||
#' @importFrom dplyr select all_of bind_cols
|
||
#' @importFrom tibble tibble
|
||
#' @importFrom waywiser ww_area_of_applicability
|
||
#' @importFrom stats predict
|
||
#' @importFrom sf st_as_sf
|
||
#'
|
||
#' @return An \code{sf} object containing the Georgia data with an added 'di' (Dissimilarity Index) column.
|
||
#' @export
|
||
calculate_ga_aoa <- function(train_data, test_data, predictors) {
|
||
|
||
# 1. Prepare Environments
|
||
train_env <- train_data %>% dplyr::select(dplyr::all_of(predictors))
|
||
test_env <- test_data %>% dplyr::select(dplyr::all_of(predictors))
|
||
|
||
# 2. Define "Flat" Importance
|
||
importance <- tibble::tibble(term = predictors, estimate = 1)
|
||
|
||
# 3. Fit AOA
|
||
aoa_model <- waywiser::ww_area_of_applicability(
|
||
x = train_env,
|
||
importance = importance
|
||
)
|
||
|
||
# 4. Predict
|
||
aoa_results <- stats::predict(aoa_model, test_env)
|
||
|
||
# 5. Return Spatial Object
|
||
test_data %>%
|
||
dplyr::bind_cols(aoa_results) %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
|
||
}
|
||
|
||
#' Plot Georgia Area of Applicability (AOA)
|
||
#'
|
||
#' @description Plots the pre-calculated Dissimilarity Index (DI) for Georgia.
|
||
#'
|
||
#' @param aoa_sf An \code{sf} object containing the 'di' column (output of \code{calculate_ga_aoa}).
|
||
#'
|
||
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_viridis_c labs guides guide_colorbar unit theme
|
||
#'
|
||
#' @return A \code{ggplot} object showing the Dissimilarity Index map.
|
||
#' @export
|
||
plot_georgia_aoa <- function(aoa_sf) {
|
||
|
||
ggplot2::ggplot(aoa_sf) +
|
||
# Points colored by Dissimilarity Index (DI)
|
||
ggplot2::geom_sf(ggplot2::aes(color = di), size = 1, alpha = 0.8) +
|
||
|
||
# Color Scale
|
||
ggplot2::scale_color_viridis_c(
|
||
option = "magma",
|
||
direction = 1,
|
||
name = "Dissimilarity\nIndex (DI)"
|
||
) +
|
||
|
||
# Theme
|
||
theme_forestry_spatial() +
|
||
ggplot2::labs(x = NULL, y = NULL) +
|
||
|
||
# Legend Control
|
||
ggplot2::guides(
|
||
color = ggplot2::guide_colorbar(
|
||
title.position = "top",
|
||
barwidth = ggplot2::unit(0.5, "cm"),
|
||
barheight = ggplot2::unit(6, "cm")
|
||
)
|
||
) +
|
||
ggplot2::theme(legend.position = "right")
|
||
}
|
||
|
||
# Predict on a new region (External Validation)
|
||
predict_external_region <- function(final_fit, new_data) {
|
||
wa_model <- final_fit$.workflow[[1]]
|
||
|
||
# FIX: Mock ALL columns that were in the training data
|
||
# The model expects 'geometry' because training data was an sf object
|
||
clean_data <- new_data %>%
|
||
dplyr::mutate(
|
||
northness = NA_real_,
|
||
county = NA_character_,
|
||
geometry = NA # Tidymodels will see this exists, then ignore it (as it's not a predictor)
|
||
)
|
||
|
||
# Predict Class and Probability
|
||
preds <- predict(wa_model, clean_data) %>%
|
||
dplyr::bind_cols(predict(wa_model, clean_data, type = "prob")) %>%
|
||
dplyr::bind_cols(new_data %>% dplyr::select(lat, lon, forested))
|
||
|
||
return(preds)
|
||
}
|
||
|
||
#' Plot Georgia Forest Comparison
|
||
#'
|
||
#' @description Creates a side-by-side comparison of forest cover for Georgia.
|
||
#' The left plot (a) is the Model Prediction, and the right plot (b) is the Actual Data.
|
||
#' Features a shared right-side legend and standardized spatial styling.
|
||
#'
|
||
#' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).
|
||
#' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia").
|
||
#'
|
||
#' @importFrom dplyr filter
|
||
#' @importFrom sf st_as_sf st_transform st_crs
|
||
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual labs theme guides guide_legend element_text
|
||
#' @importFrom patchwork plot_layout wrap_plots plot_annotation
|
||
#'
|
||
#' @return A \code{patchwork} object containing the labeled comparison plot.
|
||
#' @export
|
||
plot_ga_comparison_map <- function(pred_data, boundaries) {
|
||
|
||
# 1. Prepare Data & Boundaries
|
||
ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
|
||
|
||
plot_data <- pred_data %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_transform(sf::st_crs(ga_boundary))
|
||
|
||
# 2. Helper Function (No Title Argument)
|
||
create_map <- function(data, fill_col) {
|
||
ggplot2::ggplot() +
|
||
# Background
|
||
ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
|
||
# Points
|
||
ggplot2::geom_sf(data = data,
|
||
ggplot2::aes(color = {{ fill_col }}),
|
||
size = 0.5, alpha = 0.6) +
|
||
# Colors
|
||
ggplot2::scale_color_manual(
|
||
name = "Forest Cover",
|
||
values = c("Yes" = "#228B22", "No" = "#D2691E"),
|
||
labels = c("Forest", "Non-Forest")
|
||
) +
|
||
# Labels (No Title)
|
||
ggplot2::labs(title = NULL, x = NULL, y = NULL) +
|
||
|
||
# Theme
|
||
theme_forestry_spatial() +
|
||
|
||
# Define Legend Here to avoid Patchwork "Order of Ops" Error
|
||
ggplot2::guides(
|
||
color = ggplot2::guide_legend(
|
||
override.aes = list(size = 4, alpha = 1),
|
||
title.position = "top",
|
||
ncol = 1
|
||
)
|
||
) +
|
||
|
||
ggplot2::theme(
|
||
legend.title = ggplot2::element_text(face = "bold", size = 12),
|
||
legend.text = ggplot2::element_text(size = 10)
|
||
)
|
||
}
|
||
|
||
# 3. Create Subplots
|
||
p_pred <- create_map(plot_data, .pred_class) # Left: Predicted
|
||
p_actual <- create_map(plot_data, forested) # Right: Actual
|
||
|
||
# 4. Patchwork Layout with (a)/(b) Tags
|
||
combined_plot <- (p_pred + p_actual) +
|
||
patchwork::plot_layout(guides = "collect") +
|
||
patchwork::plot_annotation(
|
||
tag_levels = 'a',
|
||
tag_prefix = '(',
|
||
tag_suffix = ')',
|
||
theme = theme_forestry_spatial() # Ensure tags match font
|
||
) &
|
||
ggplot2::theme(
|
||
legend.position = "right",
|
||
plot.tag = ggplot2::element_text(size = 20, face = "bold") # Make tags prominent
|
||
)
|
||
|
||
return(combined_plot)
|
||
}
|
||
plot_ga_confusion_matrix <- function(pred_data) {
|
||
# 1. Generate the confusion matrix object
|
||
cm <- pred_data %>%
|
||
yardstick::conf_mat(truth = forested, estimate = .pred_class)
|
||
|
||
# 2. Plot using autoplot to match the previous style exactly
|
||
plt <- ggplot2::autoplot(cm, type = "heatmap") +
|
||
|
||
# MATCHED: The exact blue gradient from your previous slide
|
||
ggplot2::scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") +
|
||
ggplot2::theme_minimal(base_size = 14) +
|
||
ggplot2::theme(
|
||
panel.grid = ggplot2::element_blank(),
|
||
axis.text = ggplot2::element_text(face = "bold"),
|
||
plot.title = ggplot2::element_text(face = "bold", hjust = 0.5), # Added hjust for center alignment
|
||
legend.position = "none"
|
||
)
|
||
|
||
return(plt)
|
||
}
|
||
#' Plot Failure Mechanism Comparison
|
||
#'
|
||
#' @description Creates a side-by-side diagnostic plot returning a patchwork object.
|
||
#' (a) The Area of Applicability (Dissimilarity Index) showing where the model is extrapolating.
|
||
#' (b) The spatial distribution of actual classification errors.
|
||
#'
|
||
#' @param aoa_data Dataframe containing the AOA results (must have 'di', 'lon', 'lat').
|
||
#' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).
|
||
#' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia").
|
||
#'
|
||
#' @importFrom dplyr filter mutate case_when
|
||
#' @importFrom sf st_as_sf st_transform st_crs
|
||
#' @importFrom ggplot2 ggplot geom_sf aes labs theme guides guide_legend guide_colorbar element_rect element_text unit
|
||
#' @importFrom colorspace scale_color_discrete_qualitative
|
||
#' @importFrom patchwork plot_layout plot_annotation wrap_plots
|
||
#'
|
||
#' @return A \code{patchwork} object containing the side-by-side comparison.
|
||
#' @export
|
||
plot_failure_mechanism <- function(aoa_data, pred_data, boundaries) {
|
||
|
||
# 1. Prep Boundaries
|
||
ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
|
||
target_crs <- sf::st_crs(ga_boundary)
|
||
|
||
# 2. Prep Map A: Dissimilarity (Risk)
|
||
aoa_sf <- aoa_data %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_transform(target_crs)
|
||
|
||
p_risk <- ggplot2::ggplot() +
|
||
ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
|
||
ggplot2::geom_sf(data = aoa_sf, ggplot2::aes(color = di), size = 0.5, alpha = 0.8) +
|
||
ggplot2::scale_color_viridis_c(
|
||
option = "magma",
|
||
direction = 1,
|
||
name = "Dissimilarity Idx."
|
||
) +
|
||
ggplot2::labs(x = NULL, y = NULL) +
|
||
theme_forestry_spatial() +
|
||
ggplot2::guides(
|
||
color = ggplot2::guide_colorbar(
|
||
title.position = "top",
|
||
title.hjust = 0.5,
|
||
barwidth = ggplot2::unit(4.5, "cm"),
|
||
barheight = ggplot2::unit(.3, "cm")
|
||
)
|
||
) +
|
||
ggplot2::theme(legend.position ="bottom")
|
||
|
||
# 3. Prep Map B: Failures (Effect)
|
||
errors_sf <- pred_data %>%
|
||
dplyr::filter(.pred_class != forested) %>%
|
||
dplyr::mutate(
|
||
error_type = dplyr::case_when(
|
||
.pred_class == "Yes" & forested == "No" ~ "False Positive",
|
||
.pred_class == "No" & forested == "Yes" ~ "False Negative"
|
||
)
|
||
) %>%
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_transform(target_crs)
|
||
|
||
p_fail <- ggplot2::ggplot() +
|
||
ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
|
||
ggplot2::geom_sf(data = errors_sf, ggplot2::aes(color = error_type), size = 0.6, alpha = 0.6) +
|
||
colorspace::scale_color_discrete_qualitative(
|
||
palette = "Dark 3",
|
||
name = "Error Type"
|
||
) +
|
||
ggplot2::labs(x = NULL, y = NULL) +
|
||
theme_forestry_spatial() +
|
||
ggplot2::guides(
|
||
color = ggplot2::guide_legend(
|
||
override.aes = list(size = 4, alpha = 1),
|
||
title.position = "top",
|
||
title.hjust = 0.5
|
||
)
|
||
) +
|
||
ggplot2::theme(legend.position = "bottom")
|
||
|
||
# 4. Patchwork Assembly
|
||
combined_plot <- (p_risk + p_fail) +
|
||
patchwork::plot_layout(guides = "keep") +
|
||
patchwork::plot_annotation(
|
||
tag_levels = 'a',
|
||
tag_prefix = '(',
|
||
tag_suffix = ')',
|
||
theme = theme_forestry_spatial()
|
||
) &
|
||
ggplot2::theme(
|
||
plot.tag = ggplot2::element_text(size = 20, face = "bold"),
|
||
plot.background = ggplot2::element_rect(fill = "transparent", color = NA)
|
||
)
|
||
|
||
return(combined_plot)
|
||
}
|
||
|
||
#' Plot Annual Rainfall Comparison (Clipped Hexes)
|
||
#'
|
||
#' @description Creates a polished side-by-side comparison of annual precipitation.
|
||
#' Hexagons are spatially generated and clipped to the exact state boundaries
|
||
#' to eliminate "bleeding" edges. Uses \code{theme_forestry_void} with explicit
|
||
#' font sizing to match topographic maps.
|
||
#'
|
||
#' @param wa_data Dataframe containing Washington data (requires 'precip_annual', 'lat', 'lon').
|
||
#' @param ga_data Dataframe containing Georgia data (requires 'precip_annual', 'lat', 'lon').
|
||
#' @param boundaries An \code{sf} object containing state boundaries.
|
||
#' @param bins Integer. Number of hexes across the state width. Default is 30.
|
||
#' @param max_limit Numeric. The visual cap for rainfall (mm) to ensure comparable scales. Default is 2500.
|
||
#'
|
||
#' @importFrom dplyr filter mutate group_by summarize inner_join row_number
|
||
#' @importFrom sf st_as_sf st_transform st_crs st_make_grid st_intersection st_join st_drop_geometry
|
||
#' @importFrom ggplot2 ggplot geom_sf aes scale_fill_distiller guide_colorbar unit theme element_text element_rect margin coord_sf rel
|
||
#' @importFrom scales squish
|
||
#' @importFrom patchwork plot_layout plot_annotation
|
||
#'
|
||
#' @return A \code{patchwork} object containing the side-by-side comparison.
|
||
#' @export
|
||
plot_precip_hex_comparison <- function(wa_data, ga_data, boundaries, bins = 30, max_limit = 2500) {
|
||
|
||
# 1. Clean Data
|
||
wa_clean <- wa_data %>%
|
||
dplyr::filter(!is.na(precip_annual)) %>%
|
||
dplyr::mutate(precip_annual = as.numeric(precip_annual))
|
||
|
||
ga_clean <- ga_data %>%
|
||
dplyr::filter(!is.na(precip_annual)) %>%
|
||
dplyr::mutate(precip_annual = as.numeric(precip_annual))
|
||
|
||
# 2. Helper: Generate Clipped Hex Grid
|
||
get_clipped_hex_layer <- function(point_data, boundary_sf, n_bins) {
|
||
target_crs <- sf::st_crs(boundary_sf)
|
||
points_sf <- sf::st_as_sf(point_data, coords = c("lon", "lat"), crs = 4326) %>%
|
||
sf::st_transform(target_crs)
|
||
|
||
grid <- sf::st_make_grid(boundary_sf, n = n_bins, square = FALSE, flat_topped = TRUE) %>%
|
||
sf::st_as_sf() %>%
|
||
dplyr::mutate(hex_id = dplyr::row_number())
|
||
|
||
grid_clipped <- sf::st_intersection(grid, boundary_sf)
|
||
joined <- sf::st_join(points_sf, grid_clipped, join = sf::st_within)
|
||
|
||
hex_summary <- joined %>%
|
||
dplyr::filter(!is.na(hex_id)) %>%
|
||
dplyr::group_by(hex_id) %>%
|
||
dplyr::summarize(precip_mean = mean(precip_annual, na.rm = TRUE))
|
||
|
||
final_layer <- grid_clipped %>%
|
||
dplyr::inner_join(sf::st_drop_geometry(hex_summary), by = "hex_id")
|
||
|
||
return(final_layer)
|
||
}
|
||
|
||
# 3. Process States
|
||
wa_bnd <- boundaries %>% dplyr::filter(NAME == "Washington" | NAME == "WA")
|
||
ga_bnd <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
|
||
|
||
wa_hex_sf <- get_clipped_hex_layer(wa_clean, wa_bnd, bins)
|
||
ga_hex_sf <- get_clipped_hex_layer(ga_clean, ga_bnd, bins)
|
||
|
||
# 4. Plotting Helper
|
||
plot_hex_sf <- function(hex_data, boundary_sf) {
|
||
ggplot2::ggplot() +
|
||
ggplot2::geom_sf(data = hex_data, ggplot2::aes(fill = precip_mean), color = NA) +
|
||
ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
|
||
|
||
# [CRITICAL] Removes grid lines (datum=NA) and removes padding (expand=FALSE)
|
||
ggplot2::coord_sf(datum = NA, expand = FALSE) +
|
||
|
||
ggplot2::scale_fill_distiller(
|
||
palette = "Blues",
|
||
direction = 1,
|
||
limits = c(0, max_limit),
|
||
oob = scales::squish,
|
||
name = "Mean Annual Precip (mm)",
|
||
guide = ggplot2::guide_colorbar(
|
||
title.position = "top",
|
||
title.hjust = 0.5,
|
||
barwidth = ggplot2::unit(8, "cm"),
|
||
barheight = ggplot2::unit(0.4, "cm"),
|
||
frame.colour = "black",
|
||
ticks.colour = "black"
|
||
)
|
||
) +
|
||
# Use Base Size 16 to match Topo Plot
|
||
theme_forestry_void(base_size = 16) +
|
||
ggplot2::theme(legend.position = "none")
|
||
}
|
||
|
||
p_wa <- plot_hex_sf(wa_hex_sf, wa_bnd)
|
||
p_ga <- plot_hex_sf(ga_hex_sf, ga_bnd)
|
||
|
||
# 5. Patchwork Assembly
|
||
combined_plot <- (p_wa + p_ga) +
|
||
patchwork::plot_layout(guides = "collect") +
|
||
patchwork::plot_annotation(
|
||
tag_levels = 'a', tag_prefix = '(', tag_suffix = ')'
|
||
) &
|
||
# Ensure the combined plot also uses base_size 16
|
||
theme_forestry_void(base_size = 28)
|
||
|
||
return(combined_plot)
|
||
}
|
||
|
||
#' Get Major Peaks for Plotting
|
||
#'
|
||
#' Internal helper to return peak locations for WA and GA.
|
||
#'
|
||
#' @param state_name String. Either "Washington" or "Georgia".
|
||
#' @return An sf object containing peak names and locations.
|
||
#'
|
||
#' @importFrom tibble tibble
|
||
#' @importFrom sf st_as_sf
|
||
#' @noRd
|
||
get_state_peaks <- function(state_name) {
|
||
if (state_name == "Washington") {
|
||
tibble::tibble(
|
||
peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"),
|
||
lat = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013),
|
||
lon = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111)
|
||
) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
|
||
} else if (state_name == "Georgia") {
|
||
tibble::tibble(
|
||
peak = c("Brasstown Bald", "Rabun Bald", "Blood Mtn"),
|
||
lat = c(34.8743, 34.9660, 34.7398),
|
||
lon = c(-83.8111, -83.2996, -83.9367)
|
||
) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
|
||
} else { NULL }
|
||
}
|
||
|
||
#' Plot Spatial Autocorrelation Exploration
|
||
#'
|
||
#' @description Generates a Moran Scatterplot to visualize spatial
|
||
#' autocorrelation in elevation data using a 5km neighborhood.
|
||
#'
|
||
#' @param wa_data A dataframe or tibble containing elevation, lat, and lon columns.
|
||
#'
|
||
#' @return A ggplot object showing standardized elevation vs. spatially lagged elevation.
|
||
#'
|
||
#' @importFrom sf st_as_sf st_transform st_coordinates st_drop_geometry
|
||
#' @importFrom spdep dnearneigh nb2listw lag.listw card
|
||
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth geom_hline geom_vline labs theme_minimal
|
||
#' @importFrom dplyr mutate
|
||
#'
|
||
#' @export
|
||
plot_spatial_exploration <- function(wa_data) {
|
||
|
||
# 1. Convert to sf and project to meters (Washington State Plane North)
|
||
# This ensures the 5000 distance in dnearneigh is 5,000 meters.
|
||
wa_sf <- wa_data |>
|
||
sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) |>
|
||
sf::st_transform(2285)
|
||
|
||
# 2. Extract coordinates and build weights (5km radius)
|
||
coords <- sf::st_coordinates(wa_sf)
|
||
nb <- spdep::dnearneigh(coords, 0, 5000)
|
||
|
||
# Remove points with zero neighbors to prevent divide-by-zero errors
|
||
ids_with_nb <- which(spdep::card(nb) > 0)
|
||
nb <- subset(nb, spdep::card(nb) > 0)
|
||
wa_sf <- wa_sf[ids_with_nb, ]
|
||
|
||
# Row-standardized weights matrix
|
||
lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)
|
||
|
||
# 3. Prep data for plotting
|
||
plot_df <- wa_sf |>
|
||
sf::st_drop_geometry() |>
|
||
dplyr::mutate(
|
||
# Scale elevation to get z-scores
|
||
elev_scaled = as.vector(scale(elevation)),
|
||
# Calculate the spatial lag (average elevation of neighbors)
|
||
elev_lag = spdep::lag.listw(lw, elev_scaled, zero.policy = TRUE)
|
||
)
|
||
|
||
# 4. Build Plot Object
|
||
p <- ggplot2::ggplot(plot_df, ggplot2::aes(x = elev_scaled, y = elev_lag)) +
|
||
ggplot2::geom_point(alpha = 0.2, color = "#2c3e50") +
|
||
ggplot2::geom_smooth(
|
||
method = "lm",
|
||
color = "#e74c3c",
|
||
linetype = "dashed",
|
||
formula = y ~ x
|
||
) +
|
||
ggplot2::geom_hline(yintercept = 0, alpha = 0.3) +
|
||
ggplot2::geom_vline(xintercept = 0, alpha = 0.3) +
|
||
ggplot2::labs(
|
||
title = "Spatial Autocorrelation: Moran Scatterplot",
|
||
subtitle = "Washington Elevation (5km neighborhood)",
|
||
x = "Standardized Elevation (Z-score)",
|
||
y = "Spatially Lagged Elevation (Neighbors)"
|
||
) +
|
||
ggplot2::theme_minimal()
|
||
|
||
return(p)
|
||
} |