#' Helper: Save Plot for Quarto Slide #' #' @description Saves a ggplot object as a PNG, sized to fit comfortably #' below a standard slide title, with robust font handling. #' #' @param plot_obj The ggplot object to save. #' @param name A short descriptive name (e.g., "wa_ecoregions"). #' @param type Either "map" or "plot". Adds this prefix to the filename. #' @param width Width in inches (default: 10). #' @param height Height in inches (default: 6.18). #' @param dpi Resolution (default: 300). #' #' @return The full file path (invisibly). #' @export helper_save_fig <- function(plot_obj, name, type = c("map", "plot"), width = 10, height = 6.18, dpi = 300) { # 1. Input Validation & Directory Setup type <- match.arg(type) if (!dir.exists("figs")) dir.create("figs") # Construct filename: figs/map_washington.png filename <- paste0("figs/", type, "_", name, ".png") # 2. Force Fonts (The "Amnesia" Fix) # Critical: We must turn the engine on immediately before saving # to prevent the new graphics device from reverting to Arial. sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson") showtext::showtext_auto() showtext::showtext_opts(dpi = dpi) # 3. Save the File ggplot2::ggsave( filename = filename, plot = plot_obj, width = width, height = height, dpi = dpi, bg = "white" # Prevents transparent backgrounds ) message(paste("Saved:", filename)) return(invisible(filename)) } #' Register Project Fonts #' #' Registers 'Atkinson Hyperlegible Next' (Sans) and 'Atkinson Hyperlegible Mono' #' (Monospace) with the sysfonts package for use in R graphics. #' #' @return NULL (called for side effects) #' @export setup_forestry_fonts <- function() { font_dir <- "assets/fonts" if (!dir.exists(font_dir)) { warning(paste("Font directory not found at:", font_dir)) return(NULL) } # 1. Main Text Font (Sans) sysfonts::font_add( family = "atkinson", regular = file.path(font_dir, "AtkinsonHyperlegibleNext-Regular.ttf"), bold = file.path(font_dir, "AtkinsonHyperlegibleNext-Bold.ttf"), italic = file.path(font_dir, "AtkinsonHyperlegibleNext-Italic.ttf"), bolditalic = file.path(font_dir, "AtkinsonHyperlegibleNext-BoldItalic.ttf") ) # 2. Code/Number Font (Mono) sysfonts::font_add( family = "atkinson_mono", regular = file.path(font_dir, "AtkinsonHyperlegibleMono-Regular.ttf"), bold = file.path(font_dir, "AtkinsonHyperlegibleMono-Bold.ttf") ) # 3. Enable showtext rendering showtext::showtext_auto() showtext::showtext_opts(dpi = 300) } #' Standardized Void Theme (Maximal Data Ink) #' @description Removes axes/grids for shape-focused maps, but keeps project fonts. #' @export theme_forestry_void <- function(base_size = 16) { # 1. Start with the void (empty) theme ggplot2::theme_void(base_family = "atkinson", base_size = base_size) + # 2. Add back the specific text elements we need ggplot2::theme( # Keep the Legend styled correctly legend.position = "bottom", legend.title = ggplot2::element_text( family = "atkinson", face = "bold", size = ggplot2::rel(1) ), legend.text = ggplot2::element_text( family = "atkinson", size = ggplot2::rel(0.9) ), # Keep Titles (if you use them inside the plot) plot.title = ggplot2::element_text( family = "atkinson", face = "bold", hjust = 0.5, margin = ggplot2::margin(b = 5) ), # Zero margins to force edges to touch plot.margin = ggplot2::margin(5, 5, 5, 5) ) } #' Standard Forestry Plot Theme (Cowplot + Atkinson) #' #' @description A standardized theme for non-spatial plots (scatter, bar, line). #' Based on cowplot::theme_cowplot(), it includes clean axes and a minimalist look. #' Uses 'Atkinson Hyperlegible Next' for all text. #' #' @param font_size Integer. Base font size. Default is 14 (good for slides). #' @param grid Logical. If TRUE, adds a light gray grid (useful for presentations). #' #' @importFrom cowplot theme_cowplot background_grid #' @importFrom ggplot2 theme element_text element_rect margin #' @export theme_forestry_plot <- function(font_size = 14, grid = TRUE) { # 1. Ensure Fonts are Registered setup_forestry_fonts() # 2. Base Theme (Cowplot with Atkinson) # cowplot::theme_cowplot has a 'font_family' argument we can leverage directly base <- cowplot::theme_cowplot(font_size = font_size, font_family = "atkinson") + ggplot2::theme( # Fonts: We force family="atkinson" here just to be safe for overrides plot.title = ggplot2::element_text(family = "atkinson", face = "bold", hjust = 0.5, size = font_size + 2), axis.title = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size), legend.title = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size), legend.text = ggplot2::element_text(family = "atkinson", size = font_size - 1), # Optional: Use Monospace for axis numbers? (Uncomment if you want the "technical" look) # axis.text = ggplot2::element_text(family = "atkinson_mono", size = font_size - 1), # Backgrounds (Transparent for slides) plot.background = ggplot2::element_rect(fill = "transparent", color = NA), panel.background = ggplot2::element_rect(fill = "transparent", color = NA), # Margins legend.box.margin = ggplot2::margin(10, 10, 10, 10) ) # 3. Optional Grid if (grid) { base <- base + cowplot::background_grid(major = "y", minor = "none", color.major = "grey92") } return(base) } #' Standardized Spatial Theme (Atkinson) #' @description High-visibility map theme for presentations using Atkinson fonts. #' @importFrom ggplot2 theme_minimal theme element_line element_blank element_text element_rect margin unit #' @export theme_forestry_spatial <- function(base_size = 16) { # 1. Ensure Fonts are Registered setup_forestry_fonts() # 2. Base Theme with Relative Sizing ggplot2::theme_minimal(base_family = "atkinson", base_size = base_size) + ggplot2::theme( # Minimalist grid lines (Graticules) panel.grid.major = ggplot2::element_line(color = "grey90", linewidth = 0.2), panel.grid.minor = ggplot2::element_blank(), # Titles: Scale relative to base_size (12 * 1.5 = 18pt) plot.title = ggplot2::element_text( family = "atkinson", face = "bold", size = ggplot2::rel(1.5), color = "#2c3e50" ), # Axis Labels: Slightly smaller than base (12 * 0.8 = 9.6pt) axis.text = ggplot2::element_text( family = "atkinson", size = ggplot2::rel(0.8), color = "#5d6d7e" ), # Legend: Scale relative to base legend.position = "right", legend.title = ggplot2::element_text( family = "atkinson", size = ggplot2::rel(1.1), # ~13.2pt face = "bold" ), legend.text = ggplot2::element_text( family = "atkinson", size = ggplot2::rel(0.9) # ~10.8pt ), # Clean borders panel.border = ggplot2::element_rect(color = "grey80", fill = NA, linewidth = 0.5), plot.margin = ggplot2::margin(5, 5, 5, 5) ) } #' Simplified Theme Diagnostic #' @description Uses built-in NC data to verify theme_forestry_spatial. #' @importFrom sf st_read st_crs #' @importFrom ggplot2 ggplot geom_sf labs coord_sf scale_x_continuous scale_y_continuous #' @export plot_theme_diagnostic <- function() { # Pull NC data included with the sf package nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) # Render with your spatial theme ggplot2::ggplot(nc) + ggplot2::geom_sf(fill = "#27ae60", alpha = 0.3, color = "#2c3e50") + # Force WGS84 graticules (Decimal Degrees) ggplot2::coord_sf(datum = sf::st_crs(4326)) + theme_forestry_spatial() + ggplot2::labs( title = "Theme Diagnostic: North Carolina", subtitle = "Testing 24pt Title and 14pt Graticule Labels" ) } #' Combine Washington and Georgia Forest Data #' #' Merges the Washington and Georgia datasets into a single data frame, adding a #' column to identify the source state. #' #' @param wa_data A data frame containing the Washington forest inventory data. #' @param ga_data A data frame containing the Georgia forest inventory data. #' #' @return A single combined data frame with an additional column `.id` #' (renamed to "state") indicating the source ("WA" or "GA"). #' #' @examples #' \dontrun{ #' combined <- combine_forest(wa_raw, ga_raw) #' table(combined$state) #' } #' #' @export combine_forest <- function(wa_data, ga_data){ dplyr::bind_rows( list(WA = wa_data, GA = ga_data), .id = "state" ) } #' Format Statistical Summary Table #' #' @description Calculates descriptive statistics and returns a formatted `gt` table #' ready for slide presentation. #' #' @param data A data frame or sf object. #' @return A `gt` table object. #' #' @importFrom dplyr select arrange desc any_of where #' @importFrom sf st_drop_geometry #' @importFrom psych describe #' @importFrom tibble rownames_to_column #' @importFrom gt gt fmt_number cols_label tab_options google_font #' @export format_summary_table <- function(data) { # 1. Calculate Stats (The Logic) stats_df <- data %>% sf::st_drop_geometry() %>% dplyr::select(dplyr::where(is.numeric)) %>% dplyr::select(-dplyr::any_of(c("year", "lon", "lat"))) %>% psych::describe() %>% as.data.frame() %>% tibble::rownames_to_column(var = "Variable") %>% dplyr::select(-vars, -range, -se, -mad) %>% dplyr::arrange(dplyr::desc(abs(kurtosis))) # 2. Format Table (The Presentation) stats_df %>% gt::gt() %>% gt::opt_table_font( font = gt::google_font("Atkinson Hyperlegible") ) %>% # Clean up column labels gt::cols_label( Variable = "Variable", n = "N", mean = "Mean", sd = "SD", median = "Median", trimmed = "Trimmed", min = "Min", max = "Max", skew = "Skew", kurtosis = "Kurtosis" ) %>% # Format all numbers to 2 decimal places gt::fmt_number( columns = dplyr::where(is.numeric), decimals = 2 ) %>% # Format 'N' to 0 decimals gt::fmt_number( columns = "n", decimals = 0 ) %>% # Styling: Increase font size and use your project font gt::tab_options( table.font.names = "Atkinson Hyperlegible", table.font.size = "22px", # <--- Increases size to fill the slide data_row.padding = "8px" # Adds breathing room ) %>% gt::text_transform( locations = gt::cells_body(columns = dplyr::where(is.numeric)), fn = function(x) { # \u2212 is the unicode minus that breaks things. Replace with standard "-". gsub("\u2212", "-", x) } ) } #' Plot US Map with Forestry Theme #' @description Highlights Washington and Georgia using standardized presentation fonts. #' @importFrom tigris states shift_geometry #' @importFrom rmapshaper ms_simplify ms_filter_islands #' @importFrom dplyr filter mutate #' @importFrom ggplot2 ggplot geom_sf aes scale_fill_manual labs #' @export plot_us_map <- function() { # 1. Fetch and filter for the Lower 48 us_map <- tigris::states(cb = TRUE, resolution = "20m", year = 2022, progress_bar = FALSE) %>% dplyr::filter(!STUSPS %in% c("AK", "HI", "PR")) %>% rmapshaper::ms_simplify(keep = 0.2) %>% rmapshaper::ms_filter_islands(min_area = 1e9) %>% tigris::shift_geometry() %>% dplyr::mutate( highlight = dplyr::case_when( STUSPS == "WA" ~ "Washington", STUSPS == "GA" ~ "Georgia", TRUE ~ NA_character_ ) ) # 2. Define Colors state_colors <- c("Washington" = "#E16A86", "Georgia" = "#00AD9A") # 3. Plot ggplot2::ggplot() + # Layer 1: Base Map ggplot2::geom_sf(data = us_map, fill = "grey98", color = "grey50", linewidth = 0.25) + # Layer 2: Highlight Layer ggplot2::geom_sf( data = dplyr::filter(us_map, !is.na(highlight)), ggplot2::aes(fill = highlight), color = "grey50", linewidth = 0.25 ) + # Custom Scales ggplot2::scale_fill_manual(values = state_colors, name = "Study Sites") + # Standardized Forestry Theme theme_forestry_spatial() } #' Create Single State Topo Plot #' @description Generates a topo map with manually tuned label placement for Georgia. #' @export plot_state_topo <- function(data, boundary_sf, raster_path, state_name) { # 1. Load & Process Raster elev_terra <- terra::rast(raster_path) slope <- terra::terrain(elev_terra, "slope", unit = "radians") aspect <- terra::terrain(elev_terra, "aspect", unit = "radians") hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315) # 2. Prep Vectors boundary_sf <- sf::st_transform(boundary_sf, 4326) peaks_sf <- get_state_peaks(state_name) # --- FIX: Manual Nudge Logic --- # Define default nudges (0 means "let ggrepel decide") nx <- 0 ny <- 0 if (state_name == "Georgia" && !is.null(peaks_sf)) { # Prepare nudge vectors specific to the Georgia peaks to uncross the lines # Units are in Decimal Degrees (roughly: 0.1 deg ~ 11km) peaks_sf <- peaks_sf %>% dplyr::mutate( nudge_x = dplyr::case_when( peak == "Rabun Bald" ~ 0.3, # Push Hard Right (East) peak == "Blood Mtn" ~ -0.3, # Push Hard Left (West) peak == "Brasstown Bald" ~ 0, # Keep Center horizontally TRUE ~ 0 ), nudge_y = dplyr::case_when( peak == "Brasstown Bald" ~ 0.2, # Push Up (North) peak == "Rabun Bald" ~ -0.1, # Push Down slightly TRUE ~ 0 ) ) # Extract as vectors for ggrepel nx <- peaks_sf$nudge_x ny <- peaks_sf$nudge_y } # 3. Plot ggplot2::ggplot() + tidyterra::geom_spatraster(data = elev_terra) + tidyterra::scale_fill_hypso_c( palette = "usgs-gswa2", name = "Elevation (m)", na.value = "transparent" ) + tidyterra::geom_spatraster( data = hill_terra, ggplot2::aes(alpha = ggplot2::after_stat(value)), fill = "black", show.legend = FALSE ) + ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) + ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) + # REPEL SETTINGS {if(!is.null(peaks_sf)) ggrepel::geom_label_repel( data = peaks_sf, ggplot2::aes(label = peak, geometry = geometry), stat = "sf_coordinates", size = 6, family = "atkinson", fontface = "bold", # Inject the manual nudges here nudge_x = nx, nudge_y = ny, box.padding = 0.8, point.padding = 0.5, force = 10, min.segment.length = 0 )} + theme_forestry_void(base_size = 16) + ggplot2::coord_sf(expand = FALSE) + ggplot2::guides( fill = ggplot2::guide_colorbar( title.position = "top", title.hjust = 0.5, barwidth = ggplot2::unit(5, "cm"), barheight = ggplot2::unit(0.3, "cm"), frame.colour = "black", ticks.colour = "black" ) ) + ggplot2::theme( legend.position = "bottom", legend.margin = ggplot2::margin(t = 5, b = 0) ) } #' Save Combined Side-by-Side Topo Plot #' @export save_combined_topo <- function(wa_data, ga_data, wa_boundary, ga_boundary, wa_raster_path, ga_raster_path, output_path) { # [FIX 2] Ensure fonts are registered & showtext is ON for this worker process setup_forestry_fonts() p_wa <- plot_state_topo(wa_data, wa_boundary, wa_raster_path, "Washington") p_ga <- plot_state_topo(ga_data, ga_boundary, ga_raster_path, "Georgia") combined_plot <- patchwork::wrap_plots(p_wa, p_ga, ncol = 2) + patchwork::plot_annotation( tag_levels = 'a', tag_prefix = '(', tag_suffix = ')', theme = theme_forestry_void(base_size = 20) ) dir.create(dirname(output_path), showWarnings = FALSE) # Save ggplot2::ggsave(output_path, plot = combined_plot, width = 12, height = 6, dpi = 300, bg = "white") return(output_path) } #' Plot Regional Comparison of Forested Data (WA vs GA) #' #' @description Generates a side-by-side comparison of forest cover for Washington #' and Georgia. It handles font registration (Atkinson Hyperlegible), spatial #' transformations, and creates a combined plot with a shared legend. #' #' @param data A data frame containing the forest point data. Must contain #' columns `lon`, `lat`, `state`, and `forested`. #' @param boundaries An `sf` object containing state boundaries. Must contain #' a `NAME` column. #' #' @return A `patchwork` object containing the combined side-by-side maps. #' #' @importFrom sysfonts font_add_google #' @importFrom showtext showtext_auto #' @importFrom dplyr filter #' @importFrom sf st_as_sf st_transform st_crs #' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual coord_sf guides guide_legend theme_void theme element_text #' @importFrom patchwork wrap_plots plot_layout plot_annotation #' #' @export plot_regional_comparison <- function(data, boundaries) { # Load the font from Google Fonts (or a local file) font_add_google("Atkinson Hyperlegible", family = "atkinson") # Turn on showtext to render the font showtext_auto() # Spatial Prep (Fixing the NAD83/WGS84 mismatch) forest_sf <- data %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% sf::st_transform(sf::st_crs(boundaries)) state_lookup <- c("Washington" = "WA", "Georgia" = "GA") # Washington Plot abbr_wa <- state_lookup["Washington"] p_wa <- ggplot2::ggplot() + # State Boundary ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Washington"), fill = "grey98", color = "grey80") + # Forest Points ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_wa), ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) + # Scales ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"), name = "Forested") + ggplot2::coord_sf(expand = TRUE) + # Legend Tweaks ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) + theme_void() # Georgia Plot abbr_ga <- state_lookup["Georgia"] p_ga <- ggplot2::ggplot() + # State Boundary ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Georgia"), fill = "grey98", color = "grey80") + # Forest Points ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_ga), ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) + # Scales ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"), name = "Forested") + ggplot2::coord_sf(expand = TRUE) + # Legend Tweaks ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) + theme_void() # Combine final_plot <- patchwork::wrap_plots(p_wa, p_ga) + patchwork::plot_layout(guides = "collect") + patchwork::plot_annotation( tag_levels = 'a', tag_prefix = '(', tag_suffix = ')' ) & ggplot2::theme( text = element_text(family = "atkinson", size = 16), legend.position = "bottom" ) return(final_plot) } #' Download EPA Level III Ecoregions Data #' #' @description Downloads the EPA Level III Ecoregions shapefile (zip format) #' to a local directory. Implements a caching check to avoid re-downloading #' if the file already exists. #' #' @param url Character string. The direct URL to the EPA Ecoregions zip file. #' @param dest_dir Character string. The local directory where the data should #' be saved. Defaults to "data/epa". #' #' @return A character string containing the full file path to the downloaded zip file. #' This return value is designed to be tracked by \code{targets}. #' #' @importFrom utils download.file #' @export get_epa_ecoregions <- function(url, dest_dir = "data/epa") { # Ensure directory exists if (!dir.exists(dest_dir)) dir.create(dest_dir, recursive = TRUE) # Define destination dest_file <- file.path(dest_dir, "us_eco_l3.zip") # Download only if missing if (!file.exists(dest_file)) { message("Downloading EPA Ecoregion Data...") download.file(url, dest_file, mode = "wb") } # Return the file path for the next target to track return(dest_file) } #' Process and Clip EPA Ecoregions #' #' @description Extracts EPA Level III ecoregion data from a zipped shapefile, #' standardizes column names, and clips the geometry to specified state boundaries. #' It includes robust steps for geometry repair (handling spherical validity), #' small island removal, and simplification for optimized plotting. #' #' @param zip_path Character string. The file path to the zipped EPA shapefile. #' @param target_states Character vector. The names of the states to clip the #' ecoregions to. Defaults to \code{c("Washington", "Georgia")}. #' @param simplify_tol Numeric. The simplification tolerance passed to #' \code{rmapshaper::ms_simplify}. Range is 0-1, where higher numbers remove more detail. #' Defaults to 0.05. #' #' @return An \code{sf} object containing the processed ecoregions with standardized #' columns \code{US_L3NAME} and \code{STATE_NAME}. #' #' @importFrom tigris states #' @importFrom dplyr filter rename select %>% #' @importFrom rmapshaper ms_simplify ms_filter_islands #' @importFrom sf read_sf st_transform st_crs st_make_valid st_intersection sf_use_s2 #' @importFrom utils unzip #' @export process_ecoregions <- function(zip_path, target_states = c("Washington", "Georgia"), simplify_tol = 0.05) { message("Processing shapefiles for: ", paste(target_states, collapse = ", ")) # A. Fetch State Boundaries (The "Cookie Cutter") boundaries <- tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>% filter(NAME %in% target_states) %>% # Simplify boundaries to match the ecoregion resolution rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE) # B. Unzip and Read EPA Data clean_dir <- tempfile() dir.create(clean_dir) unzip(zip_path, exdir = clean_dir) shp_file <- list.files(clean_dir, pattern = "\\.shp$", full.names = TRUE, recursive = TRUE)[1] eco_raw <- sf::read_sf(shp_file) # C. Standardize Columns (Handle variations in column names) if (!"US_L3NAME" %in% names(eco_raw)) { if ("NA_L3NAME" %in% names(eco_raw)) { eco_raw <- rename(eco_raw, US_L3NAME = NA_L3NAME) } else if ("LEVEL3_NAM" %in% names(eco_raw)) { eco_raw <- rename(eco_raw, US_L3NAME = LEVEL3_NAM) } } # D. Clip, Clean, and Simplify suppressMessages(sf::sf_use_s2(FALSE)) # Turn off spherical geometry for robust clipping eco_clean <- eco_raw %>% st_transform(st_crs(boundaries)) %>% st_make_valid() %>% st_intersection(boundaries) %>% select(US_L3NAME, STATE_NAME = NAME) %>% rmapshaper::ms_filter_islands(min_area = 200000000) %>% rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE) suppressMessages(sf::sf_use_s2(TRUE)) return(eco_clean) } #' Plot Ecoregion Complexity Comparison (WA vs GA) #' #' @description Generates a side-by-side comparison of Level III ecoregions for Washington #' and Georgia. It uses a "void" theme, qualitative colors, and carefully tuned #' label repulsion settings to avoid overlapping text. #' #' @param eco_data An \code{sf} object containing ecoregion polygons. Must contain #' columns \code{STATE_NAME} and \code{US_L3NAME}. #' #' @return A \code{patchwork} object containing the combined plot. #' #' @importFrom dplyr filter group_by summarize mutate #' @importFrom sf st_transform st_union st_point_on_surface st_coordinates st_drop_geometry #' @importFrom stringr str_wrap #' @importFrom ggplot2 ggplot geom_sf aes coord_sf theme_void scale_fill_discrete guide_none element_text margin element_rect labs #' @importFrom ggrepel geom_label_repel #' @importFrom colorspace scale_fill_discrete_qualitative #' @importFrom patchwork plot_annotation #' #' @export plot_ecoregion_comparison <- function(eco_data) { # 1. WASHINGTON PLOT # A. Filter & Transform wa_e <- eco_data %>% dplyr::filter(STATE_NAME == "Washington") %>% sf::st_transform(4326) # B. Calculate Centroids for Labels wa_labels <- wa_e %>% dplyr::group_by(US_L3NAME) %>% dplyr::summarize(geometry = sf::st_union(geometry)) %>% dplyr::mutate( lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1], lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2], clean_label = stringr::str_wrap(US_L3NAME, width = 35) ) %>% sf::st_drop_geometry() # C. Generate Plot p_wa <- ggplot2::ggplot() + # Geometries ggplot2::geom_sf(data = wa_e, ggplot2::aes(fill = US_L3NAME), alpha = 1, color = "white", linewidth = 0.2) + ggplot2::coord_sf(datum = NA, expand = TRUE) + # Repelled Labels ggrepel::geom_label_repel( data = wa_labels, ggplot2::aes(x = lon, y = lat, label = clean_label), inherit.aes = FALSE, size = 5, family = "atkinson", lineheight = 1.6, box.padding = 0.75, min.segment.length = 0, force = 30, max.overlaps = Inf, alpha = 0.95, segment.color = "grey30", segment.size = 1, seed = 42 ) + # Colors colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") + # Manual Theme ggplot2::theme_void(base_family = "atkinson") # 2. GEORGIA PLOT # A. Filter & Transform ga_e <- eco_data %>% dplyr::filter(STATE_NAME == "Georgia") %>% sf::st_transform(4326) # B. Calculate Centroids for Labels ga_labels <- ga_e %>% dplyr::group_by(US_L3NAME) %>% dplyr::summarize(geometry = sf::st_union(geometry)) %>% dplyr::mutate( lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1], lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2], clean_label = stringr::str_wrap(US_L3NAME, width = 35) ) %>% sf::st_drop_geometry() # C. Generate Plot p_ga <- ggplot2::ggplot() + # Geometries ggplot2::geom_sf(data = ga_e, ggplot2::aes(fill = US_L3NAME), alpha = 1, color = "white", linewidth = 0.2) + ggplot2::coord_sf(datum = NA, expand = TRUE) + # Repelled Labels ggrepel::geom_label_repel( data = ga_labels, ggplot2::aes(x = lon, y = lat, label = clean_label), inherit.aes = FALSE, size = 5, family = "atkinson", lineheight = 1.6, box.padding = 0.75, min.segment.length = 0, force = 40, max.overlaps = Inf, alpha = 0.95, segment.color = "grey30", segment.size = 1, seed = 4 ) + # Colors colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") + # Manual Theme ggplot2::theme_void(base_family = "atkinson") + ggplot2::theme( plot.title = ggplot2::element_text(face = "bold", margin = ggplot2::margin(b = 10)), plot.tag = ggplot2::element_text(face = "bold"), legend.position = "bottom" ) + ggplot2::labs(x = NULL, y = NULL) # 3. COMBINE & ANNOTATE combined_plot <- (p_wa + p_ga) + patchwork::plot_annotation( tag_levels = 'a', tag_prefix = '(', tag_suffix = ')' ) & # Apply theme settings globally to the patchwork ggplot2::theme_void(base_family = "atkinson") return(combined_plot) } #' Plot Univariate Distributions by Forest Cover #' #' @description Generates a faceted density plot comparing numeric environmental #' variables between forested and non-forested areas. It uses the "Dark 3" #' qualitative palette and applys the Wilke cowplot theme. #' #' @param data A data frame or sf object containing the forest data. #' Must include `forested` column and numeric environmental variables. #' #' @return A `ggplot` object. #' #' @importFrom dplyr select starts_with #' @importFrom tidyr pivot_longer #' @importFrom ggplot2 ggplot aes geom_density facet_wrap scale_fill_manual labs #' @importFrom cowplot theme_cowplot #' @importFrom colorspace qualitative_hcl #' @importFrom sysfonts font_add_google #' @importFrom showtext showtext_auto #' #' @export plot_forest_distributions <- function(data) { # 1. Typography Setup sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson") showtext::showtext_auto() # 2. Color Palette (Dark 3 from colorspace) # Generating the hex codes specifically for 2 levels pal_colors <- colorspace::qualitative_hcl(palette = "Dark 3", n = 2) # 3. Plotting data %>% dplyr::select( forested, elevation, eastness, northness, roughness, dew_temp, precip_annual, dplyr::starts_with("temp_"), dplyr::starts_with("vapor_") ) %>% tidyr::pivot_longer( cols = -forested, names_to = "variable", values_to = "value" ) %>% ggplot2::ggplot(ggplot2::aes(x = value, fill = forested)) + # color = NA removes the outline for a cleaner "Wilke-style" look ggplot2::geom_density(alpha = 0.6, color = NA) + ggplot2::facet_wrap(~ variable, scales = "free") + # Scales & Labels ggplot2::scale_fill_manual(values = pal_colors) + ggplot2::labs( x = NULL, y = "Density", fill = "Forested" ) + ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.05))) + ggplot2::scale_x_continuous( expand = ggplot2::expansion(mult = c(0, 0)), labels = scales::label_number(scale_cut = scales::cut_short_scale())) + # Theme cowplot::theme_cowplot(font_family = "atkinson", font_size = 22) + # Minor tweaks to spacing ggplot2::theme( legend.position = "bottom", strip.background = ggplot2::element_blank(), # Clean facet headers strip.text = ggplot2::element_text(face = "bold") ) } identify_outliers <- function(data){ # 1. Prepare the data (calculate Z-scores first) plot_data <- data %>% sf::st_drop_geometry() %>% dplyr::select(forested, where(is.numeric)) %>% dplyr::select(-any_of(c("lon", "lat", "year"))) %>% tidyr::pivot_longer(cols = -forested, names_to = "variable", values_to = "value") %>% dplyr::group_by(variable) %>% # Wrap scale in as.numeric to fix the matrix warning dplyr::mutate(scaled_value = as.numeric(scale(value))) %>% dplyr::ungroup() # 2. Plot ggplot2::ggplot(plot_data, aes(x = forested, y = scaled_value, fill = forested)) + # Draw the main distribution (suppress default outliers) ggplot2::geom_boxplot(outlier.shape = NA, alpha = 0.4) + # CRITICAL FIX: Only pass the extreme values to the points layer ggplot2::geom_jitter( data = dplyr::filter(plot_data, abs(scaled_value) > 3), width = 0.2, alpha = 0.6, size = 1.5 # Make the outliers pop ) + ggplot2::facet_wrap(~variable, scales = "free_y") + ggplot2::theme_minimal() + ggplot2::scale_fill_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) + ggplot2::labs( title = "Standardized Distribution & Extremes", subtitle = "Points indicate observations > 3 Standard Deviations from mean", y = "Z-Score (σ)" ) } fetch_study_area <- function(target_states) { tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>% dplyr::filter(NAME %in% target_states) %>% rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE) } fetch_state_boundary <- function(state){ tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>% dplyr::filter(NAME == state) %>% rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE) } create_elevation_raster <- function(boundary_sf, path) { elev_raster <- elevatr::get_elev_raster(locations = boundary_sf, z = 7, clip = "locations") elev_terra <- terra::rast(elev_raster) %>% terra::mask(terra::vect(boundary_sf)) terra::writeRaster(elev_terra, path, overwrite = TRUE) return(path) } #' Save Outlier Diagnostic Map #' #' @description Generates a diagnostic map highlighting multivariate outliers (Z > 3) #' overlaid on a hillshaded elevation raster. Uses the standardized forestry theme. #' #' @param data A data frame containing the analysis dataset (must include numeric columns and 'forested' factor). #' @param boundary_sf An \code{sf} object representing the state boundary (e.g., Washington). #' @param raster_path Character string. File path to the elevation raster (.tif). #' @param output_path Character string. File path where the PNG will be saved. #' #' @importFrom terra rast terrain shade #' @importFrom sf st_transform st_as_sf st_filter st_drop_geometry #' @importFrom dplyr select filter if_any all_of mutate #' @importFrom tibble tibble #' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_manual labs ggsave guides guide_legend #' @importFrom tidyterra geom_spatraster scale_fill_hypso_c #' @importFrom ggrepel geom_label_repel #' #' @return The \code{output_path} (invisible), for integration with \code{targets}. #' @export save_outlier_map_png <- function(data, boundary_sf, raster_path, output_path) { # 1. Load & Process Raster elev_terra <- terra::rast(raster_path) slope <- terra::terrain(elev_terra, "slope", unit = "radians") aspect <- terra::terrain(elev_terra, "aspect", unit = "radians") hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315) # 2. Prep Vectors wa_boundary_sf <- sf::st_transform(boundary_sf, 4326) data_sf <- data %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% sf::st_filter(wa_boundary_sf) # Identify numeric columns automatically numeric_cols <- data_sf %>% sf::st_drop_geometry() %>% dplyr::select(where(is.numeric)) %>% names() # Filter for >3 Z-score outliers_sf <- data_sf %>% dplyr::filter(dplyr::if_any(dplyr::all_of(numeric_cols), ~ abs(scale(.)) > 3)) # WA Peak Labels wa_peaks_sf <- tibble::tibble( peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"), lat = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013), lon = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111) ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) # 3. Plot plt <- ggplot2::ggplot() + # Layer A: Elevation Color tidyterra::geom_spatraster(data = elev_terra) + tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") + # Layer B: Hillshade Shadow Mask tidyterra::geom_spatraster( data = hill_terra, ggplot2::aes(alpha = ggplot2::after_stat(value)), fill = "black", show.legend = FALSE ) + ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) + # Layer C: Vectors (Boundary + Outlier Points) ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) + ggplot2::geom_sf(data = outliers_sf, ggplot2::aes(color = forested), size = 2.5) + ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) + # Layer D: Labels ggrepel::geom_label_repel( data = wa_peaks_sf, ggplot2::aes(label = peak, geometry = geometry), stat = "sf_coordinates", size = 5, fontface = "bold", box.padding = 0.6, min.segment.length = 0 ) + # Theme Application theme_forestry_spatial() + labs(x = "", y = "") # Save ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white") return(output_path) } plot_wa_pca <- function(data) { # REVISED: Keep Lat/Lon in the numeric matrix numeric_mat <- data %>% sf::st_drop_geometry() %>% dplyr::select(where(is.numeric)) %>% dplyr::select(-any_of(c("year"))) pca_res <- stats::prcomp(numeric_mat, scale. = TRUE) # Calculate variance var_exp <- round(100 * pca_res$sdev^2 / sum(pca_res$sdev^2), 1) # Calculate the sum for the reader total_var <- var_exp[1] + var_exp[2] as.data.frame(pca_res$x) %>% dplyr::mutate(forested = data$forested) %>% ggplot2::ggplot(aes(x = PC1, y = PC2, color = forested)) + ggplot2::geom_point(alpha = 0.5) + ggplot2::scale_color_manual(values = c("Yes" = "#2D5A27", "No" = "#A0522D")) + ggplot2::theme_minimal(base_size = 14) + ggplot2::labs( title = "PCA: Biophysical + Geography", # REVISED SUBTITLE: Sums it up cleanly subtitle = paste0("First two components capture ", total_var, "% of total variance"), x = paste0("PC1 (", var_exp[1], "%)"), y = paste0("PC2 (", var_exp[2], "%)") ) + ggplot2::scale_x_continuous(limits = c(-10, 5)) } plot_correlations <- function(data) { # 1. Data Prep (Same as before) cor_data <- data %>% sf::st_drop_geometry() %>% dplyr::mutate(Forested_Binary = dplyr::if_else(forested == "Yes", 1, 0)) %>% dplyr::select(Forested_Binary, dplyr::where(is.numeric)) %>% dplyr::select(-any_of(c("year"))) cor_df <- cor(cor_data, use = "pairwise.complete.obs") %>% as.data.frame() %>% dplyr::select(correlation = Forested_Binary) %>% tibble::rownames_to_column("variable") %>% dplyr::filter(variable != "Forested_Binary") %>% dplyr::mutate( sign = dplyr::if_else(correlation > 0, "Positive", "Negative"), is_spatial = dplyr::if_else(variable %in% c("lat", "lon"), "Spatial", "Biophysical") ) # 2. Plot with SPACE ggplot2::ggplot(cor_df, aes(x = reorder(variable, correlation), y = correlation)) + ggplot2::geom_segment(aes(xend = variable, yend = 0), color = "gray50") + ggplot2::geom_point(aes(color = sign, shape = is_spatial), size = 5) + ggplot2::coord_flip() + ggplot2::theme_minimal(base_size = 18) + ggplot2::scale_color_manual(values = c("Positive" = "#228B22", "Negative" = "#D2691E")) + ggplot2::labs( title = "What drives the 'Forested' classification?", subtitle = "Correlations with Forested (Yes=1). Note the impact of Latitude.", x = NULL, y = "Correlation Coefficient", color = "Direction", shape = "Variable Type" ) } #' Plot Random Forest Variable Importance #' #' @description Fits a ranger Random Forest model to the provided data, calculates #' permutation importance, and generates a lollipop chart. It distinguishes #' between spatial (lat/lon) and biophysical predictors. #' Uses the project's 'Atkinson' font theme via theme_forestry_plot(). #' #' @param data An sf object or data frame containing the 'forested' target and predictors. #' #' @importFrom ranger ranger #' @importFrom vip vi #' @importFrom sf st_drop_geometry #' @importFrom dplyr select mutate if_else any_of where #' @importFrom ggplot2 ggplot aes geom_segment geom_point scale_shape_manual scale_color_manual labs #' @export plot_rf_importance <- function(data) { # 1. Clean Data & Fit Model model_data <- data %>% sf::st_drop_geometry() %>% dplyr::select(forested, dplyr::where(is.numeric)) %>% dplyr::select(-dplyr::any_of(c("year"))) %>% dplyr::mutate(forested = as.factor(forested)) rf_model <- ranger::ranger( formula = forested ~ ., data = model_data, importance = "permutation", seed = 123 ) # 2. Extract importance & Add Spatial Flag imp_df <- vip::vi(rf_model) %>% dplyr::mutate( Type = dplyr::if_else(Variable %in% c("lat", "lon"), "Spatial", "Biophysical") ) # 3. Tufte-style Lollipop with Shapes ggplot2::ggplot(imp_df, ggplot2::aes(x = Importance, y = reorder(Variable, Importance))) + # Stems ggplot2::geom_segment(ggplot2::aes(xend = 0, yend = Variable), color = "gray60", linewidth = 0.8) + # Dots (Mapped to Shape) ggplot2::geom_point( ggplot2::aes(shape = Type, color = Type), size = 5 ) + # --- VISUAL MAPPING --- ggplot2::scale_shape_manual(values = c("Biophysical" = 16, "Spatial" = 17)) + ggplot2::scale_color_manual(values = c("Biophysical" = "#228B22", "Spatial" = "black")) + # --- THEME FIX --- # We use your custom theme, which now enforces Atkinson fonts. # turning off grid because lollipops look cleaner without it theme_forestry_plot(font_size = 16, grid = FALSE) + ggplot2::labs( title = "Random Forest Variable Importance", subtitle = "Permutation Importance. Note the high rank of spatial variables.", y = NULL, x = "Importance (Loss in Accuracy if scrambled)", shape = "Variable Type", color = "Variable Type" ) } plot_umap_forested <- function(data) { data_clean <- data %>% drop_na() numeric_data <- data_clean %>% select(where(is.numeric)) %>% scale() set.seed(42) umap_fit <- umap::umap(numeric_data) # Format for plotting umap_df <- umap_fit$layout %>% as.data.frame() %>% dplyr::rename(UMAP1 = V1, UMAP2 = V2) %>% dplyr::mutate(forested = as.factor(data_clean$forested)) # Generate Plot ggplot(umap_df, aes(x = UMAP1, y = UMAP2, color = forested)) + geom_point(alpha = 0.6, size = 1.5) + #scale_color_viridis_d(name = "Forested Status", begin = 0.2, end = 0.8) + scale_color_manual( name = "Forested?", # Assumes factor order is (No, Yes). Swap colors if order differs. values = c("#228B22", "#D2691E") ) + theme_minimal() + labs( title = "UMAP Projection of Forested Areas", subtitle = "Clustering based on numeric landscape features", x = "UMAP Dimension 1", y = "UMAP Dimension 2" ) } plot_cv_strategies <- function(data) { # 1. SETUP DATA & ADD ID # We MUST add an ID column to track rows across splits forested_sf <- st_as_sf( data, coords = c("lon", "lat"), crs = 4326, remove = FALSE ) %>% mutate(id = row_number()) # <--- Critical: Adds the ID we need later # 2. DEFINE CUSTOM ECOLOGICAL DISTANCE dist_env <- function(x) { x %>% st_drop_geometry() %>% select(elevation, precip_annual, temp_annual_mean, roughness) %>% scale() %>% dist() } # 3. CREATE THE SPLITS set.seed(42) # A. Random random_folds <- vfold_cv(forested_sf, v = 5) # B. Spatial Block spatial_folds <- spatial_block_cv(forested_sf, v = 5) # C. Environmental Clustering cluster_folds <- spatial_clustering_cv( forested_sf, v = 5, cluster_function = "hclust", distance_function = dist_env ) # 4. HELPER: EXTRACT FOLD IDs # This assigns a Fold Number (1-5) to every single point get_fold_ids <- function(splits, original_data) { results <- original_data %>% mutate(fold = NA_character_) for(i in seq_along(splits$splits)) { test_ids <- assessment(splits$splits[[i]]) %>% pull(id) results <- results %>% mutate(fold = ifelse(id %in% test_ids, as.character(i), fold)) } return(results) } # 5. PREPARE PLOTTING DATA df_random <- get_fold_ids(random_folds, forested_sf) df_spatial <- get_fold_ids(spatial_folds, forested_sf) df_cluster <- get_fold_ids(cluster_folds, forested_sf) # 6. COMMON PLOT FUNCTION plot_folds <- function(df, title) { ggplot(df) + geom_sf(aes(color = fold), size = 0.5, alpha = 0.6) + scale_color_brewer(palette = "Set1", name = "Fold") + # <--- The Rainbow Colors #scale_color_discrete_qualitative(palette = "Harmonic", name = "Fold") + #scale_color_discrete_qualitative(palette = "Dark 3", name = "Fold") + #scale_color_discrete_qualitative(palette = "Dynamic", name = "Fold") + ggtitle(title) + theme_void() + theme( plot.title = element_text(hjust = 0.5, face = "bold", size = 12), legend.position = "none" ) } # 7. GENERATE & COMBINE p1 <- plot_folds(df_random, "Random\n(Confetti)") p2 <- plot_folds(df_spatial, "Spatial Block\n(Checkerboard)") p3 <- plot_folds(df_cluster, "Env. Clustering\n(Ecoregions)") p1 + p2 + p3 + plot_layout(ncol = 3) } plot_spatial_cv_comparison <- function(res_random, res_block, res_cluster) { # 1. Extract & Clean Data all_metrics <- bind_rows( res_random %>% collect_metrics() %>% mutate(strategy = "Random CV"), res_block %>% collect_metrics() %>% mutate(strategy = "Block CV"), res_cluster %>% collect_metrics() %>% mutate(strategy = "Cluster CV") ) %>% filter(.metric == "roc_auc") %>% mutate( recipe_label = case_when( str_detect(wflow_id, "^base") ~ "Coords", str_detect(wflow_id, "^non_spatial") ~ "No Coords", str_detect(wflow_id, "^extensible") ~ "Extensible", TRUE ~ "Unknown" ), model = str_remove(wflow_id, "(base_|non_spatial_|extensible_)"), strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")), recipe_label = factor(recipe_label, levels = c("Coords", "No Coords", "Extensible")) ) # 2. Generate Plot ggplot(all_metrics, aes(x = model, y = mean, color = model)) + geom_hline(yintercept = 0.96, linetype = "dashed", color = "gray50", alpha = 0.5) + geom_point(size = 4) + geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), width = 0.2) + scale_color_discrete_qualitative(palette = "Dark 3") + facet_grid(recipe_label ~ strategy) + scale_y_continuous(labels = scales::number_format(accuracy = 0.01), limits = c(.65, 1)) + labs( y = "ROC AUC Score", x = "Model Type", color = "Model" ) + theme_bw(base_size = 14) + theme( legend.position = "none", panel.grid.minor = element_blank(), strip.text = element_text(face = "bold", size = 10) ) } plot_spatial_vip_comparison <- function(res_random, res_block, res_cluster, model_id = "base_rf") { # Internal helper to extract VIP from the best fold get_best_fold_vip <- function(wflow_obj, strategy_name) { # 1. Extract results for the specific model ID (e.g., "base_rf") resample_results <- wflow_obj %>% extract_workflow_set_result(model_id) # 2. Find the best fold best_fold_id <- resample_results %>% collect_metrics(summarize = FALSE) %>% filter(.metric == "roc_auc") %>% slice_max(.estimate, n = 1) %>% slice(1) %>% pull(id) # 3. Get the split object for that fold best_split <- resample_results$splits[[which(resample_results$id == best_fold_id)]] # 4. Refit and calculate Importance wflow_obj %>% extract_workflow(model_id) %>% finalize_workflow(select_best(resample_results, metric = "roc_auc")) %>% fit(data = analysis(best_split)) %>% extract_fit_parsnip() %>% vi() %>% mutate(strategy = strategy_name) } # Consolidate and clean vip_data <- bind_rows( get_best_fold_vip(res_random, "Random CV"), get_best_fold_vip(res_block, "Block CV"), get_best_fold_vip(res_cluster, "Cluster CV") ) %>% # Filter ONLY for Lat/Lon (The interaction term doesn't exist in your new recipe) filter(Variable %in% c("lat", "lon")) %>% mutate( Variable = case_when( Variable == "lon" ~ "Longitude", Variable == "lat" ~ "Latitude", TRUE ~ Variable ), strategy = factor(strategy, levels = c("Cluster CV", "Block CV", "Random CV")) ) # Generate Plot ggplot(vip_data, aes(x = Importance, y = strategy)) + geom_segment(aes(x = 0, xend = Importance, y = strategy, yend = strategy), color = "gray20", linewidth = .65) + geom_point(aes(color = strategy), size = 5, show.legend = FALSE) + facet_grid(Variable ~ ., scales = "free_y", space = "free_y", switch = "y") + scale_color_discrete_qualitative(palette = "Dark 3") + scale_x_continuous(expand = c(0, 0)) + # Removed fixed limit (105) to let it scale automatically labs( title = paste("Spatial Overfitting Check:", model_id), subtitle = "Does the model rely on coordinates?", x = "Importance Score (Impurity)", y = NULL ) + theme_classic(base_size = 14) + theme( panel.grid.minor = element_blank() ) } # vvv ADD THIS vvv plot_model_stability <- function(res_random, res_block, res_cluster, model_id) { # Internal helper to extract per-fold metrics extract_fold_metrics <- function(wflow_obj, strategy_name) { wflow_obj %>% extract_workflow_set_result(id = model_id) %>% # <--- Now this works collect_metrics(summarize = FALSE) %>% filter(.metric == "roc_auc") %>% mutate(strategy = strategy_name) } # Combine and factorize all_fold_metrics <- bind_rows( extract_fold_metrics(res_random, "Random CV"), extract_fold_metrics(res_block, "Block CV"), extract_fold_metrics(res_cluster, "Cluster CV") ) %>% mutate(strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV"))) # Generate Plot ggplot(all_fold_metrics, aes(x = strategy, y = .estimate, color = strategy)) + geom_violin(alpha = 0.4, outlier.shape = NA, width = 0.5) + geom_jitter(width = 0.1, size = 3, alpha = 0.8) + scale_fill_discrete_qualitative(palette = "Dark 3") + scale_color_discrete_qualitative(palette = "Dark 3") + scale_y_continuous(limits = c(0.80, 1.0), breaks = seq(0.80, 1.0, 0.05)) + labs( title = paste("Stability Analysis:", model_id), # <--- Helpful title x = NULL, y = "ROC AUC (per fold)", caption = "" ) + theme_classic(base_size = 14) + theme( legend.position = "none" ) } plot_final_test_results <- function(final_fit_obj) { final_metrics <- final_fit_obj %>% collect_metrics() %>% filter(.metric %in% c("roc_auc", "accuracy")) %>% mutate( .metric = case_when( .metric == "roc_auc" ~ "ROC AUC", .metric == "accuracy" ~ "Accuracy", TRUE ~ .metric ) ) ggplot(final_metrics, aes(x = .metric, y = .estimate, fill = .metric)) + geom_col(width = 0.6, show.legend = FALSE) + geom_text(aes(label = round(.estimate, 3)), vjust = -0.5, size = 5, fontface = "bold") + scale_fill_manual(values = c("ROC AUC" = "#2c3e50", "Accuracy" = "#18bc9c")) + scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, 0.2), expand = c(0, 0)) + labs( x = NULL, y = "Performance Score", caption = "Evaluation based on the 20% held-out Washington Test Set." ) + theme_classic(base_size = 14) + theme(axis.text.x = element_text(face = "bold", size = 12)) } plot_final_confusion_matrix <- function(final_fit_obj) { # Extract the predictions from the last_fit object preds <- final_fit_obj %>% collect_predictions() # Generate the confusion matrix cm <- preds %>% conf_mat(truth = forested, estimate = .pred_class) # Convert to a tidy format for plotting autoplot(cm, type = "heatmap") + scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") + theme_minimal(base_size = 14) + theme( panel.grid = element_blank(), axis.text = element_text(face = "bold"), plot.title = element_text(face = "bold"), legend.position = "none" ) } plot_fold_mechanics <- function(wa_sf, boundary_wa_sf) { # 1. SETUP set.seed(42) # Ensure unique_id exists to prevent the "object not found" error if(!"unique_id" %in% names(wa_sf)) { wa_sf <- wa_sf %>% mutate(unique_id = row_number()) } # 2. DEFINE CUSTOM DISTANCE dist_env <- function(x) { x %>% st_drop_geometry() %>% select(elevation, precip_annual, temp_annual_mean, roughness) %>% scale() %>% dist() } # 3. CREATE SPLITS random_folds <- vfold_cv(wa_sf, v = 5) spatial_folds <- spatial_block_cv(wa_sf, v = 5) cluster_folds <- spatial_clustering_cv(wa_sf, v = 5, cluster_function = "hclust", distance_function = dist_env) # 4. HELPER: EXTRACT STATUS get_status <- function(split_obj) { first_split <- split_obj$splits[[1]] assessment_ids <- assessment(first_split) %>% pull(unique_id) wa_sf %>% mutate(status = if_else(unique_id %in% assessment_ids, "Assessment (Test)", "Analysis (Train)")) } df_r <- get_status(random_folds) df_s <- get_status(spatial_folds) df_c <- get_status(cluster_folds) # 5. PLOTTER WITH BOUNDARY LAYER plot_one <- function(df, title) { ggplot() + # Layer 1: The state boundary provides the spatial context geom_sf(data = boundary_wa_sf, fill = "gray98", color = "gray85") + # Layer 2: The actual data points geom_sf(data = df, aes(color = status), size = 0.4, alpha = 0.7) + scale_color_manual(values = c("Assessment (Test)" = "#E16A86", "Analysis (Train)" = "#606060")) + ggtitle(title) + theme_void() + theme( legend.position = "none", plot.title = element_text(hjust = 0.5, face = "bold", size = 10) ) } # 6. COMBINE (plot_one(df_r, "Random\n(Confetti)") + plot_one(df_s, "Spatial\n(Checkerboard)") + plot_one(df_c, "Env. Clustering\n(Ecoregions)")) + plot_layout(ncol = 3, guides = "collect") & theme(legend.position = 'bottom', legend.title = element_blank()) } plot_classic_kfold_diagram <- function(){ library(tidyverse) # 1. Setup Parameters N_obs <- 100 K_folds <- 5 # 2. Create Mock Data and Assign Folds set.seed(42) base_data <- tibble( obs_id = 1:N_obs, assigned_fold_group = sample(rep(1:K_folds, length.out = N_obs)) ) # 3. Expand Data for Plotting (Cross-Join) plot_data <- expand_grid( obs_id = 1:N_obs, iteration = 1:K_folds ) %>% left_join(base_data, by = "obs_id") %>% # Define Status: If the iteration matches the assigned fold group, it's Test data. mutate( status = case_when( iteration == assigned_fold_group ~ "Assessment (20%)", TRUE ~ "Analysis (80%)" ), # Convert iteration to factor and reverse levels so Fold 1 is at the top of plot iteration_fct = factor(iteration, levels = rev(1:K_folds)) ) # 4. Define Colors cv_colors <- c( "Analysis (80%)" = "#A6CEE3", "Assessment (20%)" = "#FDB462" ) # 5. Generate the Plot ggplot(plot_data, aes(x = obs_id, y = iteration_fct, fill = status)) + geom_tile(color = "white", linewidth = 0.2) + scale_fill_manual(values = cv_colors, name = "Data Role") + labs( x = "Observations", y = "Folds" ) + # Theme adjustments theme_minimal(base_size = 16) + theme( panel.grid = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "bottom" ) } create_performance_table <- function(results_cluster, final_fit_results) { require(dplyr) require(gt) require(tidymodels) # 1. Extract the WINNING Model Info (Name & Score) best_model_info <- results_cluster %>% rank_results(rank_metric = "roc_auc", select_best = TRUE) %>% filter(.metric == "roc_auc") %>% slice(1) # Grab the #1 spot # Get the dynamic name and score best_model_name <- best_model_info$wflow_id cv_score <- best_model_info$mean # 2. Extract the Final Test Set Scores test_scores <- final_fit_results %>% collect_metrics() %>% select(.metric, .estimate) %>% tidyr::pivot_wider(names_from = .metric, values_from = .estimate) # 3. Create the Combined Data Frame summary_data <- tibble( # FIXED: Uses the actual winner's name instead of hardcoded text Model = best_model_name, `Validation AUC` = cv_score, `Test AUC` = test_scores$roc_auc, `Test Accuracy` = test_scores$accuracy ) # 4. Render with gt summary_data %>% gt() %>% tab_header( title = md("**Final Model Performance**"), subtitle = paste("Winner:", best_model_name) ) %>% fmt_number( columns = where(is.numeric), decimals = 3 ) %>% cols_align( align = "center", columns = where(is.numeric) ) %>% tab_footnote( footnote = "Mean ROC AUC across spatial cross-validation folds.", locations = cells_column_labels(columns = `Validation AUC`) ) %>% tab_footnote( footnote = "Evaluated on the 20% independent Washington holdout set.", locations = cells_column_labels(columns = contains("Test")) ) %>% tab_options( table.border.top.color = "white", table.border.bottom.color = "black", column_labels.font.weight = "bold" ) } plot_yeo_johnson <- function(data) { # 1. Prepare Recipe & Extract Lambda rec_yeo <- recipe(~ elevation, data = data) %>% step_YeoJohnson(elevation) %>% prep() lambda_val <- tidy(rec_yeo, number = 1) %>% filter(terms == "elevation") %>% pull(value) %>% round(2) # 2. Create Plotting Data plot_data <- data %>% select(elevation) %>% mutate(type = "Original") %>% bind_rows( bake(rec_yeo, new_data = data) %>% mutate(type = "Transformed") %>% rename(elevation_trans = elevation) ) # 3. Setup Colors dark3_cols <- colorspace::qualitative_hcl(2, palette = "Dark 3") # 4. Plot A: Original p1 <- ggplot(filter(plot_data, type == "Original"), aes(x = elevation)) + geom_density(fill = dark3_cols[1], color = dark3_cols[1], alpha = 0.6, linewidth = 1) + scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + labs(x = "Elevation (m)", y = "Density") + theme_minimal(base_size = 14) + theme( panel.grid.minor = element_blank(), panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5) ) # 5. Plot B: Transformed p2 <- ggplot(filter(plot_data, type == "Transformed"), aes(x = elevation_trans)) + geom_density(fill = dark3_cols[2], color = dark3_cols[2], alpha = 0.6, linewidth = 1) + scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + labs(x = paste0("Yeo-Johnson (Lambda: ", lambda_val, ")"), y = "Density") + theme_minimal(base_size = 14) + theme( panel.grid.minor = element_blank(), axis.title.y = element_blank(), axis.text.y = element_blank(), panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5) ) # 6. Combine p1 + p2 } #' Save Model Error Diagnostic Map #' #' @description Generates a diagnostic map highlighting prediction errors. It plots #' misclassified points colored by the magnitude of the error (confidence in the wrong answer) #' over a hillshaded elevation background. #' #' @param data A data frame containing model predictions (must include '.pred_class', #' 'forested', '.pred_Yes', 'lon', and 'lat'). #' @param boundary_sf An \code{sf} object representing the state boundary. #' @param raster_path Character string. File path to the elevation raster (.tif). #' @param output_path Character string. File path where the PNG will be saved. #' #' @importFrom terra rast terrain shade #' @importFrom sf st_transform st_as_sf st_filter #' @importFrom dplyr filter mutate #' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_viridis_c labs ggsave guides guide_colorbar unit margin #' @importFrom tidyterra geom_spatraster scale_fill_hypso_c #' #' @return The \code{output_path} (invisible), for integration with \code{targets}. #' @export save_error_map_png <- function(data, boundary_sf, raster_path, output_path) { # 1. Load & Process Hillshade (Standard Setup) elev_terra <- terra::rast(raster_path) slope <- terra::terrain(elev_terra, "slope", unit = "radians") aspect <- terra::terrain(elev_terra, "aspect", unit = "radians") hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315) # 2. Prep Vectors & Calculate Errors wa_boundary_sf <- sf::st_transform(boundary_sf, 4326) # Filter to ONLY the mistakes and calculate how "wrong" they were error_sf <- data %>% # Keep only rows where the Hard Class Prediction was wrong dplyr::filter(.pred_class != forested) %>% # Calculate Magnitude: How far was the probability from the truth? dplyr::mutate( truth_num = ifelse(forested == "Yes", 1, 0), error_magnitude = abs(truth_num - .pred_Yes) ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% sf::st_filter(wa_boundary_sf) # 3. Plot plt <- ggplot2::ggplot() + # --- Background Layers --- tidyterra::geom_spatraster(data = elev_terra) + tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") + tidyterra::geom_spatraster(data = hill_terra, ggplot2::aes(alpha = ggplot2::after_stat(value)), fill = "black", show.legend = FALSE) + ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) + ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) + # --- The Error Layer --- ggplot2::geom_sf( data = error_sf, ggplot2::aes(color = error_magnitude), size = 3, alpha = 0.9 ) + ggplot2::scale_color_viridis_c( option = "plasma", name = "Error\nConfidence", limits = c(0.5, 1.0), direction = -1 ) + # --- Theme Application --- theme_forestry_spatial() + ggplot2::labs(x = NULL, y = NULL) + # --- Legend Controls (Right Side Vertical) --- ggplot2::guides( fill = "none", color = ggplot2::guide_colorbar( title.position = "top", barwidth = ggplot2::unit(0.5, "cm"), # Narrow width barheight = ggplot2::unit(6, "cm") # Tall height for right side ) ) + ggplot2::theme(legend.position = "right") # Save ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white") return(output_path) } #' Calculate Area of Applicability Data #' #' @description Generates the Area of Applicability (AOA) scores (Dissimilarity Index) #' for the Georgia extrapolation dataset based on the Washington training data. #' #' @param train_data Dataframe. The training data from Washington. #' @param test_data Dataframe. The extrapolation data from Georgia. #' @param predictors Character vector. The list of predictor variable names. #' #' @importFrom dplyr select all_of bind_cols #' @importFrom tibble tibble #' @importFrom waywiser ww_area_of_applicability #' @importFrom stats predict #' @importFrom sf st_as_sf #' #' @return An \code{sf} object containing the Georgia data with an added 'di' (Dissimilarity Index) column. #' @export calculate_ga_aoa <- function(train_data, test_data, predictors) { # 1. Prepare Environments train_env <- train_data %>% dplyr::select(dplyr::all_of(predictors)) test_env <- test_data %>% dplyr::select(dplyr::all_of(predictors)) # 2. Define "Flat" Importance importance <- tibble::tibble(term = predictors, estimate = 1) # 3. Fit AOA aoa_model <- waywiser::ww_area_of_applicability( x = train_env, importance = importance ) # 4. Predict aoa_results <- stats::predict(aoa_model, test_env) # 5. Return Spatial Object test_data %>% dplyr::bind_cols(aoa_results) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) } #' Plot Georgia Area of Applicability (AOA) #' #' @description Plots the pre-calculated Dissimilarity Index (DI) for Georgia. #' #' @param aoa_sf An \code{sf} object containing the 'di' column (output of \code{calculate_ga_aoa}). #' #' @importFrom ggplot2 ggplot geom_sf aes scale_color_viridis_c labs guides guide_colorbar unit theme #' #' @return A \code{ggplot} object showing the Dissimilarity Index map. #' @export plot_georgia_aoa <- function(aoa_sf) { ggplot2::ggplot(aoa_sf) + # Points colored by Dissimilarity Index (DI) ggplot2::geom_sf(ggplot2::aes(color = di), size = 1, alpha = 0.8) + # Color Scale ggplot2::scale_color_viridis_c( option = "magma", direction = 1, name = "Dissimilarity\nIndex (DI)" ) + # Theme theme_forestry_spatial() + ggplot2::labs(x = NULL, y = NULL) + # Legend Control ggplot2::guides( color = ggplot2::guide_colorbar( title.position = "top", barwidth = ggplot2::unit(0.5, "cm"), barheight = ggplot2::unit(6, "cm") ) ) + ggplot2::theme(legend.position = "right") } # Predict on a new region (External Validation) predict_external_region <- function(final_fit, new_data) { wa_model <- final_fit$.workflow[[1]] # FIX: Mock ALL columns that were in the training data # The model expects 'geometry' because training data was an sf object clean_data <- new_data %>% dplyr::mutate( northness = NA_real_, county = NA_character_, geometry = NA # Tidymodels will see this exists, then ignore it (as it's not a predictor) ) # Predict Class and Probability preds <- predict(wa_model, clean_data) %>% dplyr::bind_cols(predict(wa_model, clean_data, type = "prob")) %>% dplyr::bind_cols(new_data %>% dplyr::select(lat, lon, forested)) return(preds) } #' Plot Georgia Forest Comparison #' #' @description Creates a side-by-side comparison of forest cover for Georgia. #' The left plot (a) is the Model Prediction, and the right plot (b) is the Actual Data. #' Features a shared right-side legend and standardized spatial styling. #' #' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat). #' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia"). #' #' @importFrom dplyr filter #' @importFrom sf st_as_sf st_transform st_crs #' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual labs theme guides guide_legend element_text #' @importFrom patchwork plot_layout wrap_plots plot_annotation #' #' @return A \code{patchwork} object containing the labeled comparison plot. #' @export plot_ga_comparison_map <- function(pred_data, boundaries) { # 1. Prepare Data & Boundaries ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA") plot_data <- pred_data %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% sf::st_transform(sf::st_crs(ga_boundary)) # 2. Helper Function (No Title Argument) create_map <- function(data, fill_col) { ggplot2::ggplot() + # Background ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") + # Points ggplot2::geom_sf(data = data, ggplot2::aes(color = {{ fill_col }}), size = 0.5, alpha = 0.6) + # Colors ggplot2::scale_color_manual( name = "Forest Cover", values = c("Yes" = "#228B22", "No" = "#D2691E"), labels = c("Forest", "Non-Forest") ) + # Labels (No Title) ggplot2::labs(title = NULL, x = NULL, y = NULL) + # Theme theme_forestry_spatial() + # Define Legend Here to avoid Patchwork "Order of Ops" Error ggplot2::guides( color = ggplot2::guide_legend( override.aes = list(size = 4, alpha = 1), title.position = "top", ncol = 1 ) ) + ggplot2::theme( legend.title = ggplot2::element_text(face = "bold", size = 12), legend.text = ggplot2::element_text(size = 10) ) } # 3. Create Subplots p_pred <- create_map(plot_data, .pred_class) # Left: Predicted p_actual <- create_map(plot_data, forested) # Right: Actual # 4. Patchwork Layout with (a)/(b) Tags combined_plot <- (p_pred + p_actual) + patchwork::plot_layout(guides = "collect") + patchwork::plot_annotation( tag_levels = 'a', tag_prefix = '(', tag_suffix = ')', theme = theme_forestry_spatial() # Ensure tags match font ) & ggplot2::theme( legend.position = "right", plot.tag = ggplot2::element_text(size = 20, face = "bold") # Make tags prominent ) return(combined_plot) } plot_ga_confusion_matrix <- function(pred_data) { # 1. Generate the confusion matrix object cm <- pred_data %>% yardstick::conf_mat(truth = forested, estimate = .pred_class) # 2. Plot using autoplot to match the previous style exactly plt <- ggplot2::autoplot(cm, type = "heatmap") + # MATCHED: The exact blue gradient from your previous slide ggplot2::scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") + ggplot2::theme_minimal(base_size = 14) + ggplot2::theme( panel.grid = ggplot2::element_blank(), axis.text = ggplot2::element_text(face = "bold"), plot.title = ggplot2::element_text(face = "bold", hjust = 0.5), # Added hjust for center alignment legend.position = "none" ) return(plt) } #' Plot Failure Mechanism Comparison #' #' @description Creates a side-by-side diagnostic plot returning a patchwork object. #' (a) The Area of Applicability (Dissimilarity Index) showing where the model is extrapolating. #' (b) The spatial distribution of actual classification errors. #' #' @param aoa_data Dataframe containing the AOA results (must have 'di', 'lon', 'lat'). #' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat). #' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia"). #' #' @importFrom dplyr filter mutate case_when #' @importFrom sf st_as_sf st_transform st_crs #' @importFrom ggplot2 ggplot geom_sf aes labs theme guides guide_legend guide_colorbar element_rect element_text unit #' @importFrom colorspace scale_color_discrete_qualitative #' @importFrom patchwork plot_layout plot_annotation wrap_plots #' #' @return A \code{patchwork} object containing the side-by-side comparison. #' @export plot_failure_mechanism <- function(aoa_data, pred_data, boundaries) { # 1. Prep Boundaries ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA") target_crs <- sf::st_crs(ga_boundary) # 2. Prep Map A: Dissimilarity (Risk) aoa_sf <- aoa_data %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% sf::st_transform(target_crs) p_risk <- ggplot2::ggplot() + ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") + ggplot2::geom_sf(data = aoa_sf, ggplot2::aes(color = di), size = 0.5, alpha = 0.8) + ggplot2::scale_color_viridis_c( option = "magma", direction = 1, name = "Dissimilarity Idx." ) + ggplot2::labs(x = NULL, y = NULL) + theme_forestry_spatial() + ggplot2::guides( color = ggplot2::guide_colorbar( title.position = "top", title.hjust = 0.5, barwidth = ggplot2::unit(4.5, "cm"), barheight = ggplot2::unit(.3, "cm") ) ) + ggplot2::theme(legend.position ="bottom") # 3. Prep Map B: Failures (Effect) errors_sf <- pred_data %>% dplyr::filter(.pred_class != forested) %>% dplyr::mutate( error_type = dplyr::case_when( .pred_class == "Yes" & forested == "No" ~ "False Positive", .pred_class == "No" & forested == "Yes" ~ "False Negative" ) ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>% sf::st_transform(target_crs) p_fail <- ggplot2::ggplot() + ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") + ggplot2::geom_sf(data = errors_sf, ggplot2::aes(color = error_type), size = 0.6, alpha = 0.6) + colorspace::scale_color_discrete_qualitative( palette = "Dark 3", name = "Error Type" ) + ggplot2::labs(x = NULL, y = NULL) + theme_forestry_spatial() + ggplot2::guides( color = ggplot2::guide_legend( override.aes = list(size = 4, alpha = 1), title.position = "top", title.hjust = 0.5 ) ) + ggplot2::theme(legend.position = "bottom") # 4. Patchwork Assembly combined_plot <- (p_risk + p_fail) + patchwork::plot_layout(guides = "keep") + patchwork::plot_annotation( tag_levels = 'a', tag_prefix = '(', tag_suffix = ')', theme = theme_forestry_spatial() ) & ggplot2::theme( plot.tag = ggplot2::element_text(size = 20, face = "bold"), plot.background = ggplot2::element_rect(fill = "transparent", color = NA) ) return(combined_plot) } #' Plot Annual Rainfall Comparison (Clipped Hexes) #' #' @description Creates a polished side-by-side comparison of annual precipitation. #' Hexagons are spatially generated and clipped to the exact state boundaries #' to eliminate "bleeding" edges. Uses \code{theme_forestry_void} with explicit #' font sizing to match topographic maps. #' #' @param wa_data Dataframe containing Washington data (requires 'precip_annual', 'lat', 'lon'). #' @param ga_data Dataframe containing Georgia data (requires 'precip_annual', 'lat', 'lon'). #' @param boundaries An \code{sf} object containing state boundaries. #' @param bins Integer. Number of hexes across the state width. Default is 30. #' @param max_limit Numeric. The visual cap for rainfall (mm) to ensure comparable scales. Default is 2500. #' #' @importFrom dplyr filter mutate group_by summarize inner_join row_number #' @importFrom sf st_as_sf st_transform st_crs st_make_grid st_intersection st_join st_drop_geometry #' @importFrom ggplot2 ggplot geom_sf aes scale_fill_distiller guide_colorbar unit theme element_text element_rect margin coord_sf rel #' @importFrom scales squish #' @importFrom patchwork plot_layout plot_annotation #' #' @return A \code{patchwork} object containing the side-by-side comparison. #' @export plot_precip_hex_comparison <- function(wa_data, ga_data, boundaries, bins = 30, max_limit = 2500) { # 1. Clean Data wa_clean <- wa_data %>% dplyr::filter(!is.na(precip_annual)) %>% dplyr::mutate(precip_annual = as.numeric(precip_annual)) ga_clean <- ga_data %>% dplyr::filter(!is.na(precip_annual)) %>% dplyr::mutate(precip_annual = as.numeric(precip_annual)) # 2. Helper: Generate Clipped Hex Grid get_clipped_hex_layer <- function(point_data, boundary_sf, n_bins) { target_crs <- sf::st_crs(boundary_sf) points_sf <- sf::st_as_sf(point_data, coords = c("lon", "lat"), crs = 4326) %>% sf::st_transform(target_crs) grid <- sf::st_make_grid(boundary_sf, n = n_bins, square = FALSE, flat_topped = TRUE) %>% sf::st_as_sf() %>% dplyr::mutate(hex_id = dplyr::row_number()) grid_clipped <- sf::st_intersection(grid, boundary_sf) joined <- sf::st_join(points_sf, grid_clipped, join = sf::st_within) hex_summary <- joined %>% dplyr::filter(!is.na(hex_id)) %>% dplyr::group_by(hex_id) %>% dplyr::summarize(precip_mean = mean(precip_annual, na.rm = TRUE)) final_layer <- grid_clipped %>% dplyr::inner_join(sf::st_drop_geometry(hex_summary), by = "hex_id") return(final_layer) } # 3. Process States wa_bnd <- boundaries %>% dplyr::filter(NAME == "Washington" | NAME == "WA") ga_bnd <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA") wa_hex_sf <- get_clipped_hex_layer(wa_clean, wa_bnd, bins) ga_hex_sf <- get_clipped_hex_layer(ga_clean, ga_bnd, bins) # 4. Plotting Helper plot_hex_sf <- function(hex_data, boundary_sf) { ggplot2::ggplot() + ggplot2::geom_sf(data = hex_data, ggplot2::aes(fill = precip_mean), color = NA) + ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) + # [CRITICAL] Removes grid lines (datum=NA) and removes padding (expand=FALSE) ggplot2::coord_sf(datum = NA, expand = FALSE) + ggplot2::scale_fill_distiller( palette = "Blues", direction = 1, limits = c(0, max_limit), oob = scales::squish, name = "Mean Annual Precip (mm)", guide = ggplot2::guide_colorbar( title.position = "top", title.hjust = 0.5, barwidth = ggplot2::unit(8, "cm"), barheight = ggplot2::unit(0.4, "cm"), frame.colour = "black", ticks.colour = "black" ) ) + # Use Base Size 16 to match Topo Plot theme_forestry_void(base_size = 16) + ggplot2::theme(legend.position = "none") } p_wa <- plot_hex_sf(wa_hex_sf, wa_bnd) p_ga <- plot_hex_sf(ga_hex_sf, ga_bnd) # 5. Patchwork Assembly combined_plot <- (p_wa + p_ga) + patchwork::plot_layout(guides = "collect") + patchwork::plot_annotation( tag_levels = 'a', tag_prefix = '(', tag_suffix = ')' ) & # Ensure the combined plot also uses base_size 16 theme_forestry_void(base_size = 28) return(combined_plot) } #' Get Major Peaks for Plotting #' #' Internal helper to return peak locations for WA and GA. #' #' @param state_name String. Either "Washington" or "Georgia". #' @return An sf object containing peak names and locations. #' #' @importFrom tibble tibble #' @importFrom sf st_as_sf #' @noRd get_state_peaks <- function(state_name) { if (state_name == "Washington") { tibble::tibble( peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"), lat = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013), lon = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111) ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) } else if (state_name == "Georgia") { tibble::tibble( peak = c("Brasstown Bald", "Rabun Bald", "Blood Mtn"), lat = c(34.8743, 34.9660, 34.7398), lon = c(-83.8111, -83.2996, -83.9367) ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) } else { NULL } } #' Plot Spatial Autocorrelation Exploration #' #' @description Generates a Moran Scatterplot to visualize spatial #' autocorrelation in elevation data using a 5km neighborhood. #' #' @param wa_data A dataframe or tibble containing elevation, lat, and lon columns. #' #' @return A ggplot object showing standardized elevation vs. spatially lagged elevation. #' #' @importFrom sf st_as_sf st_transform st_coordinates st_drop_geometry #' @importFrom spdep dnearneigh nb2listw lag.listw card #' @importFrom ggplot2 ggplot aes geom_point geom_smooth geom_hline geom_vline labs theme_minimal #' @importFrom dplyr mutate #' #' @export plot_spatial_exploration <- function(wa_data) { # 1. Convert to sf and project to meters (Washington State Plane North) # This ensures the 5000 distance in dnearneigh is 5,000 meters. wa_sf <- wa_data |> sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) |> sf::st_transform(2285) # 2. Extract coordinates and build weights (5km radius) coords <- sf::st_coordinates(wa_sf) nb <- spdep::dnearneigh(coords, 0, 5000) # Remove points with zero neighbors to prevent divide-by-zero errors ids_with_nb <- which(spdep::card(nb) > 0) nb <- subset(nb, spdep::card(nb) > 0) wa_sf <- wa_sf[ids_with_nb, ] # Row-standardized weights matrix lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE) # 3. Prep data for plotting plot_df <- wa_sf |> sf::st_drop_geometry() |> dplyr::mutate( # Scale elevation to get z-scores elev_scaled = as.vector(scale(elevation)), # Calculate the spatial lag (average elevation of neighbors) elev_lag = spdep::lag.listw(lw, elev_scaled, zero.policy = TRUE) ) # 4. Build Plot Object p <- ggplot2::ggplot(plot_df, ggplot2::aes(x = elev_scaled, y = elev_lag)) + ggplot2::geom_point(alpha = 0.2, color = "#2c3e50") + ggplot2::geom_smooth( method = "lm", color = "#e74c3c", linetype = "dashed", formula = y ~ x ) + ggplot2::geom_hline(yintercept = 0, alpha = 0.3) + ggplot2::geom_vline(xintercept = 0, alpha = 0.3) + ggplot2::labs( title = "Spatial Autocorrelation: Moran Scatterplot", subtitle = "Washington Elevation (5km neighborhood)", x = "Standardized Elevation (Z-score)", y = "Spatially Lagged Elevation (Neighbors)" ) + ggplot2::theme_minimal() return(p) }