#' Helper: Save Plot for Quarto Slide
#'
#' @description Saves a ggplot object as a PNG, sized to fit comfortably 
#' below a standard slide title, with robust font handling.
#'
#' @param plot_obj The ggplot object to save.
#' @param name A short descriptive name (e.g., "wa_ecoregions").
#' @param type Either "map" or "plot". Adds this prefix to the filename.
#' @param width Width in inches (default: 10).
#' @param height Height in inches (default: 6.18).
#' @param dpi Resolution (default: 300).
#'
#' @return The full file path (invisibly).
#' @export
helper_save_fig <- function(plot_obj, name, type = c("map", "plot"), 
                            width = 10, height = 6.18, dpi = 300) {
  
  # 1. Input Validation & Directory Setup
  type <- match.arg(type)
  if (!dir.exists("figs")) dir.create("figs")
  
  # Construct filename: figs/map_washington.png
  filename <- paste0("figs/", type, "_", name, ".png")
  
  # 2. Force Fonts (The "Amnesia" Fix)
  # Critical: We must turn the engine on immediately before saving
  # to prevent the new graphics device from reverting to Arial.
  sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson")
  showtext::showtext_auto()
  showtext::showtext_opts(dpi = dpi)
  
  # 3. Save the File
  ggplot2::ggsave(
    filename = filename,
    plot = plot_obj,
    width = width,
    height = height,
    dpi = dpi,
    bg = "white" # Prevents transparent backgrounds
  )
  
  message(paste("Saved:", filename))
  return(invisible(filename))
}

#' Register Project Fonts
#' 
#' Registers 'Atkinson Hyperlegible Next' (Sans) and 'Atkinson Hyperlegible Mono' 
#' (Monospace) with the sysfonts package for use in R graphics.
#'
#' @return NULL (called for side effects)
#' @export
setup_forestry_fonts <- function() {
  
  font_dir <- "assets/fonts"
  
  if (!dir.exists(font_dir)) {
    warning(paste("Font directory not found at:", font_dir))
    return(NULL)
  }
  
  # 1. Main Text Font (Sans)
  sysfonts::font_add(
    family = "atkinson",
    regular = file.path(font_dir, "AtkinsonHyperlegibleNext-Regular.ttf"),
    bold    = file.path(font_dir, "AtkinsonHyperlegibleNext-Bold.ttf"),
    italic  = file.path(font_dir, "AtkinsonHyperlegibleNext-Italic.ttf"),
    bolditalic = file.path(font_dir, "AtkinsonHyperlegibleNext-BoldItalic.ttf")
  )
  
  # 2. Code/Number Font (Mono)
  sysfonts::font_add(
    family = "atkinson_mono",
    regular = file.path(font_dir, "AtkinsonHyperlegibleMono-Regular.ttf"),
    bold    = file.path(font_dir, "AtkinsonHyperlegibleMono-Bold.ttf")
  )
  
  # 3. Enable showtext rendering
  showtext::showtext_auto()
  
  showtext::showtext_opts(dpi = 300)
}


#' Standardized Void Theme (Maximal Data Ink)
#' @description Removes axes/grids for shape-focused maps, but keeps project fonts.
#' @export
theme_forestry_void <- function(base_size = 16) {
  # 1. Start with the void (empty) theme
  ggplot2::theme_void(base_family = "atkinson", base_size = base_size) +
    
    # 2. Add back the specific text elements we need
    ggplot2::theme(
      # Keep the Legend styled correctly
      legend.position = "bottom",
      legend.title = ggplot2::element_text(
        family = "atkinson", 
        face = "bold", 
        size = ggplot2::rel(1)
      ),
      legend.text = ggplot2::element_text(
        family = "atkinson", 
        size = ggplot2::rel(0.9)
      ),
      
      # Keep Titles (if you use them inside the plot)
      plot.title = ggplot2::element_text(
        family = "atkinson", 
        face = "bold", 
        hjust = 0.5,
        margin = ggplot2::margin(b = 5)
      ),
      
      # Zero margins to force edges to touch
      plot.margin = ggplot2::margin(5, 5, 5, 5)
    )
}

#' Standard Forestry Plot Theme (Cowplot + Atkinson)
#'
#' @description A standardized theme for non-spatial plots (scatter, bar, line).
#' Based on cowplot::theme_cowplot(), it includes clean axes and a minimalist look.
#' Uses 'Atkinson Hyperlegible Next' for all text.
#'
#' @param font_size Integer. Base font size. Default is 14 (good for slides).
#' @param grid Logical. If TRUE, adds a light gray grid (useful for presentations).
#'
#' @importFrom cowplot theme_cowplot background_grid
#' @importFrom ggplot2 theme element_text element_rect margin
#' @export
theme_forestry_plot <- function(font_size = 14, grid = TRUE) {
  
  # 1. Ensure Fonts are Registered
  setup_forestry_fonts()
  
  # 2. Base Theme (Cowplot with Atkinson)
  # cowplot::theme_cowplot has a 'font_family' argument we can leverage directly
  base <- cowplot::theme_cowplot(font_size = font_size, font_family = "atkinson") +
    ggplot2::theme(
      
      # Fonts: We force family="atkinson" here just to be safe for overrides
      plot.title    = ggplot2::element_text(family = "atkinson", face = "bold", hjust = 0.5, size = font_size + 2),
      axis.title    = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size),
      legend.title  = ggplot2::element_text(family = "atkinson", face = "bold", size = font_size),
      legend.text   = ggplot2::element_text(family = "atkinson", size = font_size - 1),
      
      # Optional: Use Monospace for axis numbers? (Uncomment if you want the "technical" look)
      # axis.text   = ggplot2::element_text(family = "atkinson_mono", size = font_size - 1),
      
      # Backgrounds (Transparent for slides)
      plot.background = ggplot2::element_rect(fill = "transparent", color = NA),
      panel.background = ggplot2::element_rect(fill = "transparent", color = NA),
      
      # Margins
      legend.box.margin = ggplot2::margin(10, 10, 10, 10)
    )
  
  # 3. Optional Grid
  if (grid) {
    base <- base + cowplot::background_grid(major = "y", minor = "none", color.major = "grey92")
  }
  
  return(base)
}

#' Standardized Spatial Theme (Atkinson)
#' @description High-visibility map theme for presentations using Atkinson fonts.
#' @importFrom ggplot2 theme_minimal theme element_line element_blank element_text element_rect margin unit
#' @export
theme_forestry_spatial <- function(base_size = 16) {
  
  # 1. Ensure Fonts are Registered
  setup_forestry_fonts()
  
  # 2. Base Theme with Relative Sizing
  ggplot2::theme_minimal(base_family = "atkinson", base_size = base_size) + 
    ggplot2::theme(
      # Minimalist grid lines (Graticules)
      panel.grid.major = ggplot2::element_line(color = "grey90", linewidth = 0.2),
      panel.grid.minor = ggplot2::element_blank(),
      
      # Titles: Scale relative to base_size (12 * 1.5 = 18pt)
      plot.title = ggplot2::element_text(
        family = "atkinson", 
        face = "bold", 
        size = ggplot2::rel(1.5), 
        color = "#2c3e50"
      ),
      
      # Axis Labels: Slightly smaller than base (12 * 0.8 = 9.6pt)
      axis.text = ggplot2::element_text(
        family = "atkinson", 
        size = ggplot2::rel(0.8), 
        color = "#5d6d7e"
      ),
      
      # Legend: Scale relative to base
      legend.position = "right",
      legend.title = ggplot2::element_text(
        family = "atkinson", 
        size = ggplot2::rel(1.1), # ~13.2pt
        face = "bold"
      ),
      legend.text = ggplot2::element_text(
        family = "atkinson", 
        size = ggplot2::rel(0.9)  # ~10.8pt
      ),
      
      # Clean borders
      panel.border = ggplot2::element_rect(color = "grey80", fill = NA, linewidth = 0.5),
      plot.margin = ggplot2::margin(5, 5, 5, 5)
    )
}
#' Simplified Theme Diagnostic
#' @description Uses built-in NC data to verify theme_forestry_spatial.
#' @importFrom sf st_read st_crs
#' @importFrom ggplot2 ggplot geom_sf labs coord_sf scale_x_continuous scale_y_continuous
#' @export
plot_theme_diagnostic <- function() {
  # Pull NC data included with the sf package
  nc <- sf::st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
  
  # Render with your spatial theme
  ggplot2::ggplot(nc) +
    ggplot2::geom_sf(fill = "#27ae60", alpha = 0.3, color = "#2c3e50") +
    # Force WGS84 graticules (Decimal Degrees)
    ggplot2::coord_sf(datum = sf::st_crs(4326)) +
    theme_forestry_spatial() +
    ggplot2::labs(
      title = "Theme Diagnostic: North Carolina",
      subtitle = "Testing 24pt Title and 14pt Graticule Labels"
    )
}

#' Combine Washington and Georgia Forest Data
#'
#' Merges the Washington and Georgia datasets into a single data frame, adding a 
#' column to identify the source state.
#'
#' @param wa_data A data frame containing the Washington forest inventory data.
#' @param ga_data A data frame containing the Georgia forest inventory data.
#'
#' @return A single combined data frame with an additional column `.id` 
#'   (renamed to "state") indicating the source ("WA" or "GA").
#'
#' @examples
#' \dontrun{
#'   combined <- combine_forest(wa_raw, ga_raw)
#'   table(combined$state)
#' }
#' 
#' @export
combine_forest <- function(wa_data, ga_data){
  dplyr::bind_rows(
    list(WA = wa_data, GA = ga_data),
    .id = "state"
  )
}

#' Format Statistical Summary Table
#'
#' @description Calculates descriptive statistics and returns a formatted `gt` table
#' ready for slide presentation.
#'
#' @param data A data frame or sf object.
#' @return A `gt` table object.
#'
#' @importFrom dplyr select arrange desc any_of where
#' @importFrom sf st_drop_geometry
#' @importFrom psych describe
#' @importFrom tibble rownames_to_column
#' @importFrom gt gt fmt_number cols_label tab_options google_font
#' @export
format_summary_table <- function(data) {
  
  # 1. Calculate Stats (The Logic)
  stats_df <- data %>%
    sf::st_drop_geometry() %>%
    dplyr::select(dplyr::where(is.numeric)) %>%
    dplyr::select(-dplyr::any_of(c("year", "lon", "lat"))) %>% 
    psych::describe() %>%
    as.data.frame() %>%
    tibble::rownames_to_column(var = "Variable") %>%
    dplyr::select(-vars, -range, -se, -mad) %>%
    dplyr::arrange(dplyr::desc(abs(kurtosis)))
  
  # 2. Format Table (The Presentation)
  stats_df %>%
    gt::gt() %>%
    gt::opt_table_font(
      font = gt::google_font("Atkinson Hyperlegible")
    ) %>%
    # Clean up column labels
    gt::cols_label(
      Variable = "Variable",
      n = "N",
      mean = "Mean",
      sd = "SD",
      median = "Median",
      trimmed = "Trimmed",
      min = "Min",
      max = "Max",
      skew = "Skew",
      kurtosis = "Kurtosis"
    ) %>%
    # Format all numbers to 2 decimal places
    gt::fmt_number(
      columns = dplyr::where(is.numeric),
      decimals = 2
    ) %>%
    # Format 'N' to 0 decimals
    gt::fmt_number(
      columns = "n",
      decimals = 0
    ) %>%
    # Styling: Increase font size and use your project font
    gt::tab_options(
      table.font.names = "Atkinson Hyperlegible",
      table.font.size = "22px", # <--- Increases size to fill the slide
      data_row.padding = "8px"  # Adds breathing room
    ) %>% 
    gt::text_transform(
      locations = gt::cells_body(columns = dplyr::where(is.numeric)),
      fn = function(x) {
        # \u2212 is the unicode minus that breaks things. Replace with standard "-".
        gsub("\u2212", "-", x)
      }
    )
}

#' Plot US Map with Forestry Theme
#' @description Highlights Washington and Georgia using standardized presentation fonts.
#' @importFrom tigris states shift_geometry
#' @importFrom rmapshaper ms_simplify ms_filter_islands
#' @importFrom dplyr filter mutate
#' @importFrom ggplot2 ggplot geom_sf aes scale_fill_manual labs
#' @export
plot_us_map <- function() {
  
  # 1. Fetch and filter for the Lower 48
  us_map <- tigris::states(cb = TRUE, resolution = "20m", year = 2022, progress_bar = FALSE) %>%
    dplyr::filter(!STUSPS %in% c("AK", "HI", "PR")) %>% 
    rmapshaper::ms_simplify(keep = 0.2) %>%
    rmapshaper::ms_filter_islands(min_area = 1e9) %>% 
    tigris::shift_geometry() %>% 
    dplyr::mutate(
      highlight = dplyr::case_when(
        STUSPS == "WA" ~ "Washington",
        STUSPS == "GA" ~ "Georgia",
        TRUE ~ NA_character_
      )
    )
  
  # 2. Define Colors
  state_colors <- c("Washington" = "#E16A86", "Georgia" = "#00AD9A")
  
  # 3. Plot
  ggplot2::ggplot() +
    # Layer 1: Base Map
    ggplot2::geom_sf(data = us_map, fill = "grey98", color = "grey50", linewidth = 0.25) +
    
    # Layer 2: Highlight Layer
    ggplot2::geom_sf(
      data = dplyr::filter(us_map, !is.na(highlight)), 
      ggplot2::aes(fill = highlight), 
      color = "grey50", 
      linewidth = 0.25
    ) +
    
    # Custom Scales
    ggplot2::scale_fill_manual(values = state_colors, name = "Study Sites") +
    
    # Standardized Forestry Theme
    theme_forestry_spatial() 
}


#' Create Single State Topo Plot
#' @description Generates a topo map with manually tuned label placement for Georgia.
#' @export
plot_state_topo <- function(data, boundary_sf, raster_path, state_name) {
  
  # 1. Load & Process Raster
  elev_terra  <- terra::rast(raster_path)
  slope  <- terra::terrain(elev_terra, "slope", unit = "radians")
  aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
  hill_terra  <- terra::shade(slope, aspect, angle = 45, direction = 315)
  
  # 2. Prep Vectors
  boundary_sf <- sf::st_transform(boundary_sf, 4326)
  peaks_sf <- get_state_peaks(state_name)
  
  # --- FIX: Manual Nudge Logic ---
  # Define default nudges (0 means "let ggrepel decide")
  nx <- 0
  ny <- 0
  
  if (state_name == "Georgia" && !is.null(peaks_sf)) {
    # Prepare nudge vectors specific to the Georgia peaks to uncross the lines
    # Units are in Decimal Degrees (roughly: 0.1 deg ~ 11km)
    peaks_sf <- peaks_sf %>% 
      dplyr::mutate(
        nudge_x = dplyr::case_when(
          peak == "Rabun Bald"     ~  0.3,  # Push Hard Right (East)
          peak == "Blood Mtn"      ~ -0.3,  # Push Hard Left (West)
          peak == "Brasstown Bald" ~  0,    # Keep Center horizontally
          TRUE ~ 0
        ),
        nudge_y = dplyr::case_when(
          peak == "Brasstown Bald" ~  0.2,  # Push Up (North)
          peak == "Rabun Bald"     ~ -0.1,  # Push Down slightly
          TRUE ~ 0
        )
      )
    
    # Extract as vectors for ggrepel
    nx <- peaks_sf$nudge_x
    ny <- peaks_sf$nudge_y
  }
  
  # 3. Plot
  ggplot2::ggplot() +
    tidyterra::geom_spatraster(data = elev_terra) + 
    tidyterra::scale_fill_hypso_c(
      palette = "usgs-gswa2", 
      name = "Elevation (m)", 
      na.value = "transparent"
    ) +
    tidyterra::geom_spatraster(
      data = hill_terra, 
      ggplot2::aes(alpha = ggplot2::after_stat(value)), 
      fill = "black", 
      show.legend = FALSE
    ) +
    ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
    ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
    
    # REPEL SETTINGS
    {if(!is.null(peaks_sf)) ggrepel::geom_label_repel(
      data = peaks_sf, ggplot2::aes(label = peak, geometry = geometry),
      stat = "sf_coordinates", 
      size = 6,             
      family = "atkinson",
      fontface = "bold", 
      
      # Inject the manual nudges here
      nudge_x = nx,
      nudge_y = ny,
      
      box.padding = 0.8,    
      point.padding = 0.5,  
      force = 10,           
      min.segment.length = 0
    )} +
    
    theme_forestry_void(base_size = 16) +
    ggplot2::coord_sf(expand = FALSE) + 
    
    ggplot2::guides(
      fill = ggplot2::guide_colorbar(
        title.position = "top",      
        title.hjust = 0.5,
        barwidth = ggplot2::unit(5, "cm"), 
        barheight = ggplot2::unit(0.3, "cm"),
        frame.colour = "black", 
        ticks.colour = "black"
      )
    ) +
    ggplot2::theme(
      legend.position = "bottom",
      legend.margin = ggplot2::margin(t = 5, b = 0)
    )
}


#' Save Combined Side-by-Side Topo Plot
#' @export
save_combined_topo <- function(wa_data, ga_data, wa_boundary, ga_boundary, 
                               wa_raster_path, ga_raster_path, output_path) {
  
  # [FIX 2] Ensure fonts are registered & showtext is ON for this worker process
  setup_forestry_fonts() 
  
  p_wa <- plot_state_topo(wa_data, wa_boundary, wa_raster_path, "Washington")
  p_ga <- plot_state_topo(ga_data, ga_boundary, ga_raster_path, "Georgia")
  
  combined_plot <- patchwork::wrap_plots(p_wa, p_ga, ncol = 2) +
    patchwork::plot_annotation(
      tag_levels = 'a', 
      tag_prefix = '(', 
      tag_suffix = ')',
      theme = theme_forestry_void(base_size = 20) 
    ) 
  
  dir.create(dirname(output_path), showWarnings = FALSE)
  
  # Save
  ggplot2::ggsave(output_path, plot = combined_plot, width = 12, height = 6, dpi = 300, bg = "white")
  return(output_path)
}

#' Plot Regional Comparison of Forested Data (WA vs GA)
#'
#' @description Generates a side-by-side comparison of forest cover for Washington 
#' and Georgia. It handles font registration (Atkinson Hyperlegible), spatial 
#' transformations, and creates a combined plot with a shared legend.
#'
#' @param data A data frame containing the forest point data. Must contain 
#'   columns `lon`, `lat`, `state`, and `forested`.
#' @param boundaries An `sf` object containing state boundaries. Must contain 
#'   a `NAME` column.
#'
#' @return A `patchwork` object containing the combined side-by-side maps.
#'
#' @importFrom sysfonts font_add_google
#' @importFrom showtext showtext_auto
#' @importFrom dplyr filter
#' @importFrom sf st_as_sf st_transform st_crs
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual coord_sf guides guide_legend theme_void theme element_text
#' @importFrom patchwork wrap_plots plot_layout plot_annotation
#'
#' @export
plot_regional_comparison <- function(data, boundaries) {
  # Load the font from Google Fonts (or a local file)
  font_add_google("Atkinson Hyperlegible", family = "atkinson")
  
  # Turn on showtext to render the font
  showtext_auto()
  
  # Spatial Prep (Fixing the NAD83/WGS84 mismatch)
  forest_sf <- data %>%
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
    sf::st_transform(sf::st_crs(boundaries))
  
  state_lookup <- c("Washington" = "WA", "Georgia" = "GA")
  # Washington Plot
  abbr_wa <- state_lookup["Washington"]
  p_wa <- ggplot2::ggplot() +
    # State Boundary
    ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Washington"), 
                     fill = "grey98", color = "grey80") +
    # Forest Points
    ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_wa), 
                     ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) +
    # Scales
    ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"), 
                                name = "Forested") +
    ggplot2::coord_sf(expand = TRUE) +
    # Legend Tweaks
    ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) +
    theme_void()
  # Georgia Plot
  abbr_ga <- state_lookup["Georgia"]
  p_ga <- ggplot2::ggplot() +
    # State Boundary
    ggplot2::geom_sf(data = dplyr::filter(boundaries, NAME == "Georgia"), 
                     fill = "grey98", color = "grey80") +
    # Forest Points
    ggplot2::geom_sf(data = dplyr::filter(forest_sf, state == abbr_ga), 
                     ggplot2::aes(color = forested), alpha = 0.6, size = 0.5) +
    # Scales
    ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E"), 
                                name = "Forested") +
    ggplot2::coord_sf(expand = TRUE) +
    # Legend Tweaks
    ggplot2::guides(color = ggplot2::guide_legend(override.aes = list(size = 5, alpha = 1))) +
    theme_void()
  
  # Combine
  final_plot <- patchwork::wrap_plots(p_wa, p_ga) +
    patchwork::plot_layout(guides = "collect") +
    patchwork::plot_annotation(
      tag_levels = 'a', 
      tag_prefix = '(', 
      tag_suffix = ')'
    ) &
    ggplot2::theme(
      text = element_text(family = "atkinson", size = 16),
      legend.position = "bottom"
    )
  return(final_plot)
}
#' Download EPA Level III Ecoregions Data
#'
#' @description Downloads the EPA Level III Ecoregions shapefile (zip format) 
#' to a local directory. Implements a caching check to avoid re-downloading 
#' if the file already exists.
#'
#' @param url Character string. The direct URL to the EPA Ecoregions zip file.
#' @param dest_dir Character string. The local directory where the data should 
#' be saved. Defaults to "data/epa".
#'
#' @return A character string containing the full file path to the downloaded zip file. 
#' This return value is designed to be tracked by \code{targets}.
#'
#' @importFrom utils download.file
#' @export
get_epa_ecoregions <- function(url, dest_dir = "data/epa") {
  # Ensure directory exists
  if (!dir.exists(dest_dir)) dir.create(dest_dir, recursive = TRUE)
  
  # Define destination
  dest_file <- file.path(dest_dir, "us_eco_l3.zip")
  
  # Download only if missing
  if (!file.exists(dest_file)) {
    message("Downloading EPA Ecoregion Data...")
    download.file(url, dest_file, mode = "wb")
  }
  
  # Return the file path for the next target to track
  return(dest_file)
}

#' Process and Clip EPA Ecoregions
#'
#' @description Extracts EPA Level III ecoregion data from a zipped shapefile, 
#' standardizes column names, and clips the geometry to specified state boundaries. 
#' It includes robust steps for geometry repair (handling spherical validity), 
#' small island removal, and simplification for optimized plotting.
#'
#' @param zip_path Character string. The file path to the zipped EPA shapefile.
#' @param target_states Character vector. The names of the states to clip the 
#' ecoregions to. Defaults to \code{c("Washington", "Georgia")}.
#' @param simplify_tol Numeric. The simplification tolerance passed to 
#' \code{rmapshaper::ms_simplify}. Range is 0-1, where higher numbers remove more detail. 
#' Defaults to 0.05.
#'
#' @return An \code{sf} object containing the processed ecoregions with standardized 
#' columns \code{US_L3NAME} and \code{STATE_NAME}.
#'
#' @importFrom tigris states
#' @importFrom dplyr filter rename select %>%
#' @importFrom rmapshaper ms_simplify ms_filter_islands
#' @importFrom sf read_sf st_transform st_crs st_make_valid st_intersection sf_use_s2
#' @importFrom utils unzip
#' @export
process_ecoregions <- function(zip_path, target_states = c("Washington", "Georgia"), simplify_tol = 0.05) {
  message("Processing shapefiles for: ", paste(target_states, collapse = ", "))
  
  # A. Fetch State Boundaries (The "Cookie Cutter")
  boundaries <- tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
    filter(NAME %in% target_states) %>%
    # Simplify boundaries to match the ecoregion resolution
    rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE)
  
  # B. Unzip and Read EPA Data
  clean_dir <- tempfile()
  dir.create(clean_dir)
  unzip(zip_path, exdir = clean_dir)
  
  shp_file <- list.files(clean_dir, pattern = "\\.shp$", full.names = TRUE, recursive = TRUE)[1]
  eco_raw  <- sf::read_sf(shp_file)
  
  # C. Standardize Columns (Handle variations in column names)
  if (!"US_L3NAME" %in% names(eco_raw)) {
    if ("NA_L3NAME" %in% names(eco_raw)) {
      eco_raw <- rename(eco_raw, US_L3NAME = NA_L3NAME)
    } else if ("LEVEL3_NAM" %in% names(eco_raw)) {
      eco_raw <- rename(eco_raw, US_L3NAME = LEVEL3_NAM)
    }
  }
  
  # D. Clip, Clean, and Simplify
  suppressMessages(sf::sf_use_s2(FALSE)) # Turn off spherical geometry for robust clipping
  
  eco_clean <- eco_raw %>%
    st_transform(st_crs(boundaries)) %>%
    st_make_valid() %>%
    st_intersection(boundaries) %>%
    select(US_L3NAME, STATE_NAME = NAME) %>%
    rmapshaper::ms_filter_islands(min_area = 200000000) %>% 
    rmapshaper::ms_simplify(keep = simplify_tol, keep_shapes = TRUE)
  
  suppressMessages(sf::sf_use_s2(TRUE))
  
  return(eco_clean)
}

#' Plot Ecoregion Complexity Comparison (WA vs GA)
#'
#' @description Generates a side-by-side comparison of Level III ecoregions for Washington
#' and Georgia. It uses a "void" theme, qualitative colors, and carefully tuned
#' label repulsion settings to avoid overlapping text.
#'
#' @param eco_data An \code{sf} object containing ecoregion polygons. Must contain
#' columns \code{STATE_NAME} and \code{US_L3NAME}.
#'
#' @return A \code{patchwork} object containing the combined plot.
#'
#' @importFrom dplyr filter group_by summarize mutate
#' @importFrom sf st_transform st_union st_point_on_surface st_coordinates st_drop_geometry
#' @importFrom stringr str_wrap
#' @importFrom ggplot2 ggplot geom_sf aes coord_sf theme_void scale_fill_discrete guide_none element_text margin element_rect labs
#' @importFrom ggrepel geom_label_repel
#' @importFrom colorspace scale_fill_discrete_qualitative
#' @importFrom patchwork plot_annotation
#'
#' @export
plot_ecoregion_comparison <- function(eco_data) {
  
  # 1. WASHINGTON PLOT
  # A. Filter & Transform
  wa_e <- eco_data %>% 
    dplyr::filter(STATE_NAME == "Washington") %>% 
    sf::st_transform(4326)
  
  # B. Calculate Centroids for Labels
  wa_labels <- wa_e %>%
    dplyr::group_by(US_L3NAME) %>%
    dplyr::summarize(geometry = sf::st_union(geometry)) %>% 
    dplyr::mutate(
      lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1],
      lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2],
      clean_label = stringr::str_wrap(US_L3NAME, width = 35) 
    ) %>%
    sf::st_drop_geometry()
  
  # C. Generate Plot
  p_wa <- ggplot2::ggplot() +
    # Geometries
    ggplot2::geom_sf(data = wa_e, ggplot2::aes(fill = US_L3NAME), 
                     alpha = 1, color = "white", linewidth = 0.2) +
    
    ggplot2::coord_sf(datum = NA, expand = TRUE) +
    
    # Repelled Labels
    ggrepel::geom_label_repel(
      data = wa_labels,
      ggplot2::aes(x = lon, y = lat, label = clean_label),
      inherit.aes = FALSE,
      size = 5,                
      family = "atkinson", 
      lineheight = 1.6,
      box.padding = 0.75, 
      min.segment.length = 0,
      force = 30,
      max.overlaps = Inf,
      alpha = 0.95,
      segment.color = "grey30",
      segment.size = 1,
      seed = 42
    ) +
    
    # Colors
    colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") +
    
    # Manual Theme
    ggplot2::theme_void(base_family = "atkinson")
  
  
  # 2. GEORGIA PLOT
  # A. Filter & Transform
  ga_e <- eco_data %>% 
    dplyr::filter(STATE_NAME == "Georgia") %>% 
    sf::st_transform(4326)
  
  # B. Calculate Centroids for Labels
  ga_labels <- ga_e %>%
    dplyr::group_by(US_L3NAME) %>%
    dplyr::summarize(geometry = sf::st_union(geometry)) %>% 
    dplyr::mutate(
      lon = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 1],
      lat = sf::st_coordinates(sf::st_point_on_surface(geometry))[, 2],
      clean_label = stringr::str_wrap(US_L3NAME, width = 35) 
    ) %>%
    sf::st_drop_geometry()
  
  # C. Generate Plot
  p_ga <- ggplot2::ggplot() +
    # Geometries
    ggplot2::geom_sf(data = ga_e, ggplot2::aes(fill = US_L3NAME), 
                     alpha = 1, color = "white", linewidth = 0.2) +
    
    ggplot2::coord_sf(datum = NA, expand = TRUE) +
    
    # Repelled Labels
    ggrepel::geom_label_repel(
      data = ga_labels,
      ggplot2::aes(x = lon, y = lat, label = clean_label),
      inherit.aes = FALSE,
      size = 5,                
      family = "atkinson", 
      lineheight = 1.6,
      box.padding = 0.75, 
      min.segment.length = 0,
      force = 40,
      max.overlaps = Inf,
      alpha = 0.95,
      segment.color = "grey30",
      segment.size = 1,
      seed = 4
    ) +
    
    # Colors
    colorspace::scale_fill_discrete_qualitative(palette = "Dark 3", guide = "none") +
    
    # Manual Theme
    ggplot2::theme_void(base_family = "atkinson") +
    ggplot2::theme(
      plot.title = ggplot2::element_text(face = "bold", margin = ggplot2::margin(b = 10)),
      plot.tag = ggplot2::element_text(face = "bold"),
      legend.position = "bottom"
    ) + 
    ggplot2::labs(x = NULL, y = NULL)
  
  
  # 3. COMBINE & ANNOTATE 
  combined_plot <- (p_wa + p_ga) +
    patchwork::plot_annotation(
      tag_levels = 'a', 
      tag_prefix = '(', 
      tag_suffix = ')'
    ) & 
    # Apply theme settings globally to the patchwork
    ggplot2::theme_void(base_family = "atkinson")
  
  return(combined_plot)
}


#' Plot Univariate Distributions by Forest Cover
#'
#' @description Generates a faceted density plot comparing numeric environmental 
#' variables between forested and non-forested areas. It uses the "Dark 3" 
#' qualitative palette and applys the Wilke cowplot theme.
#'
#' @param data A data frame or sf object containing the forest data. 
#'   Must include `forested` column and numeric environmental variables.
#'
#' @return A `ggplot` object.
#'
#' @importFrom dplyr select starts_with
#' @importFrom tidyr pivot_longer
#' @importFrom ggplot2 ggplot aes geom_density facet_wrap scale_fill_manual labs
#' @importFrom cowplot theme_cowplot
#' @importFrom colorspace qualitative_hcl
#' @importFrom sysfonts font_add_google
#' @importFrom showtext showtext_auto
#'
#' @export
plot_forest_distributions <- function(data) {
  
  # 1. Typography Setup
  sysfonts::font_add_google("Atkinson Hyperlegible", family = "atkinson")
  showtext::showtext_auto()
  
  # 2. Color Palette (Dark 3 from colorspace)
  # Generating the hex codes specifically for 2 levels
  pal_colors <- colorspace::qualitative_hcl(palette = "Dark 3", n = 2)
  
  # 3. Plotting
  data %>%
    dplyr::select(
      forested, elevation, eastness, northness, roughness, 
      dew_temp, precip_annual, 
      dplyr::starts_with("temp_"), 
      dplyr::starts_with("vapor_")
    ) %>%
    tidyr::pivot_longer(
      cols = -forested, 
      names_to = "variable", 
      values_to = "value"
    ) %>%
    ggplot2::ggplot(ggplot2::aes(x = value, fill = forested)) +
    # color = NA removes the outline for a cleaner "Wilke-style" look
    ggplot2::geom_density(alpha = 0.6, color = NA) +
    ggplot2::facet_wrap(~ variable, scales = "free") +
    
    # Scales & Labels
    ggplot2::scale_fill_manual(values = pal_colors) +
    ggplot2::labs(
      x = NULL,
      y = "Density",
      fill = "Forested"
    ) +
    ggplot2::scale_y_continuous(expand = ggplot2::expansion(mult = c(0, 0.05))) +
    ggplot2::scale_x_continuous(
      expand = ggplot2::expansion(mult = c(0, 0)),
      labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
    
    # Theme
    cowplot::theme_cowplot(font_family = "atkinson", font_size = 22) +
    # Minor tweaks to spacing
    ggplot2::theme(
      legend.position = "bottom",
      strip.background = ggplot2::element_blank(), # Clean facet headers
      strip.text = ggplot2::element_text(face = "bold")
    )
}
identify_outliers <- function(data){
  # 1. Prepare the data (calculate Z-scores first)
  plot_data <- data %>%
    sf::st_drop_geometry() %>%
    dplyr::select(forested, where(is.numeric)) %>%
    dplyr::select(-any_of(c("lon", "lat", "year"))) %>% 
    tidyr::pivot_longer(cols = -forested, names_to = "variable", values_to = "value") %>%
    dplyr::group_by(variable) %>%
    # Wrap scale in as.numeric to fix the matrix warning
    dplyr::mutate(scaled_value = as.numeric(scale(value))) %>% 
    dplyr::ungroup()
  
  # 2. Plot
  ggplot2::ggplot(plot_data, aes(x = forested, y = scaled_value, fill = forested)) +
    # Draw the main distribution (suppress default outliers)
    ggplot2::geom_boxplot(outlier.shape = NA, alpha = 0.4) +
    
    # CRITICAL FIX: Only pass the extreme values to the points layer
    ggplot2::geom_jitter(
      data = dplyr::filter(plot_data, abs(scaled_value) > 3), 
      width = 0.2, 
      alpha = 0.6,
      size = 1.5 # Make the outliers pop
    ) +
    
    ggplot2::facet_wrap(~variable, scales = "free_y") +
    ggplot2::theme_minimal() +
    ggplot2::scale_fill_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) +
    ggplot2::labs(
      title = "Standardized Distribution & Extremes",
      subtitle = "Points indicate observations > 3 Standard Deviations from mean",
      y = "Z-Score (σ)"
    )
}

fetch_study_area <- function(target_states) {
  tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
    dplyr::filter(NAME %in% target_states) %>%
    rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE)
}

fetch_state_boundary <- function(state){
  tigris::states(cb = TRUE, resolution = "20m", progress_bar = FALSE) %>%
    dplyr::filter(NAME == state) %>%
    rmapshaper::ms_simplify(keep = 0.2, keep_shapes = TRUE)
}

create_elevation_raster <- function(boundary_sf, path) {
  elev_raster <- elevatr::get_elev_raster(locations = boundary_sf, z = 7, clip = "locations")
  elev_terra <- terra::rast(elev_raster) %>% terra::mask(terra::vect(boundary_sf))
  terra::writeRaster(elev_terra, path, overwrite = TRUE)
  return(path)
}
#' Save Outlier Diagnostic Map
#'
#' @description Generates a diagnostic map highlighting multivariate outliers (Z > 3)
#' overlaid on a hillshaded elevation raster. Uses the standardized forestry theme.
#'
#' @param data A data frame containing the analysis dataset (must include numeric columns and 'forested' factor).
#' @param boundary_sf An \code{sf} object representing the state boundary (e.g., Washington).
#' @param raster_path Character string. File path to the elevation raster (.tif).
#' @param output_path Character string. File path where the PNG will be saved.
#'
#' @importFrom terra rast terrain shade
#' @importFrom sf st_transform st_as_sf st_filter st_drop_geometry
#' @importFrom dplyr select filter if_any all_of mutate
#' @importFrom tibble tibble
#' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_manual labs ggsave guides guide_legend
#' @importFrom tidyterra geom_spatraster scale_fill_hypso_c
#' @importFrom ggrepel geom_label_repel
#'
#' @return The \code{output_path} (invisible), for integration with \code{targets}.
#' @export
save_outlier_map_png <- function(data, boundary_sf, raster_path, output_path) {
  
  # 1. Load & Process Raster
  elev_terra <- terra::rast(raster_path)
  slope  <- terra::terrain(elev_terra, "slope", unit = "radians")
  aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
  hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
  
  # 2. Prep Vectors
  wa_boundary_sf <- sf::st_transform(boundary_sf, 4326)
  
  data_sf <- data %>%
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
    sf::st_filter(wa_boundary_sf)
  
  # Identify numeric columns automatically
  numeric_cols <- data_sf %>% 
    sf::st_drop_geometry() %>% 
    dplyr::select(where(is.numeric)) %>% 
    names()
  
  # Filter for >3 Z-score
  outliers_sf <- data_sf %>%
    dplyr::filter(dplyr::if_any(dplyr::all_of(numeric_cols), ~ abs(scale(.)) > 3))
  
  # WA Peak Labels
  wa_peaks_sf <- tibble::tibble(
    peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"),
    lat  = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013),
    lon  = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111)
  ) %>%
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
  
  # 3. Plot
  plt <- ggplot2::ggplot() +
    # Layer A: Elevation Color
    tidyterra::geom_spatraster(data = elev_terra) + 
    tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") +
    
    # Layer B: Hillshade Shadow Mask
    tidyterra::geom_spatraster(
      data = hill_terra, 
      ggplot2::aes(alpha = ggplot2::after_stat(value)), 
      fill = "black", 
      show.legend = FALSE
    ) +
    ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
    
    # Layer C: Vectors (Boundary + Outlier Points)
    ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
    ggplot2::geom_sf(data = outliers_sf, ggplot2::aes(color = forested), size = 2.5) +
    ggplot2::scale_color_manual(values = c("Yes" = "#228B22", "No" = "#D2691E")) +
    
    # Layer D: Labels
    ggrepel::geom_label_repel(
      data = wa_peaks_sf,
      ggplot2::aes(label = peak, geometry = geometry),
      stat = "sf_coordinates",
      size = 5,
      fontface = "bold",
      box.padding = 0.6,
      min.segment.length = 0
    ) +
    
    # Theme Application
    theme_forestry_spatial() +
    labs(x = "", y = "")
  
  # Save
  ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white")
  return(output_path)
}

plot_wa_pca <- function(data) {
  # REVISED: Keep Lat/Lon in the numeric matrix
  numeric_mat <- data %>% 
    sf::st_drop_geometry() %>%
    dplyr::select(where(is.numeric)) %>%
    dplyr::select(-any_of(c("year"))) 
  
  pca_res <- stats::prcomp(numeric_mat, scale. = TRUE)
  
  # Calculate variance
  var_exp <- round(100 * pca_res$sdev^2 / sum(pca_res$sdev^2), 1)
  
  # Calculate the sum for the reader
  total_var <- var_exp[1] + var_exp[2]
  
  as.data.frame(pca_res$x) %>%
    dplyr::mutate(forested = data$forested) %>%
    ggplot2::ggplot(aes(x = PC1, y = PC2, color = forested)) +
    ggplot2::geom_point(alpha = 0.5) +
    ggplot2::scale_color_manual(values = c("Yes" = "#2D5A27", "No" = "#A0522D")) +
    ggplot2::theme_minimal(base_size = 14) +
    ggplot2::labs(
      title = "PCA: Biophysical + Geography",
      # REVISED SUBTITLE: Sums it up cleanly
      subtitle = paste0("First two components capture ", total_var, "% of total variance"),
      x = paste0("PC1 (", var_exp[1], "%)"),
      y = paste0("PC2 (", var_exp[2], "%)")
    ) +
    ggplot2::scale_x_continuous(limits = c(-10, 5))
}

plot_correlations <- function(data) {
  # 1. Data Prep (Same as before)
  cor_data <- data %>%
    sf::st_drop_geometry() %>%
    dplyr::mutate(Forested_Binary = dplyr::if_else(forested == "Yes", 1, 0)) %>%
    dplyr::select(Forested_Binary, dplyr::where(is.numeric)) %>% 
    dplyr::select(-any_of(c("year"))) 
  
  cor_df <- cor(cor_data, use = "pairwise.complete.obs") %>%
    as.data.frame() %>%
    dplyr::select(correlation = Forested_Binary) %>%
    tibble::rownames_to_column("variable") %>%
    dplyr::filter(variable != "Forested_Binary") %>% 
    dplyr::mutate(
      sign = dplyr::if_else(correlation > 0, "Positive", "Negative"),
      is_spatial = dplyr::if_else(variable %in% c("lat", "lon"), "Spatial", "Biophysical")
    )
  
  # 2. Plot with SPACE
  ggplot2::ggplot(cor_df, aes(x = reorder(variable, correlation), y = correlation)) +
    ggplot2::geom_segment(aes(xend = variable, yend = 0), color = "gray50") +
    ggplot2::geom_point(aes(color = sign, shape = is_spatial), size = 5) +
    
    ggplot2::coord_flip() +
    ggplot2::theme_minimal(base_size = 18) +
    
    ggplot2::scale_color_manual(values = c("Positive" = "#228B22", "Negative" = "#D2691E")) +
    
    ggplot2::labs(
      title = "What drives the 'Forested' classification?",
      subtitle = "Correlations with Forested (Yes=1). Note the impact of Latitude.",
      x = NULL,
      y = "Correlation Coefficient",
      color = "Direction",
      shape = "Variable Type"
    )
}

#' Plot Random Forest Variable Importance
#'
#' @description Fits a ranger Random Forest model to the provided data, calculates
#' permutation importance, and generates a lollipop chart. It distinguishes
#' between spatial (lat/lon) and biophysical predictors.
#' Uses the project's 'Atkinson' font theme via theme_forestry_plot().
#'
#' @param data An sf object or data frame containing the 'forested' target and predictors.
#'
#' @importFrom ranger ranger
#' @importFrom vip vi
#' @importFrom sf st_drop_geometry
#' @importFrom dplyr select mutate if_else any_of where
#' @importFrom ggplot2 ggplot aes geom_segment geom_point scale_shape_manual scale_color_manual labs
#' @export
plot_rf_importance <- function(data) {
  
  # 1. Clean Data & Fit Model
  model_data <- data %>%
    sf::st_drop_geometry() %>%
    dplyr::select(forested, dplyr::where(is.numeric)) %>%
    dplyr::select(-dplyr::any_of(c("year"))) %>%
    dplyr::mutate(forested = as.factor(forested))
  
  rf_model <- ranger::ranger(
    formula = forested ~ ., 
    data = model_data, 
    importance = "permutation",
    seed = 123
  )
  
  # 2. Extract importance & Add Spatial Flag
  imp_df <- vip::vi(rf_model) %>%
    dplyr::mutate(
      Type = dplyr::if_else(Variable %in% c("lat", "lon"), "Spatial", "Biophysical")
    )
  
  # 3. Tufte-style Lollipop with Shapes
  ggplot2::ggplot(imp_df, ggplot2::aes(x = Importance, y = reorder(Variable, Importance))) +
    # Stems
    ggplot2::geom_segment(ggplot2::aes(xend = 0, yend = Variable), color = "gray60", linewidth = 0.8) +
    
    # Dots (Mapped to Shape)
    ggplot2::geom_point(
      ggplot2::aes(shape = Type, color = Type), 
      size = 5
    ) +
    
    # --- VISUAL MAPPING ---
    ggplot2::scale_shape_manual(values = c("Biophysical" = 16, "Spatial" = 17)) +
    ggplot2::scale_color_manual(values = c("Biophysical" = "#228B22", "Spatial" = "black")) +
    
    # --- THEME FIX ---
    # We use your custom theme, which now enforces Atkinson fonts.
    # turning off grid because lollipops look cleaner without it
    theme_forestry_plot(font_size = 16, grid = FALSE) + 
    
    ggplot2::labs(
      title = "Random Forest Variable Importance",
      subtitle = "Permutation Importance. Note the high rank of spatial variables.",
      y = NULL,
      x = "Importance (Loss in Accuracy if scrambled)",
      shape = "Variable Type",
      color = "Variable Type"
    ) 
}
plot_umap_forested <- function(data) {
  
  data_clean <- data %>%
    drop_na()
  
  numeric_data <- data_clean %>%
    select(where(is.numeric)) %>%
    scale()
  
  set.seed(42)
  umap_fit <- umap::umap(numeric_data)
  
  # Format for plotting
  umap_df <- umap_fit$layout %>%
    as.data.frame() %>%
    dplyr::rename(UMAP1 = V1, UMAP2 = V2) %>%
    dplyr::mutate(forested = as.factor(data_clean$forested))
  
  # Generate Plot
  ggplot(umap_df, aes(x = UMAP1, y = UMAP2, color = forested)) +
    geom_point(alpha = 0.6, size = 1.5) +
    #scale_color_viridis_d(name = "Forested Status", begin = 0.2, end = 0.8) +
    scale_color_manual(
      name = "Forested?",
      # Assumes factor order is (No, Yes). Swap colors if order differs.
      values = c("#228B22", "#D2691E")
    ) +
    theme_minimal() +
    labs(
      title = "UMAP Projection of Forested Areas",
      subtitle = "Clustering based on numeric landscape features",
      x = "UMAP Dimension 1",
      y = "UMAP Dimension 2"
    )
}

plot_cv_strategies <- function(data) {
  
  # 1. SETUP DATA & ADD ID
  # We MUST add an ID column to track rows across splits
  forested_sf <- st_as_sf(
    data, 
    coords = c("lon", "lat"), 
    crs = 4326, 
    remove = FALSE 
  ) %>%
    mutate(id = row_number()) # <--- Critical: Adds the ID we need later
  
  # 2. DEFINE CUSTOM ECOLOGICAL DISTANCE
  dist_env <- function(x) {
    x %>%
      st_drop_geometry() %>%
      select(elevation, precip_annual, temp_annual_mean, roughness) %>%
      scale() %>% 
      dist()
  }
  
  # 3. CREATE THE SPLITS
  set.seed(42)
  
  # A. Random
  random_folds <- vfold_cv(forested_sf, v = 5)
  
  # B. Spatial Block
  spatial_folds <- spatial_block_cv(forested_sf, v = 5)
  
  # C. Environmental Clustering
  cluster_folds <- spatial_clustering_cv(
    forested_sf,
    v = 5,
    cluster_function = "hclust", 
    distance_function = dist_env
  )
  
  # 4. HELPER: EXTRACT FOLD IDs
  # This assigns a Fold Number (1-5) to every single point
  get_fold_ids <- function(splits, original_data) {
    results <- original_data %>% 
      mutate(fold = NA_character_)
    
    for(i in seq_along(splits$splits)) {
      test_ids <- assessment(splits$splits[[i]]) %>% pull(id)
      results <- results %>%
        mutate(fold = ifelse(id %in% test_ids, as.character(i), fold))
    }
    return(results)
  }
  
  # 5. PREPARE PLOTTING DATA
  df_random  <- get_fold_ids(random_folds, forested_sf)
  df_spatial <- get_fold_ids(spatial_folds, forested_sf)
  df_cluster <- get_fold_ids(cluster_folds, forested_sf)
  
  # 6. COMMON PLOT FUNCTION
  plot_folds <- function(df, title) {
    ggplot(df) +
      geom_sf(aes(color = fold), size = 0.5, alpha = 0.6) +
      scale_color_brewer(palette = "Set1", name = "Fold") + # <--- The Rainbow Colors
      #scale_color_discrete_qualitative(palette = "Harmonic", name = "Fold") +
      #scale_color_discrete_qualitative(palette = "Dark 3", name = "Fold") +
      #scale_color_discrete_qualitative(palette = "Dynamic", name = "Fold") +
      ggtitle(title) +
      theme_void() + 
      theme(
        plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
        legend.position = "none" 
      )
  }
  
  # 7. GENERATE & COMBINE
  p1 <- plot_folds(df_random, "Random\n(Confetti)")
  p2 <- plot_folds(df_spatial, "Spatial Block\n(Checkerboard)")
  p3 <- plot_folds(df_cluster, "Env. Clustering\n(Ecoregions)")
  
  p1 + p2 + p3 + plot_layout(ncol = 3)
}

plot_spatial_cv_comparison <- function(res_random, res_block, res_cluster) {
  
  # 1. Extract & Clean Data
  all_metrics <- bind_rows(
    res_random  %>% collect_metrics() %>% mutate(strategy = "Random CV"),
    res_block   %>% collect_metrics() %>% mutate(strategy = "Block CV"),
    res_cluster %>% collect_metrics() %>% mutate(strategy = "Cluster CV")
  ) %>%
    filter(.metric == "roc_auc") %>%
    mutate(
      recipe_label = case_when(
        str_detect(wflow_id, "^base") ~ "Coords",
        str_detect(wflow_id, "^non_spatial") ~ "No Coords",
        str_detect(wflow_id, "^extensible") ~ "Extensible",
        TRUE ~ "Unknown"
      ),
      model = str_remove(wflow_id, "(base_|non_spatial_|extensible_)"),
      strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")),
      recipe_label = factor(recipe_label, levels = c("Coords", "No Coords", "Extensible"))
    )
  
  # 2. Generate Plot
  ggplot(all_metrics, aes(x = model, y = mean, color = model)) +
    geom_hline(yintercept = 0.96, linetype = "dashed", color = "gray50", alpha = 0.5) +
    geom_point(size = 4) +
    geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), width = 0.2) +
    scale_color_discrete_qualitative(palette = "Dark 3") +
    facet_grid(recipe_label ~ strategy) +
    scale_y_continuous(labels = scales::number_format(accuracy = 0.01),
                       limits = c(.65, 1)) +
    labs(
      y = "ROC AUC Score",
      x = "Model Type",
      color = "Model"
    ) +
    theme_bw(base_size = 14) +
    theme(
      legend.position = "none",
      panel.grid.minor = element_blank(),
      strip.text = element_text(face = "bold", size = 10)
    )
}
plot_spatial_vip_comparison <- function(res_random, res_block, res_cluster, model_id = "base_rf") {
  
  # Internal helper to extract VIP from the best fold
  get_best_fold_vip <- function(wflow_obj, strategy_name) {
    
    # 1. Extract results for the specific model ID (e.g., "base_rf")
    resample_results <- wflow_obj %>% extract_workflow_set_result(model_id)
    
    # 2. Find the best fold
    best_fold_id <- resample_results %>%
      collect_metrics(summarize = FALSE) %>%
      filter(.metric == "roc_auc") %>%
      slice_max(.estimate, n = 1) %>%
      slice(1) %>% 
      pull(id)
    
    # 3. Get the split object for that fold
    best_split <- resample_results$splits[[which(resample_results$id == best_fold_id)]]
    
    # 4. Refit and calculate Importance
    wflow_obj %>%
      extract_workflow(model_id) %>%
      finalize_workflow(select_best(resample_results, metric = "roc_auc")) %>%
      fit(data = analysis(best_split)) %>% 
      extract_fit_parsnip() %>%
      vi() %>%
      mutate(strategy = strategy_name)
  }
  
  # Consolidate and clean
  vip_data <- bind_rows(
    get_best_fold_vip(res_random,  "Random CV"),
    get_best_fold_vip(res_block,   "Block CV"),
    get_best_fold_vip(res_cluster, "Cluster CV")
  ) %>%
    # Filter ONLY for Lat/Lon (The interaction term doesn't exist in your new recipe)
    filter(Variable %in% c("lat", "lon")) %>%
    mutate(
      Variable = case_when(
        Variable == "lon" ~ "Longitude",
        Variable == "lat" ~ "Latitude",
        TRUE ~ Variable
      ),
      strategy = factor(strategy, levels = c("Cluster CV", "Block CV", "Random CV"))
    )
  
  # Generate Plot
  ggplot(vip_data, aes(x = Importance, y = strategy)) +
    geom_segment(aes(x = 0, xend = Importance, y = strategy, yend = strategy), 
                 color = "gray20", linewidth = .65) +
    geom_point(aes(color = strategy), size = 5, show.legend = FALSE) +
    facet_grid(Variable ~ ., scales = "free_y", space = "free_y", switch = "y") +
    scale_color_discrete_qualitative(palette = "Dark 3") +
    scale_x_continuous(expand = c(0, 0)) + # Removed fixed limit (105) to let it scale automatically
    labs(
      title = paste("Spatial Overfitting Check:", model_id),
      subtitle = "Does the model rely on coordinates?",
      x = "Importance Score (Impurity)",
      y = NULL
    ) +
    theme_classic(base_size = 14) +
    theme(
      panel.grid.minor = element_blank()
    )
}
#                                                      vvv ADD THIS vvv
plot_model_stability <- function(res_random, res_block, res_cluster, model_id) {
  
  # Internal helper to extract per-fold metrics
  extract_fold_metrics <- function(wflow_obj, strategy_name) {
    wflow_obj %>%
      extract_workflow_set_result(id = model_id) %>% # <--- Now this works
      collect_metrics(summarize = FALSE) %>% 
      filter(.metric == "roc_auc") %>%
      mutate(strategy = strategy_name)
  }
  
  # Combine and factorize
  all_fold_metrics <- bind_rows(
    extract_fold_metrics(res_random,  "Random CV"),
    extract_fold_metrics(res_block,   "Block CV"),
    extract_fold_metrics(res_cluster, "Cluster CV")
  ) %>%
    mutate(strategy = factor(strategy, levels = c("Random CV", "Block CV", "Cluster CV")))
  
  # Generate Plot
  ggplot(all_fold_metrics, aes(x = strategy, y = .estimate, color = strategy)) +
    geom_violin(alpha = 0.4, outlier.shape = NA, width = 0.5) +
    geom_jitter(width = 0.1, size = 3, alpha = 0.8) +
    scale_fill_discrete_qualitative(palette = "Dark 3") +
    scale_color_discrete_qualitative(palette = "Dark 3") +
    scale_y_continuous(limits = c(0.80, 1.0), breaks = seq(0.80, 1.0, 0.05)) +
    labs(
      title = paste("Stability Analysis:", model_id), # <--- Helpful title
      x = NULL,
      y = "ROC AUC (per fold)",
      caption = ""
    ) +
    theme_classic(base_size = 14) +
    theme(
      legend.position = "none"
    )
}
plot_final_test_results <- function(final_fit_obj) {
  final_metrics <- final_fit_obj %>% 
    collect_metrics() %>% 
    filter(.metric %in% c("roc_auc", "accuracy")) %>% 
    mutate(
      .metric = case_when(
        .metric == "roc_auc" ~ "ROC AUC",
        .metric == "accuracy" ~ "Accuracy",
        TRUE ~ .metric
      )
    )
  
  ggplot(final_metrics, aes(x = .metric, y = .estimate, fill = .metric)) +
    geom_col(width = 0.6, show.legend = FALSE) +
    geom_text(aes(label = round(.estimate, 3)), vjust = -0.5, size = 5, fontface = "bold") +
    scale_fill_manual(values = c("ROC AUC" = "#2c3e50", "Accuracy" = "#18bc9c")) +
    scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, 0.2), expand = c(0, 0)) +
    labs(
      x = NULL,
      y = "Performance Score",
      caption = "Evaluation based on the 20% held-out Washington Test Set."
    ) +
    theme_classic(base_size = 14) +
    theme(axis.text.x = element_text(face = "bold", size = 12))
}

plot_final_confusion_matrix <- function(final_fit_obj) {
  # Extract the predictions from the last_fit object
  preds <- final_fit_obj %>% collect_predictions()
  
  # Generate the confusion matrix
  cm <- preds %>% 
    conf_mat(truth = forested, estimate = .pred_class)
  
  # Convert to a tidy format for plotting
  autoplot(cm, type = "heatmap") +
    scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") +
    theme_minimal(base_size = 14) +
    theme(
      panel.grid = element_blank(),
      axis.text = element_text(face = "bold"),
      plot.title = element_text(face = "bold"),
      legend.position = "none"
    )
}

plot_fold_mechanics <- function(wa_sf, boundary_wa_sf) {
  # 1. SETUP
  set.seed(42)
  
  # Ensure unique_id exists to prevent the "object not found" error
  if(!"unique_id" %in% names(wa_sf)) {
    wa_sf <- wa_sf %>% mutate(unique_id = row_number())
  }
  
  # 2. DEFINE CUSTOM DISTANCE
  dist_env <- function(x) {
    x %>% 
      st_drop_geometry() %>% 
      select(elevation, precip_annual, temp_annual_mean, roughness) %>%
      scale() %>% 
      dist()
  }
  
  # 3. CREATE SPLITS
  random_folds  <- vfold_cv(wa_sf, v = 5)
  spatial_folds <- spatial_block_cv(wa_sf, v = 5)
  cluster_folds <- spatial_clustering_cv(wa_sf, v = 5, cluster_function = "hclust", distance_function = dist_env)
  
  # 4. HELPER: EXTRACT STATUS
  get_status <- function(split_obj) {
    first_split <- split_obj$splits[[1]]
    assessment_ids <- assessment(first_split) %>% pull(unique_id)
    wa_sf %>%
      mutate(status = if_else(unique_id %in% assessment_ids, "Assessment (Test)", "Analysis (Train)"))
  }
  
  df_r <- get_status(random_folds)
  df_s <- get_status(spatial_folds)
  df_c <- get_status(cluster_folds)
  
  # 5. PLOTTER WITH BOUNDARY LAYER
  plot_one <- function(df, title) {
    ggplot() +
      # Layer 1: The state boundary provides the spatial context
      geom_sf(data = boundary_wa_sf, fill = "gray98", color = "gray85") +
      # Layer 2: The actual data points
      geom_sf(data = df, aes(color = status), size = 0.4, alpha = 0.7) +
      scale_color_manual(values = c("Assessment (Test)" = "#E16A86", "Analysis (Train)" = "#606060")) +
      ggtitle(title) +
      theme_void() +
      theme(
        legend.position = "none", 
        plot.title = element_text(hjust = 0.5, face = "bold", size = 10)
      )
  }
  
  # 6. COMBINE
  (plot_one(df_r, "Random\n(Confetti)") + 
      plot_one(df_s, "Spatial\n(Checkerboard)") + 
      plot_one(df_c, "Env. Clustering\n(Ecoregions)")) + 
    plot_layout(ncol = 3, guides = "collect") & 
    theme(legend.position = 'bottom', legend.title = element_blank())
}

plot_classic_kfold_diagram <- function(){
  library(tidyverse)
  
  # 1. Setup Parameters
  N_obs <- 100  
  K_folds <- 5  
  
  # 2. Create Mock Data and Assign Folds
  set.seed(42) 
  base_data <- tibble(
    obs_id = 1:N_obs,
    assigned_fold_group = sample(rep(1:K_folds, length.out = N_obs))
  )
  
  # 3. Expand Data for Plotting (Cross-Join)
  plot_data <- expand_grid(
    obs_id = 1:N_obs,
    iteration = 1:K_folds
  ) %>%
    left_join(base_data, by = "obs_id") %>%
    # Define Status: If the iteration matches the assigned fold group, it's Test data.
    mutate(
      status = case_when(
        iteration == assigned_fold_group ~ "Assessment (20%)",
        TRUE ~ "Analysis (80%)"
      ),
      # Convert iteration to factor and reverse levels so Fold 1 is at the top of plot
      iteration_fct = factor(iteration, levels = rev(1:K_folds))
    )
  
  # 4. Define Colors
  cv_colors <- c(
    "Analysis (80%)" = "#A6CEE3",
    "Assessment (20%)" = "#FDB462"
  )
  
  # 5. Generate the Plot
  ggplot(plot_data, aes(x = obs_id, y = iteration_fct, fill = status)) +
    geom_tile(color = "white", linewidth = 0.2) +
    scale_fill_manual(values = cv_colors, name = "Data Role") +
    labs(
      x = "Observations",
      y = "Folds"
    ) +
    
    # Theme adjustments
    theme_minimal(base_size = 16) +
    theme(
      panel.grid = element_blank(),       
      axis.text.x = element_blank(),      
      axis.ticks.x = element_blank(),
      legend.position = "bottom"
    )
}
create_performance_table <- function(results_cluster, final_fit_results) {
  require(dplyr)
  require(gt)
  require(tidymodels)
  
  # 1. Extract the WINNING Model Info (Name & Score)
  best_model_info <- results_cluster %>% 
    rank_results(rank_metric = "roc_auc", select_best = TRUE) %>% 
    filter(.metric == "roc_auc") %>% 
    slice(1) # Grab the #1 spot
  
  # Get the dynamic name and score
  best_model_name <- best_model_info$wflow_id
  cv_score <- best_model_info$mean
  
  # 2. Extract the Final Test Set Scores
  test_scores <- final_fit_results %>% 
    collect_metrics() %>% 
    select(.metric, .estimate) %>% 
    tidyr::pivot_wider(names_from = .metric, values_from = .estimate)
  
  # 3. Create the Combined Data Frame
  summary_data <- tibble(
    # FIXED: Uses the actual winner's name instead of hardcoded text
    Model = best_model_name, 
    `Validation AUC` = cv_score,
    `Test AUC` = test_scores$roc_auc,
    `Test Accuracy` = test_scores$accuracy
  )
  
  # 4. Render with gt
  summary_data %>% 
    gt() %>% 
    tab_header(
      title = md("**Final Model Performance**"),
      subtitle = paste("Winner:", best_model_name)
    ) %>% 
    fmt_number(
      columns = where(is.numeric),
      decimals = 3
    ) %>% 
    cols_align(
      align = "center",
      columns = where(is.numeric)
    ) %>% 
    tab_footnote(
      footnote = "Mean ROC AUC across spatial cross-validation folds.",
      locations = cells_column_labels(columns = `Validation AUC`)
    ) %>% 
    tab_footnote(
      footnote = "Evaluated on the 20% independent Washington holdout set.",
      locations = cells_column_labels(columns = contains("Test"))
    ) %>% 
    tab_options(
      table.border.top.color = "white",
      table.border.bottom.color = "black",
      column_labels.font.weight = "bold"
    )
}

plot_yeo_johnson <- function(data) {
  # 1. Prepare Recipe & Extract Lambda
  rec_yeo <- recipe(~ elevation, data = data) %>%
    step_YeoJohnson(elevation) %>%
    prep()
  
  lambda_val <- tidy(rec_yeo, number = 1) %>%
    filter(terms == "elevation") %>%
    pull(value) %>%
    round(2)
  
  # 2. Create Plotting Data
  plot_data <- data %>%
    select(elevation) %>%
    mutate(type = "Original") %>%
    bind_rows(
      bake(rec_yeo, new_data = data) %>%
        mutate(type = "Transformed") %>%
        rename(elevation_trans = elevation)
    )
  
  # 3. Setup Colors
  dark3_cols <- colorspace::qualitative_hcl(2, palette = "Dark 3")
  
  # 4. Plot A: Original
  p1 <- ggplot(filter(plot_data, type == "Original"), aes(x = elevation)) +
    geom_density(fill = dark3_cols[1], color = dark3_cols[1], alpha = 0.6, linewidth = 1) +
    scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    labs(x = "Elevation (m)", y = "Density") +
    theme_minimal(base_size = 14) +
    theme(
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
    )
  
  # 5. Plot B: Transformed
  p2 <- ggplot(filter(plot_data, type == "Transformed"), aes(x = elevation_trans)) +
    geom_density(fill = dark3_cols[2], color = dark3_cols[2], alpha = 0.6, linewidth = 1) +
    scale_x_continuous(labels = scales::label_comma(), expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0)) +
    labs(x = paste0("Yeo-Johnson (Lambda: ", lambda_val, ")"), y = "Density") +
    theme_minimal(base_size = 14) +
    theme(
      panel.grid.minor = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5)
    )
  
  # 6. Combine
  p1 + p2
}

#' Save Model Error Diagnostic Map
#'
#' @description Generates a diagnostic map highlighting prediction errors. It plots
#' misclassified points colored by the magnitude of the error (confidence in the wrong answer)
#' over a hillshaded elevation background.
#'
#' @param data A data frame containing model predictions (must include '.pred_class',
#' 'forested', '.pred_Yes', 'lon', and 'lat').
#' @param boundary_sf An \code{sf} object representing the state boundary.
#' @param raster_path Character string. File path to the elevation raster (.tif).
#' @param output_path Character string. File path where the PNG will be saved.
#'
#' @importFrom terra rast terrain shade
#' @importFrom sf st_transform st_as_sf st_filter
#' @importFrom dplyr filter mutate
#' @importFrom ggplot2 ggplot geom_sf aes after_stat scale_alpha scale_color_viridis_c labs ggsave guides guide_colorbar unit margin
#' @importFrom tidyterra geom_spatraster scale_fill_hypso_c
#'
#' @return The \code{output_path} (invisible), for integration with \code{targets}.
#' @export
save_error_map_png <- function(data, boundary_sf, raster_path, output_path) {
  
  # 1. Load & Process Hillshade (Standard Setup)
  elev_terra <- terra::rast(raster_path)
  slope  <- terra::terrain(elev_terra, "slope", unit = "radians")
  aspect <- terra::terrain(elev_terra, "aspect", unit = "radians")
  hill_terra <- terra::shade(slope, aspect, angle = 45, direction = 315)
  
  # 2. Prep Vectors & Calculate Errors
  wa_boundary_sf <- sf::st_transform(boundary_sf, 4326)
  
  # Filter to ONLY the mistakes and calculate how "wrong" they were
  error_sf <- data %>%
    # Keep only rows where the Hard Class Prediction was wrong
    dplyr::filter(.pred_class != forested) %>%
    # Calculate Magnitude: How far was the probability from the truth?
    dplyr::mutate(
      truth_num = ifelse(forested == "Yes", 1, 0),
      error_magnitude = abs(truth_num - .pred_Yes)
    ) %>%
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
    sf::st_filter(wa_boundary_sf)
  
  # 3. Plot
  plt <- ggplot2::ggplot() +
    # --- Background Layers ---
    tidyterra::geom_spatraster(data = elev_terra) + 
    tidyterra::scale_fill_hypso_c(palette = "usgs-gswa2", name = "Elevation", na.value = "transparent") +
    tidyterra::geom_spatraster(data = hill_terra, ggplot2::aes(alpha = ggplot2::after_stat(value)), fill = "black", show.legend = FALSE) +
    ggplot2::scale_alpha(range = c(0.6, 0), guide = "none", na.value = 0) +
    ggplot2::geom_sf(data = wa_boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
    
    # --- The Error Layer ---
    ggplot2::geom_sf(
      data = error_sf, 
      ggplot2::aes(color = error_magnitude), 
      size = 3,       
      alpha = 0.9
    ) +
    ggplot2::scale_color_viridis_c(
      option = "plasma",
      name = "Error\nConfidence", 
      limits = c(0.5, 1.0),
      direction = -1 
    ) +
    
    # --- Theme Application ---
    theme_forestry_spatial() +
    ggplot2::labs(x = NULL, y = NULL) +
    
    # --- Legend Controls (Right Side Vertical) ---
    ggplot2::guides(
      fill = "none", 
      color = ggplot2::guide_colorbar(
        title.position = "top", 
        barwidth = ggplot2::unit(0.5, "cm"),   # Narrow width
        barheight = ggplot2::unit(6, "cm")     # Tall height for right side
      )
    ) +
    ggplot2::theme(legend.position = "right")
  
  # Save
  ggplot2::ggsave(output_path, plot = plt, width = 10, height = 6, dpi = 300, bg = "white")
  return(output_path)
}

#' Calculate Area of Applicability Data
#'
#' @description Generates the Area of Applicability (AOA) scores (Dissimilarity Index)
#' for the Georgia extrapolation dataset based on the Washington training data.
#'
#' @param train_data Dataframe. The training data from Washington.
#' @param test_data Dataframe. The extrapolation data from Georgia.
#' @param predictors Character vector. The list of predictor variable names.
#'
#' @importFrom dplyr select all_of bind_cols
#' @importFrom tibble tibble
#' @importFrom waywiser ww_area_of_applicability
#' @importFrom stats predict
#' @importFrom sf st_as_sf
#'
#' @return An \code{sf} object containing the Georgia data with an added 'di' (Dissimilarity Index) column.
#' @export
calculate_ga_aoa <- function(train_data, test_data, predictors) {
  
  # 1. Prepare Environments
  train_env <- train_data %>% dplyr::select(dplyr::all_of(predictors))
  test_env  <- test_data %>% dplyr::select(dplyr::all_of(predictors))
  
  # 2. Define "Flat" Importance
  importance <- tibble::tibble(term = predictors, estimate = 1)
  
  # 3. Fit AOA
  aoa_model <- waywiser::ww_area_of_applicability(
    x = train_env,
    importance = importance
  )
  
  # 4. Predict
  aoa_results <- stats::predict(aoa_model, test_env)
  
  # 5. Return Spatial Object
  test_data %>%
    dplyr::bind_cols(aoa_results) %>%
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
}

#' Plot Georgia Area of Applicability (AOA)
#'
#' @description Plots the pre-calculated Dissimilarity Index (DI) for Georgia.
#'
#' @param aoa_sf An \code{sf} object containing the 'di' column (output of \code{calculate_ga_aoa}).
#'
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_viridis_c labs guides guide_colorbar unit theme
#'
#' @return A \code{ggplot} object showing the Dissimilarity Index map.
#' @export
plot_georgia_aoa <- function(aoa_sf) {
  
  ggplot2::ggplot(aoa_sf) +
    # Points colored by Dissimilarity Index (DI)
    ggplot2::geom_sf(ggplot2::aes(color = di), size = 1, alpha = 0.8) +
    
    # Color Scale
    ggplot2::scale_color_viridis_c(
      option = "magma", 
      direction = 1, 
      name = "Dissimilarity\nIndex (DI)"
    ) +
    
    # Theme
    theme_forestry_spatial() +
    ggplot2::labs(x = NULL, y = NULL) +
    
    # Legend Control
    ggplot2::guides(
      color = ggplot2::guide_colorbar(
        title.position = "top", 
        barwidth = ggplot2::unit(0.5, "cm"),   
        barheight = ggplot2::unit(6, "cm")     
      )
    ) +
    ggplot2::theme(legend.position = "right")
}

# Predict on a new region (External Validation)
predict_external_region <- function(final_fit, new_data) {
  wa_model <- final_fit$.workflow[[1]]
  
  # FIX: Mock ALL columns that were in the training data
  # The model expects 'geometry' because training data was an sf object
  clean_data <- new_data %>%
    dplyr::mutate(
      northness = NA_real_, 
      county = NA_character_,
      geometry = NA # Tidymodels will see this exists, then ignore it (as it's not a predictor)
    )
  
  # Predict Class and Probability
  preds <- predict(wa_model, clean_data) %>%
    dplyr::bind_cols(predict(wa_model, clean_data, type = "prob")) %>%
    dplyr::bind_cols(new_data %>% dplyr::select(lat, lon, forested)) 
  
  return(preds)
}

#' Plot Georgia Forest Comparison
#'
#' @description Creates a side-by-side comparison of forest cover for Georgia.
#' The left plot (a) is the Model Prediction, and the right plot (b) is the Actual Data.
#' Features a shared right-side legend and standardized spatial styling.
#'
#' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).
#' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia").
#'
#' @importFrom dplyr filter
#' @importFrom sf st_as_sf st_transform st_crs
#' @importFrom ggplot2 ggplot geom_sf aes scale_color_manual labs theme guides guide_legend element_text
#' @importFrom patchwork plot_layout wrap_plots plot_annotation
#'
#' @return A \code{patchwork} object containing the labeled comparison plot.
#' @export
plot_ga_comparison_map <- function(pred_data, boundaries) {
  
  # 1. Prepare Data & Boundaries
  ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
  
  plot_data <- pred_data %>% 
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
    sf::st_transform(sf::st_crs(ga_boundary))
  
  # 2. Helper Function (No Title Argument)
  create_map <- function(data, fill_col) {
    ggplot2::ggplot() +
      # Background
      ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
      # Points
      ggplot2::geom_sf(data = data, 
                       ggplot2::aes(color = {{ fill_col }}), 
                       size = 0.5, alpha = 0.6) +
      # Colors
      ggplot2::scale_color_manual(
        name = "Forest Cover",
        values = c("Yes" = "#228B22", "No" = "#D2691E"),
        labels = c("Forest", "Non-Forest")
      ) +
      # Labels (No Title)
      ggplot2::labs(title = NULL, x = NULL, y = NULL) +
      
      # Theme
      theme_forestry_spatial() +
      
      # Define Legend Here to avoid Patchwork "Order of Ops" Error
      ggplot2::guides(
        color = ggplot2::guide_legend(
          override.aes = list(size = 4, alpha = 1), 
          title.position = "top",
          ncol = 1 
        )
      ) +
      
      ggplot2::theme(
        legend.title = ggplot2::element_text(face = "bold", size = 12),
        legend.text = ggplot2::element_text(size = 10)
      )
  }
  
  # 3. Create Subplots
  p_pred   <- create_map(plot_data, .pred_class) # Left: Predicted
  p_actual <- create_map(plot_data, forested)    # Right: Actual
  
  # 4. Patchwork Layout with (a)/(b) Tags
  combined_plot <- (p_pred + p_actual) + 
    patchwork::plot_layout(guides = "collect") +
    patchwork::plot_annotation(
      tag_levels = 'a', 
      tag_prefix = '(', 
      tag_suffix = ')',
      theme = theme_forestry_spatial() # Ensure tags match font
    ) & 
    ggplot2::theme(
      legend.position = "right",
      plot.tag = ggplot2::element_text(size = 20, face = "bold") # Make tags prominent
    )
  
  return(combined_plot)
}
plot_ga_confusion_matrix <- function(pred_data) {
  # 1. Generate the confusion matrix object
  cm <- pred_data %>% 
    yardstick::conf_mat(truth = forested, estimate = .pred_class)
  
  # 2. Plot using autoplot to match the previous style exactly
  plt <- ggplot2::autoplot(cm, type = "heatmap") +
    
    # MATCHED: The exact blue gradient from your previous slide
    ggplot2::scale_fill_gradient(low = "#ebf5fb", high = "#2980b9") +
    ggplot2::theme_minimal(base_size = 14) +
    ggplot2::theme(
      panel.grid = ggplot2::element_blank(),
      axis.text = ggplot2::element_text(face = "bold"),
      plot.title = ggplot2::element_text(face = "bold", hjust = 0.5), # Added hjust for center alignment
      legend.position = "none"
    )
  
  return(plt)
}
#' Plot Failure Mechanism Comparison
#'
#' @description Creates a side-by-side diagnostic plot returning a patchwork object.
#' (a) The Area of Applicability (Dissimilarity Index) showing where the model is extrapolating.
#' (b) The spatial distribution of actual classification errors.
#'
#' @param aoa_data Dataframe containing the AOA results (must have 'di', 'lon', 'lat').
#' @param pred_data Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).
#' @param boundaries An \code{sf} object containing state boundaries (must include "GA" or "Georgia").
#'
#' @importFrom dplyr filter mutate case_when
#' @importFrom sf st_as_sf st_transform st_crs
#' @importFrom ggplot2 ggplot geom_sf aes labs theme guides guide_legend guide_colorbar element_rect element_text unit
#' @importFrom colorspace scale_color_discrete_qualitative
#' @importFrom patchwork plot_layout plot_annotation wrap_plots
#'
#' @return A \code{patchwork} object containing the side-by-side comparison.
#' @export
plot_failure_mechanism <- function(aoa_data, pred_data, boundaries) {
  
  # 1. Prep Boundaries
  ga_boundary <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
  target_crs  <- sf::st_crs(ga_boundary)
  
  # 2. Prep Map A: Dissimilarity (Risk)
  aoa_sf <- aoa_data %>% 
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
    sf::st_transform(target_crs)
  
  p_risk <- ggplot2::ggplot() +
    ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
    ggplot2::geom_sf(data = aoa_sf, ggplot2::aes(color = di), size = 0.5, alpha = 0.8) +
    ggplot2::scale_color_viridis_c(
      option = "magma", 
      direction = 1, 
      name = "Dissimilarity Idx."
    ) +
    ggplot2::labs(x = NULL, y = NULL) +
    theme_forestry_spatial() +
    ggplot2::guides(
      color = ggplot2::guide_colorbar(
        title.position = "top", 
        title.hjust = 0.5,
        barwidth = ggplot2::unit(4.5, "cm"), 
        barheight = ggplot2::unit(.3, "cm")
      )
    ) +
    ggplot2::theme(legend.position ="bottom")
  
  # 3. Prep Map B: Failures (Effect)
  errors_sf <- pred_data %>%
    dplyr::filter(.pred_class != forested) %>%
    dplyr::mutate(
      error_type = dplyr::case_when(
        .pred_class == "Yes" & forested == "No" ~ "False Positive",
        .pred_class == "No" & forested == "Yes" ~ "False Negative"
      )
    ) %>%
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
    sf::st_transform(target_crs)
  
  p_fail <- ggplot2::ggplot() +
    ggplot2::geom_sf(data = ga_boundary, fill = "grey98", color = "grey80") +
    ggplot2::geom_sf(data = errors_sf, ggplot2::aes(color = error_type), size = 0.6, alpha = 0.6) +
    colorspace::scale_color_discrete_qualitative(
      palette = "Dark 3", 
      name = "Error Type"
    ) +
    ggplot2::labs(x = NULL, y = NULL) +
    theme_forestry_spatial() +
    ggplot2::guides(
      color = ggplot2::guide_legend(
        override.aes = list(size = 4, alpha = 1), 
        title.position = "top",
        title.hjust = 0.5
      )
    ) +
    ggplot2::theme(legend.position = "bottom")
  
  # 4. Patchwork Assembly
  combined_plot <- (p_risk + p_fail) + 
    patchwork::plot_layout(guides = "keep") + 
    patchwork::plot_annotation(
      tag_levels = 'a', 
      tag_prefix = '(', 
      tag_suffix = ')',
      theme = theme_forestry_spatial()
    ) &
    ggplot2::theme(
      plot.tag = ggplot2::element_text(size = 20, face = "bold"),
      plot.background = ggplot2::element_rect(fill = "transparent", color = NA)
    )
  
  return(combined_plot)
}

#' Plot Annual Rainfall Comparison (Clipped Hexes)
#'
#' @description Creates a polished side-by-side comparison of annual precipitation.
#' Hexagons are spatially generated and clipped to the exact state boundaries
#' to eliminate "bleeding" edges. Uses \code{theme_forestry_void} with explicit
#' font sizing to match topographic maps.
#'
#' @param wa_data Dataframe containing Washington data (requires 'precip_annual', 'lat', 'lon').
#' @param ga_data Dataframe containing Georgia data (requires 'precip_annual', 'lat', 'lon').
#' @param boundaries An \code{sf} object containing state boundaries.
#' @param bins Integer. Number of hexes across the state width. Default is 30.
#' @param max_limit Numeric. The visual cap for rainfall (mm) to ensure comparable scales. Default is 2500.
#'
#' @importFrom dplyr filter mutate group_by summarize inner_join row_number
#' @importFrom sf st_as_sf st_transform st_crs st_make_grid st_intersection st_join st_drop_geometry
#' @importFrom ggplot2 ggplot geom_sf aes scale_fill_distiller guide_colorbar unit theme element_text element_rect margin coord_sf rel
#' @importFrom scales squish
#' @importFrom patchwork plot_layout plot_annotation
#'
#' @return A \code{patchwork} object containing the side-by-side comparison.
#' @export
plot_precip_hex_comparison <- function(wa_data, ga_data, boundaries, bins = 30, max_limit = 2500) {
  
  # 1. Clean Data
  wa_clean <- wa_data %>% 
    dplyr::filter(!is.na(precip_annual)) %>% 
    dplyr::mutate(precip_annual = as.numeric(precip_annual))
  
  ga_clean <- ga_data %>% 
    dplyr::filter(!is.na(precip_annual)) %>% 
    dplyr::mutate(precip_annual = as.numeric(precip_annual))
  
  # 2. Helper: Generate Clipped Hex Grid
  get_clipped_hex_layer <- function(point_data, boundary_sf, n_bins) {
    target_crs <- sf::st_crs(boundary_sf)
    points_sf  <- sf::st_as_sf(point_data, coords = c("lon", "lat"), crs = 4326) %>% 
      sf::st_transform(target_crs)
    
    grid <- sf::st_make_grid(boundary_sf, n = n_bins, square = FALSE, flat_topped = TRUE) %>%
      sf::st_as_sf() %>% 
      dplyr::mutate(hex_id = dplyr::row_number())
    
    grid_clipped <- sf::st_intersection(grid, boundary_sf)
    joined <- sf::st_join(points_sf, grid_clipped, join = sf::st_within)
    
    hex_summary <- joined %>%
      dplyr::filter(!is.na(hex_id)) %>% 
      dplyr::group_by(hex_id) %>%
      dplyr::summarize(precip_mean = mean(precip_annual, na.rm = TRUE))
    
    final_layer <- grid_clipped %>%
      dplyr::inner_join(sf::st_drop_geometry(hex_summary), by = "hex_id")
    
    return(final_layer)
  }
  
  # 3. Process States
  wa_bnd <- boundaries %>% dplyr::filter(NAME == "Washington" | NAME == "WA")
  ga_bnd <- boundaries %>% dplyr::filter(NAME == "Georgia" | NAME == "GA")
  
  wa_hex_sf <- get_clipped_hex_layer(wa_clean, wa_bnd, bins)
  ga_hex_sf <- get_clipped_hex_layer(ga_clean, ga_bnd, bins)
  
  # 4. Plotting Helper
  plot_hex_sf <- function(hex_data, boundary_sf) {
    ggplot2::ggplot() +
      ggplot2::geom_sf(data = hex_data, ggplot2::aes(fill = precip_mean), color = NA) +
      ggplot2::geom_sf(data = boundary_sf, fill = NA, color = "black", linewidth = 0.5) +
      
      # [CRITICAL] Removes grid lines (datum=NA) and removes padding (expand=FALSE)
      ggplot2::coord_sf(datum = NA, expand = FALSE) + 
      
      ggplot2::scale_fill_distiller(
        palette = "Blues",
        direction = 1,
        limits = c(0, max_limit),
        oob = scales::squish,
        name = "Mean Annual Precip (mm)", 
        guide = ggplot2::guide_colorbar(
          title.position = "top",
          title.hjust = 0.5,
          barwidth = ggplot2::unit(8, "cm"),
          barheight = ggplot2::unit(0.4, "cm"),
          frame.colour = "black",
          ticks.colour = "black"
        )
      ) +
      # Use Base Size 16 to match Topo Plot
      theme_forestry_void(base_size = 16) +
      ggplot2::theme(legend.position = "none")
  }
  
  p_wa <- plot_hex_sf(wa_hex_sf, wa_bnd)
  p_ga <- plot_hex_sf(ga_hex_sf, ga_bnd)
  
  # 5. Patchwork Assembly
  combined_plot <- (p_wa + p_ga) +
    patchwork::plot_layout(guides = "collect") + 
    patchwork::plot_annotation(
      tag_levels = 'a', tag_prefix = '(', tag_suffix = ')'
    ) & 
    # Ensure the combined plot also uses base_size 16
    theme_forestry_void(base_size = 28) 
  
  return(combined_plot)
}

#' Get Major Peaks for Plotting
#'
#' Internal helper to return peak locations for WA and GA.
#' 
#' @param state_name String. Either "Washington" or "Georgia".
#' @return An sf object containing peak names and locations.
#' 
#' @importFrom tibble tibble
#' @importFrom sf st_as_sf
#' @noRd
get_state_peaks <- function(state_name) {
  if (state_name == "Washington") {
    tibble::tibble(
      peak = c("Mt. Rainier", "Mt. Adams", "Mt. Baker", "Glacier Peak", "Mt. St. Helens", "Mt. Olympus"),
      lat  = c(46.8523, 46.2024, 48.7767, 48.1125, 46.1914, 47.8013),
      lon  = c(-121.7603, -121.4909, -121.8132, -121.1139, -122.1956, -123.7111)
    ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
  } else if (state_name == "Georgia") {
    tibble::tibble(
      peak = c("Brasstown Bald", "Rabun Bald", "Blood Mtn"),
      lat  = c(34.8743, 34.9660, 34.7398),
      lon  = c(-83.8111, -83.2996, -83.9367)
    ) %>% sf::st_as_sf(coords = c("lon", "lat"), crs = 4326)
  } else { NULL }
}

#' Plot Spatial Autocorrelation Exploration
#'
#' @description Generates a Moran Scatterplot to visualize spatial 
#' autocorrelation in elevation data using a 5km neighborhood.
#'
#' @param wa_data A dataframe or tibble containing elevation, lat, and lon columns.
#'
#' @return A ggplot object showing standardized elevation vs. spatially lagged elevation.
#'
#' @importFrom sf st_as_sf st_transform st_coordinates st_drop_geometry
#' @importFrom spdep dnearneigh nb2listw lag.listw card
#' @importFrom ggplot2 ggplot aes geom_point geom_smooth geom_hline geom_vline labs theme_minimal
#' @importFrom dplyr mutate
#'
#' @export
plot_spatial_exploration <- function(wa_data) {
  
  # 1. Convert to sf and project to meters (Washington State Plane North)
  # This ensures the 5000 distance in dnearneigh is 5,000 meters.
  wa_sf <- wa_data |>
    sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) |>
    sf::st_transform(2285) 
  
  # 2. Extract coordinates and build weights (5km radius)
  coords <- sf::st_coordinates(wa_sf)
  nb <- spdep::dnearneigh(coords, 0, 5000)
  
  # Remove points with zero neighbors to prevent divide-by-zero errors
  ids_with_nb <- which(spdep::card(nb) > 0)
  nb <- subset(nb, spdep::card(nb) > 0)
  wa_sf <- wa_sf[ids_with_nb, ]
  
  # Row-standardized weights matrix
  lw <- spdep::nb2listw(nb, style = "W", zero.policy = TRUE)
  
  # 3. Prep data for plotting
  plot_df <- wa_sf |>
    sf::st_drop_geometry() |>
    dplyr::mutate(
      # Scale elevation to get z-scores
      elev_scaled = as.vector(scale(elevation)),
      # Calculate the spatial lag (average elevation of neighbors)
      elev_lag = spdep::lag.listw(lw, elev_scaled, zero.policy = TRUE)
    )
  
  # 4. Build Plot Object
  p <- ggplot2::ggplot(plot_df, ggplot2::aes(x = elev_scaled, y = elev_lag)) +
    ggplot2::geom_point(alpha = 0.2, color = "#2c3e50") +
    ggplot2::geom_smooth(
      method = "lm", 
      color = "#e74c3c", 
      linetype = "dashed", 
      formula = y ~ x
    ) +
    ggplot2::geom_hline(yintercept = 0, alpha = 0.3) +
    ggplot2::geom_vline(xintercept = 0, alpha = 0.3) +
    ggplot2::labs(
      title = "Spatial Autocorrelation: Moran Scatterplot",
      subtitle = "Washington Elevation (5km neighborhood)",
      x = "Standardized Elevation (Z-score)",
      y = "Spatially Lagged Elevation (Neighbors)"
    ) +
    ggplot2::theme_minimal()
  
  return(p)
}