initial commit

This commit is contained in:
2026-02-10 04:52:37 -05:00
commit 0476f6f8f8
65 changed files with 15368 additions and 0 deletions

22
man/calculate_ga_aoa.Rd Normal file
View File

@@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{calculate_ga_aoa}
\alias{calculate_ga_aoa}
\title{Calculate Area of Applicability Data}
\usage{
calculate_ga_aoa(train_data, test_data, predictors)
}
\arguments{
\item{train_data}{Dataframe. The training data from Washington.}
\item{test_data}{Dataframe. The extrapolation data from Georgia.}
\item{predictors}{Character vector. The list of predictor variable names.}
}
\value{
An \code{sf} object containing the Georgia data with an added 'di' (Dissimilarity Index) column.
}
\description{
Generates the Area of Applicability (AOA) scores (Dissimilarity Index)
for the Georgia extrapolation dataset based on the Washington training data.
}

28
man/combine_forest.Rd Normal file
View File

@@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{combine_forest}
\alias{combine_forest}
\title{Combine Washington and Georgia Forest Data}
\usage{
combine_forest(wa_data, ga_data)
}
\arguments{
\item{wa_data}{A data frame containing the Washington forest inventory data.}
\item{ga_data}{A data frame containing the Georgia forest inventory data.}
}
\value{
A single combined data frame with an additional \code{state} column
(the \code{.id} column, renamed) indicating the source ("WA" or "GA").
}
\description{
Merges the Washington and Georgia datasets into a single data frame, adding a
column to identify the source state.
}
\examples{
\dontrun{
combined <- combine_forest(wa_raw, ga_raw)
table(combined$state)
}
}

View File

@@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{create_stats_summary}
\alias{create_stats_summary}
\title{Create Statistical Summary of Forest Data}
\usage{
create_stats_summary(data)
}
\arguments{
\item{data}{A data frame or sf object containing the forest data.}
}
\value{
A data frame with descriptive statistics (mean, sd, min, max, etc.),
sorted by descending absolute kurtosis.
}
\description{
Generates descriptive statistics for numeric variables in the dataset,
excluding spatial coordinates (lat/lon) and year. It sorts the results
by descending absolute kurtosis to highlight non-normal distributions.
}

View File

@@ -0,0 +1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/forestedAnalysis-package.R
\docType{package}
\name{forestedAnalysis-package}
\alias{forestedAnalysis}
\alias{forestedAnalysis-package}
\title{forestedAnalysis: Spatial Cross-Validation and AOA Analysis of Forest Cover}
\description{
A research compendium analyzing forest cover data in Washington and Georgia. It evaluates the Area of Applicability (AOA) and demonstrates model failure during spatial extrapolation.
}
\author{
\strong{Maintainer}: Rob Wiederstein \email{khuon68@gmail.com}
}
\keyword{internal}

23
man/get_epa_ecoregions.Rd Normal file
View File

@@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{get_epa_ecoregions}
\alias{get_epa_ecoregions}
\title{Download EPA Level III Ecoregions Data}
\usage{
get_epa_ecoregions(url, dest_dir = "data/epa")
}
\arguments{
\item{url}{Character string. The direct URL to the EPA Ecoregions zip file.}
\item{dest_dir}{Character string. The local directory where the data should
be saved. Defaults to "data/epa".}
}
\value{
A character string containing the full file path to the downloaded zip file.
This return value is designed to be tracked by \code{targets}.
}
\description{
Downloads the EPA Level III Ecoregions shapefile (zip format)
to a local directory. Implements a caching check to avoid re-downloading
if the file already exists.
}

35
man/helper_save_fig.Rd Normal file
View File

@@ -0,0 +1,35 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{helper_save_fig}
\alias{helper_save_fig}
\title{Helper: Save Plot for Quarto Slide}
\usage{
helper_save_fig(
plot_obj,
name,
type = c("map", "plot"),
width = 10,
height = 6.18,
dpi = 300
)
}
\arguments{
\item{plot_obj}{The ggplot object to save.}
\item{name}{A short descriptive name (e.g., "wa_ecoregions").}
\item{type}{Either "map" or "plot". Adds this prefix to the filename.}
\item{width}{Width in inches (default: 10).}
\item{height}{Height in inches (default: 6.18).}
\item{dpi}{Resolution (default: 300).}
}
\value{
The full file path (invisibly).
}
\description{
Saves a ggplot object as a PNG, sized to fit comfortably
below a standard slide title, with robust font handling.
}

View File

@@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_ecoregion_comparison}
\alias{plot_ecoregion_comparison}
\title{Plot Ecoregion Complexity Comparison (WA vs GA)}
\usage{
plot_ecoregion_comparison(eco_data)
}
\arguments{
\item{eco_data}{An \code{sf} object containing ecoregion polygons. Must contain
columns \code{STATE_NAME} and \code{US_L3NAME}.}
}
\value{
A \code{patchwork} object containing the combined plot.
}
\description{
Generates a side-by-side comparison of Level III ecoregions for Washington
and Georgia. It uses a "void" theme, qualitative colors, and carefully tuned
label repulsion settings to avoid overlapping text.
}

View File

@@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_failure_mechanism}
\alias{plot_failure_mechanism}
\title{Plot Failure Mechanism Comparison}
\usage{
plot_failure_mechanism(aoa_data, pred_data, boundaries)
}
\arguments{
\item{aoa_data}{Dataframe containing the AOA results (must have 'di', 'lon', 'lat').}
\item{pred_data}{Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).}
\item{boundaries}{An \code{sf} object containing state boundaries (must include "GA" or "Georgia").}
}
\value{
A \code{patchwork} object containing the side-by-side comparison.
}
\description{
Creates a side-by-side diagnostic plot, returned as a \code{patchwork} object, with two panels:
(a) the Area of Applicability (Dissimilarity Index), showing where the model is extrapolating; and
(b) the spatial distribution of actual classification errors.
}

View File

@@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_ga_comparison_map}
\alias{plot_ga_comparison_map}
\title{Plot Georgia Forest Comparison}
\usage{
plot_ga_comparison_map(pred_data, boundaries)
}
\arguments{
\item{pred_data}{Dataframe containing prediction results (columns: .pred_class, forested, lon, lat).}
\item{boundaries}{An \code{sf} object containing state boundaries (must include "GA" or "Georgia").}
}
\value{
A \code{patchwork} object containing the labeled comparison plot.
}
\description{
Creates a side-by-side comparison of forest cover for Georgia.
The left plot (a) is the Model Prediction, and the right plot (b) is the Actual Data.
Features a shared right-side legend and standardized spatial styling.
}

17
man/plot_georgia_aoa.Rd Normal file
View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_georgia_aoa}
\alias{plot_georgia_aoa}
\title{Plot Georgia Area of Applicability (AOA)}
\usage{
plot_georgia_aoa(aoa_sf)
}
\arguments{
\item{aoa_sf}{An \code{sf} object containing the 'di' column (output of \code{calculate_ga_aoa}).}
}
\value{
A \code{ggplot} object showing the Dissimilarity Index map.
}
\description{
Plots the pre-calculated Dissimilarity Index (DI) for Georgia.
}

View File

@@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_precip_hex_comparison}
\alias{plot_precip_hex_comparison}
\title{Plot Annual Rainfall Comparison (Clipped Hexes)}
\usage{
plot_precip_hex_comparison(
wa_data,
ga_data,
boundaries,
bins = 30,
max_limit = 2500
)
}
\arguments{
\item{wa_data}{Dataframe containing Washington data (requires 'precip_annual', 'lat', 'lon').}
\item{ga_data}{Dataframe containing Georgia data (requires 'precip_annual', 'lat', 'lon').}
\item{boundaries}{An \code{sf} object containing state boundaries.}
\item{bins}{Integer. Number of hexes across the state width. Default is 30.}
\item{max_limit}{Numeric. The visual cap for rainfall (mm) to ensure comparable scales. Default is 2500.}
}
\value{
A \code{patchwork} object containing the side-by-side comparison.
}
\description{
Creates a polished side-by-side comparison of annual precipitation.
Hexagons are spatially generated and clipped to the exact state boundaries
to eliminate "bleeding" edges. Uses \code{theme_forestry_void} with explicit
font sizing to match topographic maps.
}

View File

@@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_regional_comparison}
\alias{plot_regional_comparison}
\title{Plot Regional Comparison of Forested Data (WA vs GA)}
\usage{
plot_regional_comparison(data, boundaries)
}
\arguments{
\item{data}{A data frame containing the forest point data. Must contain
columns \code{lon}, \code{lat}, \code{state}, and \code{forested}.}
\item{boundaries}{An \code{sf} object containing state boundaries. Must contain
a \code{NAME} column.}
}
\value{
A \code{patchwork} object containing the combined side-by-side maps.
}
\description{
Generates a side-by-side comparison of forest cover for Washington
and Georgia. It handles font registration (Atkinson Hyperlegible), spatial
transformations, and creates a combined plot with a shared legend.
}

17
man/plot_rf_importance.Rd Normal file
View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_rf_importance}
\alias{plot_rf_importance}
\title{Plot Random Forest Variable Importance}
\usage{
plot_rf_importance(data)
}
\arguments{
\item{data}{An sf object or data frame containing the 'forested' target and predictors.}
}
\description{
Fits a ranger Random Forest model to the provided data, calculates
permutation importance, and generates a lollipop chart. It distinguishes
between spatial (lat/lon) and biophysical predictors.
Uses the project's 'Atkinson' font theme via theme_forestry_plot().
}

View File

@@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_spatial_exploration}
\alias{plot_spatial_exploration}
\title{Plot Spatial Autocorrelation Exploration}
\usage{
plot_spatial_exploration(wa_data)
}
\arguments{
\item{wa_data}{A dataframe or tibble containing elevation, lat, and lon columns.}
}
\value{
A ggplot object showing standardized elevation vs. spatially lagged elevation.
}
\description{
Generates a Moran Scatterplot to visualize spatial
autocorrelation in elevation data using a 5km neighborhood.
}

11
man/plot_state_topo.Rd Normal file
View File

@@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_state_topo}
\alias{plot_state_topo}
\title{Create Single State Topo Plot}
\usage{
plot_state_topo(data, boundary_sf, raster_path, state_name)
}
\description{
Generates a topo map with manually tuned label placement for Georgia.
}

View File

@@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_theme_diagnostic}
\alias{plot_theme_diagnostic}
\title{Simplified Theme Diagnostic}
\usage{
plot_theme_diagnostic()
}
\description{
Uses built-in NC data to verify theme_forestry_spatial.
}

11
man/plot_us_map.Rd Normal file
View File

@@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{plot_us_map}
\alias{plot_us_map}
\title{Plot US Map with Forestry Theme}
\usage{
plot_us_map()
}
\description{
Highlights Washington and Georgia using standardized presentation fonts.
}

32
man/process_ecoregions.Rd Normal file
View File

@@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{process_ecoregions}
\alias{process_ecoregions}
\title{Process and Clip EPA Ecoregions}
\usage{
process_ecoregions(
zip_path,
target_states = c("Washington", "Georgia"),
simplify_tol = 0.05
)
}
\arguments{
\item{zip_path}{Character string. The file path to the zipped EPA shapefile.}
\item{target_states}{Character vector. The names of the states to clip the
ecoregions to. Defaults to \code{c("Washington", "Georgia")}.}
\item{simplify_tol}{Numeric. The simplification tolerance passed to
\code{rmapshaper::ms_simplify}. Range is 0-1, where higher numbers remove more detail.
Defaults to 0.05.}
}
\value{
An \code{sf} object containing the processed ecoregions with standardized
columns \code{US_L3NAME} and \code{STATE_NAME}.
}
\description{
Extracts EPA Level III ecoregion data from a zipped shapefile,
standardizes column names, and clips the geometry to specified state boundaries.
It includes robust steps for geometry repair (handling spherical validity),
small island removal, and simplification for optimized plotting.
}

19
man/save_combined_topo.Rd Normal file
View File

@@ -0,0 +1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{save_combined_topo}
\alias{save_combined_topo}
\title{Save Combined Side-by-Side Topo Plot}
\usage{
save_combined_topo(
wa_data,
ga_data,
wa_boundary,
ga_boundary,
wa_raster_path,
ga_raster_path,
output_path
)
}
\description{
Save Combined Side-by-Side Topo Plot
}

26
man/save_error_map_png.Rd Normal file
View File

@@ -0,0 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{save_error_map_png}
\alias{save_error_map_png}
\title{Save Model Error Diagnostic Map}
\usage{
save_error_map_png(data, boundary_sf, raster_path, output_path)
}
\arguments{
\item{data}{A data frame containing model predictions (must include '.pred_class',
'forested', '.pred_Yes', 'lon', and 'lat').}
\item{boundary_sf}{An \code{sf} object representing the state boundary.}
\item{raster_path}{Character string. File path to the elevation raster (.tif).}
\item{output_path}{Character string. File path where the PNG will be saved.}
}
\value{
The \code{output_path} (invisible), for integration with \code{targets}.
}
\description{
Generates a diagnostic map highlighting prediction errors. It plots
misclassified points colored by the magnitude of the error (confidence in the wrong answer)
over a hillshaded elevation background.
}

View File

@@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{save_outlier_map_png}
\alias{save_outlier_map_png}
\title{Save Outlier Diagnostic Map}
\usage{
save_outlier_map_png(data, boundary_sf, raster_path, output_path)
}
\arguments{
\item{data}{A data frame containing the analysis dataset (must include numeric columns and 'forested' factor).}
\item{boundary_sf}{An \code{sf} object representing the state boundary (e.g., Washington).}
\item{raster_path}{Character string. File path to the elevation raster (.tif).}
\item{output_path}{Character string. File path where the PNG will be saved.}
}
\value{
The \code{output_path} (invisible), for integration with \code{targets}.
}
\description{
Generates a diagnostic map highlighting multivariate outliers (Z > 3)
overlaid on a hillshaded elevation raster. Uses the standardized forestry theme.
}

View File

@@ -0,0 +1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{setup_forestry_fonts}
\alias{setup_forestry_fonts}
\title{Register Project Fonts}
\usage{
setup_forestry_fonts()
}
\value{
NULL (called for side effects)
}
\description{
Registers 'Atkinson Hyperlegible Next' (Sans) and 'Atkinson Hyperlegible Mono'
(Monospace) with the sysfonts package for use in R graphics.
}

21
man/style_audit_table.Rd Normal file
View File

@@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{style_audit_table}
\alias{style_audit_table}
\title{Style Audit Table (GT)}
\usage{
style_audit_table(data, title = NULL, subtitle = NULL)
}
\arguments{
\item{data}{A data frame to be formatted.}
\item{title}{Character string. The title of the table (optional).}
\item{subtitle}{Character string. The subtitle of the table (optional).}
}
\value{
A \code{gt_tbl} object ready for rendering.
}
\description{
Converts a data frame into a formatted \code{gt} table with consistent
styling for audit reports. Includes row striping, numeric formatting, and
standardized font sizes.
}

View File

@@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{theme_forestry_plot}
\alias{theme_forestry_plot}
\title{Standard Forestry Plot Theme (Cowplot + Atkinson)}
\usage{
theme_forestry_plot(font_size = 14, grid = TRUE)
}
\arguments{
\item{font_size}{Integer. Base font size. Default is 14 (good for slides).}
\item{grid}{Logical. If TRUE, adds a light gray grid (useful for presentations).}
}
\description{
A standardized theme for non-spatial plots (scatter, bar, line).
Based on cowplot::theme_cowplot(), it includes clean axes and a minimalist look.
Uses 'Atkinson Hyperlegible Next' for all text.
}

View File

@@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{theme_forestry_spatial}
\alias{theme_forestry_spatial}
\title{Standardized Spatial Theme (Atkinson)}
\usage{
theme_forestry_spatial(base_size = 16)
}
\description{
High-visibility map theme for presentations using Atkinson fonts.
}

View File

@@ -0,0 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/functions.R
\name{theme_forestry_void}
\alias{theme_forestry_void}
\title{Standardized Void Theme (Maximal Data Ink)}
\usage{
theme_forestry_void(base_size = 16)
}
\description{
Removes axes/grids for shape-focused maps, but keeps project fonts.
}