Initial commit: BAF Lakehouse fraud detection pipeline
End-to-end LightGBM fraud detection pipeline built as an R package, orchestrated by targets with data stored in MinIO via Apache Arrow. Includes 6-layer Lakehouse architecture, class imbalance tournament, formally tuned hyperparameters (PR-AUC 0.198), and Quarto RevealJS slides. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
11
man/baflakehouse-package.Rd
Normal file
11
man/baflakehouse-package.Rd
Normal file
@@ -0,0 +1,11 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/baflakehouse-package.R
|
||||
\docType{package}
|
||||
\name{baflakehouse-package}
|
||||
\alias{baflakehouse-package}
|
||||
\title{baflakehouse: Lakehouse Workflow for the Bank Account Fraud Dataset}
|
||||
\description{
|
||||
Tools to ingest the Bank Account Fraud (BAF) Base dataset into a MinIO/S3-backed
|
||||
lakehouse, clean encoded missing values, and produce reproducible reporting
|
||||
artifacts orchestrated with targets.
|
||||
}
|
||||
17
man/build_baf_recipe.Rd
Normal file
17
man/build_baf_recipe.Rd
Normal file
@@ -0,0 +1,17 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{build_baf_recipe}
|
||||
\alias{build_baf_recipe}
|
||||
\title{Build Untrained BAF Recipe}
|
||||
\usage{
|
||||
build_baf_recipe(data)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{A data frame}
|
||||
}
|
||||
\value{
|
||||
An untrained tidymodels recipe
|
||||
}
|
||||
\description{
|
||||
Build Untrained BAF Recipe
|
||||
}
|
||||
34
man/clean_baf_base.Rd
Normal file
34
man/clean_baf_base.Rd
Normal file
@@ -0,0 +1,34 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{clean_baf_base}
|
||||
\alias{clean_baf_base}
|
||||
\title{Clean the BAF Base dataset and write to 03_primary}
|
||||
\usage{
|
||||
clean_baf_base(
|
||||
in_prefix,
|
||||
out_prefix = "03_primary/variant=Base",
|
||||
bucket_name = "baf-fraud",
|
||||
partitioning = "month",
|
||||
existing_data_behavior = c("overwrite", "error", "delete_matching"),
|
||||
verbose = TRUE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{in_prefix}{Character. Input dataset prefix inside bucket (e.g. "02_intermediate/variant=Base").}
|
||||
|
||||
\item{out_prefix}{Character. Output dataset prefix inside bucket (e.g. "03_primary/variant=Base").}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default "baf-fraud".}
|
||||
|
||||
\item{partitioning}{Character vector of columns to partition by. Default "month". Set to \code{NULL} to disable.}
|
||||
|
||||
\item{existing_data_behavior}{One of "overwrite", "error", "delete_matching". Default "overwrite".}
|
||||
|
||||
\item{verbose}{Logical. Emit progress messages. Default TRUE.}
|
||||
}
|
||||
\value{
|
||||
Character. out_prefix (for downstream targets).
|
||||
}
|
||||
\description{
|
||||
Clean the BAF Base dataset and write to 03_primary
|
||||
}
|
||||
19
man/compute_fraud_by_month.Rd
Normal file
19
man/compute_fraud_by_month.Rd
Normal file
@@ -0,0 +1,19 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{compute_fraud_by_month}
|
||||
\alias{compute_fraud_by_month}
|
||||
\title{Fraud prevalence by month (counts + percent)}
|
||||
\usage{
|
||||
compute_fraud_by_month(in_prefix, use_duckdb = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{in_prefix}{Character. Dataset prefix inside the bucket, e.g. "03_primary/variant=Base".}
|
||||
|
||||
\item{use_duckdb}{Logical. Use DuckDB for lazy querying. Default TRUE.}
|
||||
}
|
||||
\value{
|
||||
A tibble with Month, Fraud, Legit, Total, Pct_Fraud.
|
||||
}
|
||||
\description{
|
||||
Computes monthly counts of Fraud/Legit, totals, and percent fraud.
|
||||
}
|
||||
22
man/connect_baf.Rd
Normal file
22
man/connect_baf.Rd
Normal file
@@ -0,0 +1,22 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{connect_baf}
|
||||
\alias{connect_baf}
|
||||
\title{Connect to BAF dataset on MinIO (Arrow or DuckDB)}
|
||||
\usage{
|
||||
connect_baf(prefix, bucket_name = Sys.getenv("BAF_BUCKET"), use_duckdb = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{prefix}{Character. Dataset prefix inside the bucket
|
||||
(e.g., "02_intermediate/variant=Base").}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Defaults to env var BAF_BUCKET.}
|
||||
|
||||
\item{use_duckdb}{Logical. If TRUE, return a DuckDB-backed lazy tbl.}
|
||||
}
|
||||
\value{
|
||||
A DuckDB-backed lazy table (the default, \code{use_duckdb = TRUE}) or an Arrow Dataset when \code{use_duckdb = FALSE}.
|
||||
}
|
||||
\description{
|
||||
Connect to BAF dataset on MinIO (Arrow or DuckDB)
|
||||
}
|
||||
41
man/convert_to_parquet.Rd
Normal file
41
man/convert_to_parquet.Rd
Normal file
@@ -0,0 +1,41 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{convert_to_parquet}
|
||||
\alias{convert_to_parquet}
|
||||
\title{Convert BAF CSV to partitioned Parquet in MinIO (S3)}
|
||||
\usage{
|
||||
convert_to_parquet(from_prefix, to_prefix, bucket_name = "baf-fraud")
|
||||
}
|
||||
\arguments{
|
||||
\item{from_prefix}{Character. Prefix/key under the bucket containing CSVs (e.g. \code{"01_raw"}).}
|
||||
|
||||
\item{to_prefix}{Character. Prefix/key under the bucket to write Parquet dataset (e.g. \code{"02_intermediate"}).}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default \code{"baf-fraud"}.}
|
||||
}
|
||||
\value{
|
||||
A character string giving the destination dataset prefix (typically \code{to_prefix}).
|
||||
}
|
||||
\description{
|
||||
Reads \code{Base.csv} from a MinIO/S3 bucket prefix (e.g., \code{"01_raw"}) and writes a
|
||||
Hive-style partitioned Parquet dataset to another prefix (e.g., \code{"02_intermediate"}),
|
||||
partitioned by \code{variant} (e.g., \verb{variant=Base/part-*.parquet}).
|
||||
}
|
||||
\details{
|
||||
Connection settings are taken from environment variables:
|
||||
\itemize{
|
||||
\item \code{BAF_ENDPOINT} (e.g. \code{"minio:9000"} or \code{"192.168.4.xx:9000"})
|
||||
\item \code{BAF_KEY} (MinIO access key)
|
||||
\item \code{BAF_SECRET} (MinIO secret key)
|
||||
}
|
||||
}
|
||||
\examples{
|
||||
\dontrun{
|
||||
Sys.setenv(
|
||||
BAF_ENDPOINT = "minio:9000",
|
||||
BAF_KEY = "YOUR_ACCESS_KEY",
|
||||
BAF_SECRET = "YOUR_SECRET_KEY"
|
||||
)
|
||||
convert_to_parquet(from_prefix = "01_raw", to_prefix = "02_intermediate", bucket_name = "baf-fraud")
|
||||
}
|
||||
}
|
||||
14
man/create_efficiency_plot.Rd
Normal file
14
man/create_efficiency_plot.Rd
Normal file
@@ -0,0 +1,14 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{create_efficiency_plot}
|
||||
\alias{create_efficiency_plot}
|
||||
\title{Create Effectiveness vs Efficiency Plot}
|
||||
\usage{
|
||||
create_efficiency_plot(results_df)
|
||||
}
|
||||
\arguments{
|
||||
\item{results_df}{Tibble from run_imbalance_tournament}
|
||||
}
|
||||
\description{
|
||||
Create Effectiveness vs Efficiency Plot
|
||||
}
|
||||
37
man/engineer_features.Rd
Normal file
37
man/engineer_features.Rd
Normal file
@@ -0,0 +1,37 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{engineer_features}
|
||||
\alias{engineer_features}
|
||||
\title{Engineer features for the BAF dataset}
|
||||
\usage{
|
||||
engineer_features(
|
||||
in_prefix = "03_primary/variant=Base",
|
||||
out_prefix = "04_feature/variant=Base",
|
||||
bucket_name = "baf-fraud",
|
||||
partitioning = "month",
|
||||
existing_data_behavior = "delete_matching",
|
||||
verbose = TRUE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{in_prefix}{Character. Input dataset prefix (e.g., "03_primary/variant=Base").}
|
||||
|
||||
\item{out_prefix}{Character. Output dataset prefix (e.g., "04_feature/variant=Base").}
|
||||
|
||||
\item{bucket_name}{Character. The S3/MinIO bucket name. Default "baf-fraud".}
|
||||
|
||||
\item{partitioning}{Character vector. Columns to partition by. Default "month".}
|
||||
|
||||
\item{existing_data_behavior}{Character. Behavior when data exists. Default "delete_matching".}
|
||||
|
||||
\item{verbose}{Logical. Whether to print progress messages. Default TRUE.}
|
||||
}
|
||||
\value{
|
||||
Character. The output prefix path for downstream targets.
|
||||
}
|
||||
\description{
|
||||
Reads the primary BAF dataset and engineers new features, such as
|
||||
\code{n_missing}, which counts the number of missing values across key
|
||||
tenure and financial columns. This calculation is performed out-of-memory
|
||||
using Arrow compute.
|
||||
}
|
||||
27
man/evaluate_final_model.Rd
Normal file
27
man/evaluate_final_model.Rd
Normal file
@@ -0,0 +1,27 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{evaluate_final_model}
|
||||
\alias{evaluate_final_model}
|
||||
\title{Final Model Evaluation (Months 6 & 7)}
|
||||
\usage{
|
||||
evaluate_final_model(
|
||||
params,
|
||||
bucket_name = "baf-fraud",
|
||||
inputs_prefix = "05_model_input"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{A named list of LightGBM hyperparameters with elements:
|
||||
\code{trees}, \code{tree_depth}, \code{learn_rate}, \code{loss_reduction}, \code{min_n}.}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default "baf-fraud".}
|
||||
|
||||
\item{inputs_prefix}{Character. Model input prefix. Default "05_model_input".}
|
||||
}
|
||||
\value{
|
||||
A tibble with columns \code{truth}, \code{prob}, and \code{pred_class}.
|
||||
}
|
||||
\description{
|
||||
Trains the winning strategy on the full training set (Months 0-5)
|
||||
and evaluates it on the unseen test set (Months 6-7).
|
||||
}
|
||||
18
man/format_class_imbalance_tourney_gt.Rd
Normal file
18
man/format_class_imbalance_tourney_gt.Rd
Normal file
@@ -0,0 +1,18 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{format_class_imbalance_tourney_gt}
|
||||
\alias{format_class_imbalance_tourney_gt}
|
||||
\title{Format Class Imbalance Tournament Table}
|
||||
\usage{
|
||||
format_class_imbalance_tourney_gt(results_df)
|
||||
}
|
||||
\arguments{
|
||||
\item{results_df}{The tibble output from \code{run_imbalance_tournament}.}
|
||||
}
|
||||
\value{
|
||||
A formatted gt table object.
|
||||
}
|
||||
\description{
|
||||
Aggregates results from the model tournament and performs paired t-tests
|
||||
against the 'Standard' model to determine statistical significance.
|
||||
}
|
||||
17
man/format_fraud_by_month_gt.Rd
Normal file
17
man/format_fraud_by_month_gt.Rd
Normal file
@@ -0,0 +1,17 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{format_fraud_by_month_gt}
|
||||
\alias{format_fraud_by_month_gt}
|
||||
\title{Format fraud-by-month table as a gt object}
|
||||
\usage{
|
||||
format_fraud_by_month_gt(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Tibble from compute_fraud_by_month().}
|
||||
}
|
||||
\value{
|
||||
A gt table.
|
||||
}
|
||||
\description{
|
||||
Format fraud-by-month table as a gt object
|
||||
}
|
||||
27
man/generate_model_inputs.Rd
Normal file
27
man/generate_model_inputs.Rd
Normal file
@@ -0,0 +1,27 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{generate_model_inputs}
|
||||
\alias{generate_model_inputs}
|
||||
\title{Generate Resampled Model Inputs}
|
||||
\usage{
|
||||
generate_model_inputs(
|
||||
feature_prefix = "04_feature/variant=Base",
|
||||
out_prefix = "05_model_input",
|
||||
bucket_name = "baf-fraud"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{feature_prefix}{Character. Input prefix (e.g., "04_feature/variant=Base").}
|
||||
|
||||
\item{out_prefix}{Character. Output prefix base (e.g., "05_model_input").}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default "baf-fraud".}
|
||||
}
|
||||
\value{
|
||||
Character. The output prefix (for targets dependency tracking).
|
||||
}
|
||||
\description{
|
||||
Reads the engineered feature layer, prepares a base tidymodels recipe,
|
||||
and generates resampled datasets (Baseline, Under, SMOTE, Adasyn, Tomek)
|
||||
across all months, saving them to the 05_model_input prefix.
|
||||
}
|
||||
21
man/plot_conf_mat_heatmap.Rd
Normal file
21
man/plot_conf_mat_heatmap.Rd
Normal file
@@ -0,0 +1,21 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{plot_conf_mat_heatmap}
|
||||
\alias{plot_conf_mat_heatmap}
|
||||
\title{Plot Confusion Matrix Heatmap}
|
||||
\usage{
|
||||
plot_conf_mat_heatmap(cm, title = "")
|
||||
}
|
||||
\arguments{
|
||||
\item{cm}{A yardstick conf_mat object.}
|
||||
|
||||
\item{title}{Character. The main title of the plot.}
|
||||
|
||||
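% NOTE: 'subtitle' is not part of the usage signature above (cm, title); restore this item only if the function gains that argument: \item{subtitle}{Character. The subtitle of the plot.}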
|
||||
}
|
||||
\value{
|
||||
A ggplot object.
|
||||
}
|
||||
\description{
|
||||
Generates a styled 4-quadrant heatmap from a yardstick confusion matrix.
|
||||
}
|
||||
34
man/plot_fraud_by_month.Rd
Normal file
34
man/plot_fraud_by_month.Rd
Normal file
@@ -0,0 +1,34 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{plot_fraud_by_month}
|
||||
\alias{plot_fraud_by_month}
|
||||
\title{Plot applications by month (Legit vs Fraud) on a log scale}
|
||||
\usage{
|
||||
plot_fraud_by_month(
|
||||
dataset_prefix,
|
||||
bucket_name = "baf-fraud",
|
||||
palette = "Dark 3",
|
||||
title = ""
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{dataset_prefix}{Character. Prefix inside the bucket, e.g. "03_primary/variant=Base".}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default "baf-fraud".}
|
||||
|
||||
\item{palette}{Character. colorspace qualitative palette name. Default "Dark 3".}
|
||||
|
||||
\item{title}{Character. Plot title. Default "".}
|
||||
}
|
||||
\value{
|
||||
A ggplot object.
|
||||
}
|
||||
\description{
|
||||
Builds an exploratory chart of absolute application counts by month
|
||||
split by outcome (Legit vs Fraud). Uses a log10 y-axis so rare fraud
|
||||
remains visible on the same axis.
|
||||
}
|
||||
\details{
|
||||
Data source: expects a cleaned "primary" dataset prefix (e.g. 03_primary/variant=Base)
|
||||
stored in MinIO/S3, accessed via \code{connect_baf()}.
|
||||
}
|
||||
16
man/plot_hexbin_interaction.Rd
Normal file
16
man/plot_hexbin_interaction.Rd
Normal file
@@ -0,0 +1,16 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{plot_hexbin_interaction}
|
||||
\alias{plot_hexbin_interaction}
|
||||
\title{Plot Hexbin Interaction}
|
||||
\usage{
|
||||
plot_hexbin_interaction(baked_data, title = "")
|
||||
}
|
||||
\arguments{
|
||||
\item{baked_data}{Baked EDA data}
|
||||
|
||||
\item{title}{Character. Plot title. Default "".}
|
||||
}
|
||||
\description{
|
||||
Plot Hexbin Interaction
|
||||
}
|
||||
16
man/plot_missingness.Rd
Normal file
16
man/plot_missingness.Rd
Normal file
@@ -0,0 +1,16 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{plot_missingness}
|
||||
\alias{plot_missingness}
|
||||
\title{Plot Missingness Signal}
|
||||
\usage{
|
||||
plot_missingness(eda_data, title = "")
|
||||
}
|
||||
\arguments{
|
||||
\item{eda_data}{Raw EDA data}
|
||||
|
||||
\item{title}{Character. Plot title. Default "".}
|
||||
}
|
||||
\description{
|
||||
Plot Missingness Signal
|
||||
}
|
||||
16
man/plot_num_cor.Rd
Normal file
16
man/plot_num_cor.Rd
Normal file
@@ -0,0 +1,16 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{plot_num_cor}
|
||||
\alias{plot_num_cor}
|
||||
\title{Plot Numeric Correlation Matrix}
|
||||
\usage{
|
||||
plot_num_cor(eda_data, title = "")
|
||||
}
|
||||
\arguments{
|
||||
\item{eda_data}{Raw EDA data}
|
||||
|
||||
\item{title}{Character. Plot title. Default "".}
|
||||
}
|
||||
\description{
|
||||
Plot Numeric Correlation Matrix
|
||||
}
|
||||
16
man/plot_var_imp.Rd
Normal file
16
man/plot_var_imp.Rd
Normal file
@@ -0,0 +1,16 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{plot_var_imp}
|
||||
\alias{plot_var_imp}
|
||||
\title{Plot Variable Importance}
|
||||
\usage{
|
||||
plot_var_imp(model, title = "")
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{Trained LightGBM model}
|
||||
|
||||
\item{title}{Character. Plot title. Default "".}
|
||||
}
|
||||
\description{
|
||||
Plot Variable Importance
|
||||
}
|
||||
14
man/prepare_eda_recipe.Rd
Normal file
14
man/prepare_eda_recipe.Rd
Normal file
@@ -0,0 +1,14 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{prepare_eda_recipe}
|
||||
\alias{prepare_eda_recipe}
|
||||
\title{Prepare EDA Recipe}
|
||||
\usage{
|
||||
prepare_eda_recipe(eda_data)
|
||||
}
|
||||
\arguments{
|
||||
\item{eda_data}{Raw EDA data}
|
||||
}
|
||||
\description{
|
||||
Prepare EDA Recipe
|
||||
}
|
||||
21
man/render_slides.Rd
Normal file
21
man/render_slides.Rd
Normal file
@@ -0,0 +1,21 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{render_slides}
|
||||
\alias{render_slides}
|
||||
\title{Render Quarto revealjs slideshow after required assets exist}
|
||||
\usage{
|
||||
render_slides(qmd = "index.qmd", assets, output_dir = "reports/slides")
|
||||
}
|
||||
\arguments{
|
||||
\item{qmd}{Character. Input Quarto file (e.g. "index.qmd").}
|
||||
|
||||
\item{assets}{Character vector. File paths that must exist before rendering.}
|
||||
|
||||
\item{output_dir}{Character. Output directory for rendered slides.}
|
||||
}
|
||||
\value{
|
||||
Character path to the rendered HTML file.
|
||||
}
|
||||
\description{
|
||||
Render Quarto revealjs slideshow after required assets exist
|
||||
}
|
||||
34
man/run_imbalance_tournament.Rd
Normal file
34
man/run_imbalance_tournament.Rd
Normal file
@@ -0,0 +1,34 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{run_imbalance_tournament}
|
||||
\alias{run_imbalance_tournament}
|
||||
\title{Run Class Imbalance Tournament}
|
||||
\usage{
|
||||
run_imbalance_tournament(
|
||||
tasks,
|
||||
windows,
|
||||
feature_prefix,
|
||||
bucket_name = "baf-fraud",
|
||||
inputs_prefix = "05_model_input"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{tasks}{A tibble containing recipe_name, data_folder, and scale_pos_weight.}
|
||||
|
||||
\item{windows}{A tibble containing window_id, train_months, and test_month.}
|
||||
|
||||
\item{feature_prefix}{Character. The upstream dependency prefix (used to force DAG execution).}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default "baf-fraud".}
|
||||
|
||||
\item{inputs_prefix}{Character. The folder containing the sampled data. Default "05_model_input".}
|
||||
}
|
||||
\value{
|
||||
A tibble with the summarized tournament results.
|
||||
}
|
||||
\description{
|
||||
Trains LightGBM models across different class imbalance strategies
|
||||
(Standard, SMOTE, Adasyn, etc.) using sliding time windows. Evaluates
|
||||
performance using PR-AUC and calculates statistical significance.
|
||||
Includes common-sense hyperparameter defaults to prevent overfitting.
|
||||
}
|
||||
31
man/save_report_figure.Rd
Normal file
31
man/save_report_figure.Rd
Normal file
@@ -0,0 +1,31 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{save_report_figure}
|
||||
\alias{save_report_figure}
|
||||
\title{Save a report figure artifact}
|
||||
\usage{
|
||||
save_report_figure(
|
||||
plot,
|
||||
filename,
|
||||
out_dir = "reports/figures",
|
||||
width = 12,
|
||||
height = 6.75,
|
||||
dpi = 300
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{plot}{A ggplot object.}
|
||||
|
||||
\item{filename}{Character. Output filename, e.g. \code{"fig_fraud_by_month.png"}.}
|
||||
|
||||
\item{out_dir}{Character. Output directory. Default \code{"reports/figures"}.}
|
||||
|
||||
\item{width, height, dpi}{Numeric. Passed to \code{ggplot2::ggsave()}.}
|
||||
}
|
||||
\value{
|
||||
Character. Normalized path to the saved file.
|
||||
}
|
||||
\description{
|
||||
Saves a ggplot object to \code{out_dir} (default \code{reports/figures}).
|
||||
Intended for use in \code{targets} pipelines as a file-producing target.
|
||||
}
|
||||
21
man/save_report_table.Rd
Normal file
21
man/save_report_table.Rd
Normal file
@@ -0,0 +1,21 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{save_report_table}
|
||||
\alias{save_report_table}
|
||||
\title{Save a report table artifact}
|
||||
\usage{
|
||||
save_report_table(x, filename, out_dir = "reports/tables")
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Object to save.}
|
||||
|
||||
\item{filename}{Output filename, e.g. "tbl_fraud_by_month.rds".}
|
||||
|
||||
\item{out_dir}{Output directory. Default "reports/tables".}
|
||||
}
|
||||
\value{
|
||||
Character path to saved file.
|
||||
}
|
||||
\description{
|
||||
Save a report table artifact
|
||||
}
|
||||
14
man/train_diag_model.Rd
Normal file
14
man/train_diag_model.Rd
Normal file
@@ -0,0 +1,14 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{train_diag_model}
|
||||
\alias{train_diag_model}
|
||||
\title{Train Diagnostic Model}
|
||||
\usage{
|
||||
train_diag_model(baked_data)
|
||||
}
|
||||
\arguments{
|
||||
\item{baked_data}{Baked EDA data}
|
||||
}
|
||||
\description{
|
||||
Train Diagnostic Model
|
||||
}
|
||||
30
man/train_production_model.Rd
Normal file
30
man/train_production_model.Rd
Normal file
@@ -0,0 +1,30 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{train_production_model}
|
||||
\alias{train_production_model}
|
||||
\title{Train and Serialize Production LightGBM Model}
|
||||
\usage{
|
||||
train_production_model(
|
||||
data,
|
||||
recipe,
|
||||
best_params,
|
||||
model_filename = "lgbm_prod.txt"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{A data frame containing the full BAF dataset (Months 0-7).}
|
||||
|
||||
\item{recipe}{A prepared tidymodels recipe.}
|
||||
|
||||
\item{best_params}{A list or tibble of the winning hyperparameters.}
|
||||
|
||||
\item{model_filename}{Character. The target filename. Defaults to "lgbm_prod.txt".}
|
||||
}
|
||||
\value{
|
||||
Character. The MinIO URI of the uploaded model artifact.
|
||||
}
|
||||
\description{
|
||||
Trains a LightGBM model on the complete dataset using the winning
|
||||
hyperparameters, serializes it to a text file, and uploads it directly
|
||||
to MinIO via the Apache Arrow S3 interface.
|
||||
}
|
||||
Reference in New Issue
Block a user