Initial commit: BAF Lakehouse fraud detection pipeline
End-to-end LightGBM fraud detection pipeline built as an R package, orchestrated by targets with data stored in MinIO via Apache Arrow. Includes 6-layer Lakehouse architecture, class imbalance tournament, formally tuned hyperparameters (PR-AUC 0.198), and Quarto RevealJS slides. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
27
man/generate_model_inputs.Rd
Normal file
27
man/generate_model_inputs.Rd
Normal file
@@ -0,0 +1,27 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/functions.R
|
||||
\name{generate_model_inputs}
|
||||
\alias{generate_model_inputs}
|
||||
\title{Generate Resampled Model Inputs}
|
||||
\usage{
|
||||
generate_model_inputs(
|
||||
feature_prefix = "04_feature/variant=Base",
|
||||
out_prefix = "05_model_input",
|
||||
bucket_name = "baf-fraud"
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{feature_prefix}{Character. Input prefix. Default "04_feature/variant=Base".}
|
||||
|
||||
\item{out_prefix}{Character. Output prefix base. Default "05_model_input".}
|
||||
|
||||
\item{bucket_name}{Character. Bucket name. Default "baf-fraud".}
|
||||
}
|
||||
\value{
|
||||
Character. The output prefix (returned so that \pkg{targets} can track it as a dependency).
|
||||
}
|
||||
\description{
|
||||
Reads the engineered feature layer, prepares a base tidymodels recipe,
|
||||
and generates resampled datasets (Baseline, Under, SMOTE, Adasyn, Tomek)
|
||||
across all months, saving them under \code{out_prefix} (default "05_model_input").
|
||||
}
|
||||
Reference in New Issue
Block a user