Refactor: consistent naming across functions, targets, and pkgdown
Functions:
  prepare_eda_recipe -> build_eda_recipe,
  create_efficiency_plot -> plot_efficiency,
  format_class_imbalance_tourney_gt -> format_tournament_gt
Targets:
  model_inputs_prefix -> baf_model_input_prefix,
  tbl_fraud_by_month_data -> fraud_by_month_summary,
  model_diag -> diag_fit,
  winning_params -> best_params,
  production_recipe_blueprint -> prod_recipe,
  final_eval_data -> test_predictions
pkgdown: restructure the reference index into 6 logical sections;
  remove stale names and development comments.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,14 +1,15 @@
|
|||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
export(build_baf_recipe)
|
export(build_baf_recipe)
|
||||||
|
export(build_eda_recipe)
|
||||||
export(clean_baf_base)
|
export(clean_baf_base)
|
||||||
export(compute_fraud_by_month)
|
export(compute_fraud_by_month)
|
||||||
export(connect_baf)
|
export(connect_baf)
|
||||||
export(convert_to_parquet)
|
export(convert_to_parquet)
|
||||||
export(engineer_features)
|
export(engineer_features)
|
||||||
export(evaluate_final_model)
|
export(evaluate_final_model)
|
||||||
export(format_class_imbalance_tourney_gt)
|
|
||||||
export(format_fraud_by_month_gt)
|
export(format_fraud_by_month_gt)
|
||||||
|
export(format_tournament_gt)
|
||||||
export(generate_model_inputs)
|
export(generate_model_inputs)
|
||||||
export(plot_conf_mat_heatmap)
|
export(plot_conf_mat_heatmap)
|
||||||
export(plot_fraud_by_month)
|
export(plot_fraud_by_month)
|
||||||
@@ -16,7 +17,6 @@ export(plot_hexbin_interaction)
|
|||||||
export(plot_missingness)
|
export(plot_missingness)
|
||||||
export(plot_num_cor)
|
export(plot_num_cor)
|
||||||
export(plot_var_imp)
|
export(plot_var_imp)
|
||||||
export(prepare_eda_recipe)
|
|
||||||
export(render_slides)
|
export(render_slides)
|
||||||
export(run_imbalance_tournament)
|
export(run_imbalance_tournament)
|
||||||
export(save_report_figure)
|
export(save_report_figure)
|
||||||
|
|||||||
@@ -580,7 +580,7 @@ run_imbalance_tournament <- function(
|
|||||||
return(results_df)
|
return(results_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Format Class Imbalance Tournament Table
|
#' Format Tournament Results Table
|
||||||
#'
|
#'
|
||||||
#' Aggregates results from the model tournament and performs paired t-tests
|
#' Aggregates results from the model tournament and performs paired t-tests
|
||||||
#' against the 'Standard' model to determine statistical significance.
|
#' against the 'Standard' model to determine statistical significance.
|
||||||
@@ -593,7 +593,7 @@ run_imbalance_tournament <- function(
|
|||||||
#'
|
#'
|
||||||
#' @return A formatted gt table object.
|
#' @return A formatted gt table object.
|
||||||
#' @export
|
#' @export
|
||||||
format_class_imbalance_tourney_gt <- function(results_df) {
|
format_tournament_gt <- function(results_df) {
|
||||||
|
|
||||||
# Extract scores for the 'Standard' recipe to use as the baseline for t-tests
|
# Extract scores for the 'Standard' recipe to use as the baseline for t-tests
|
||||||
standard_scores <- results_df |>
|
standard_scores <- results_df |>
|
||||||
@@ -648,12 +648,12 @@ format_class_imbalance_tourney_gt <- function(results_df) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Create Effectiveness vs Efficiency Plot
|
#' Plot Effectiveness vs Efficiency
|
||||||
#' @param results_df Tibble from run_imbalance_tournament
|
#' @param results_df Tibble from run_imbalance_tournament
|
||||||
#' @importFrom ggplot2 ggplot aes geom_point scale_color_manual labs theme_minimal
|
#' @importFrom ggplot2 ggplot aes geom_point scale_color_manual labs theme_minimal
|
||||||
#' @importFrom ggrepel geom_text_repel
|
#' @importFrom ggrepel geom_text_repel
|
||||||
#' @importFrom cowplot theme_half_open background_grid
|
#' @importFrom cowplot theme_half_open background_grid
|
||||||
create_efficiency_plot <- function(results_df) {
|
plot_efficiency <- function(results_df) {
|
||||||
# Aggregate by recipe
|
# Aggregate by recipe
|
||||||
plot_data <- results_df |>
|
plot_data <- results_df |>
|
||||||
dplyr::group_by(recipe) |>
|
dplyr::group_by(recipe) |>
|
||||||
@@ -677,11 +677,11 @@ create_efficiency_plot <- function(results_df) {
|
|||||||
cowplot::theme_half_open(font_family = "Atkinson Hyperlegible") +
|
cowplot::theme_half_open(font_family = "Atkinson Hyperlegible") +
|
||||||
cowplot::background_grid(major = "y")
|
cowplot::background_grid(major = "y")
|
||||||
}
|
}
|
||||||
#' Prepare EDA Recipe
|
#' Build EDA Recipe
|
||||||
#' @param eda_data Raw EDA data
|
#' @param eda_data Raw EDA data
|
||||||
#' @importFrom recipes recipe update_role step_novel step_unknown step_impute_median step_dummy all_nominal_predictors all_numeric_predictors prep
|
#' @importFrom recipes recipe update_role step_novel step_unknown step_impute_median step_dummy all_nominal_predictors all_numeric_predictors prep
|
||||||
#' @export
|
#' @export
|
||||||
prepare_eda_recipe <- function(eda_data) {
|
build_eda_recipe <- function(eda_data) {
|
||||||
recipe(outcome ~ ., data = eda_data) |>
|
recipe(outcome ~ ., data = eda_data) |>
|
||||||
update_role(month, new_role = "ID") |>
|
update_role(month, new_role = "ID") |>
|
||||||
step_novel(all_nominal_predictors()) |>
|
step_novel(all_nominal_predictors()) |>
|
||||||
|
|||||||
51
_pkgdown.yml
51
_pkgdown.yml
@@ -2,7 +2,7 @@ url: https://docs.robwiederstein.org/baflakehouse
|
|||||||
|
|
||||||
template:
|
template:
|
||||||
bootstrap: 5
|
bootstrap: 5
|
||||||
bootswatch: flatly # Clean, professional look
|
bootswatch: flatly
|
||||||
|
|
||||||
navbar:
|
navbar:
|
||||||
structure:
|
structure:
|
||||||
@@ -15,7 +15,7 @@ navbar:
|
|||||||
|
|
||||||
reference:
|
reference:
|
||||||
- title: "Data Ingestion & Lakehouse Setup"
|
- title: "Data Ingestion & Lakehouse Setup"
|
||||||
desc: "Functions for moving data from CSV to partitioned Parquet in MinIO."
|
desc: "Functions for moving raw CSV data into the MinIO Lakehouse as partitioned Parquet."
|
||||||
contents:
|
contents:
|
||||||
- baflakehouse-package
|
- baflakehouse-package
|
||||||
- convert_to_parquet
|
- convert_to_parquet
|
||||||
@@ -23,36 +23,43 @@ reference:
|
|||||||
- clean_baf_base
|
- clean_baf_base
|
||||||
|
|
||||||
- title: "Feature Engineering & Preprocessing"
|
- title: "Feature Engineering & Preprocessing"
|
||||||
desc: "The 'Recipes' layer of the pipeline."
|
desc: "Recipes and transformations applied across the pipeline layers."
|
||||||
contents:
|
contents:
|
||||||
- engineer_features
|
- engineer_features
|
||||||
- prepare_eda_recipe
|
|
||||||
- build_baf_recipe # NEW: Untrained blueprint for production
|
|
||||||
- generate_model_inputs
|
- generate_model_inputs
|
||||||
|
- build_eda_recipe
|
||||||
|
- build_baf_recipe
|
||||||
|
|
||||||
- title: "The Tournament (Model Selection)"
|
- title: "Exploratory Data Analysis"
|
||||||
desc: "Cross-validation and imbalance strategy testing."
|
desc: "Diagnostic model and visualizations for understanding the fraud signal."
|
||||||
|
contents:
|
||||||
|
- train_diag_model
|
||||||
|
- plot_var_imp
|
||||||
|
- plot_hexbin_interaction
|
||||||
|
- plot_missingness
|
||||||
|
- plot_num_cor
|
||||||
|
|
||||||
|
- title: "Model Selection & Tuning"
|
||||||
|
desc: "Imbalance strategy tournament, hyperparameter tuning, and results formatting."
|
||||||
contents:
|
contents:
|
||||||
- run_imbalance_tournament
|
- run_imbalance_tournament
|
||||||
- tune_lgbm
|
- tune_lgbm
|
||||||
- train_diag_model
|
- format_tournament_gt
|
||||||
- create_efficiency_plot # Moved here: Belongs with the tournament
|
- plot_efficiency
|
||||||
|
|
||||||
- title: "Final Evaluation & Production Deployment"
|
- title: "Final Evaluation & Production Deployment"
|
||||||
desc: "Results on unseen data (Months 6-7) and MinIO artifact serialization."
|
desc: "Holdout evaluation on months 6-7 and MinIO model artifact serialization."
|
||||||
contents:
|
contents:
|
||||||
- evaluate_final_model
|
- evaluate_final_model
|
||||||
- train_production_model # NEW: The final deployment function
|
- train_production_model
|
||||||
|
|
||||||
- title: "Reporting: Tables & Visualizations"
|
- title: "Reporting"
|
||||||
desc: "Generating ggplot2 figures and gt tables for Quarto."
|
desc: "Figures, tables, and slide rendering for the Quarto presentation."
|
||||||
contents:
|
contents:
|
||||||
- starts_with("plot_")
|
- plot_fraud_by_month
|
||||||
- starts_with("compute_")
|
- plot_conf_mat_heatmap
|
||||||
- starts_with("format_") # Neatly catches all your gt table formatters
|
- compute_fraud_by_month
|
||||||
|
- format_fraud_by_month_gt
|
||||||
- title: "Pipeline Utilities"
|
- save_report_figure
|
||||||
desc: "Internal helpers for the targets workflow and slide generation."
|
- save_report_table
|
||||||
contents:
|
- render_slides
|
||||||
- starts_with("save_report_")
|
|
||||||
- render_slides # Consolidated here
|
|
||||||
|
|||||||
214
_targets.R
214
_targets.R
@@ -62,39 +62,17 @@ list(
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- Figure objects ----
|
# ---- 05_model_input Generation ----
|
||||||
tar_target(
|
tar_target(
|
||||||
fig_fraud_by_month,
|
baf_model_input_prefix,
|
||||||
plot_fraud_by_month(baf_primary_prefix, bucket_name = "baf-fraud")
|
generate_model_inputs(
|
||||||
|
feature_prefix = baf_feature_prefix,
|
||||||
|
out_prefix = "05_model_input",
|
||||||
|
bucket_name = "baf-fraud"
|
||||||
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- Saved figure path (file target) ----
|
# ---- EDA Layer ----
|
||||||
tar_target(
|
|
||||||
fig_fraud_by_month_path,
|
|
||||||
save_report_figure(
|
|
||||||
fig_fraud_by_month,
|
|
||||||
filename = "fig_fraud_by_month.png",
|
|
||||||
out_dir = "reports/figures"
|
|
||||||
),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
tbl_fraud_by_month_data,
|
|
||||||
compute_fraud_by_month(baf_primary_prefix)
|
|
||||||
),
|
|
||||||
|
|
||||||
tar_target(
|
|
||||||
tbl_fraud_by_month_gt,
|
|
||||||
format_fraud_by_month_gt(tbl_fraud_by_month_data)
|
|
||||||
),
|
|
||||||
|
|
||||||
tar_target(
|
|
||||||
tbl_fraud_by_month_path,
|
|
||||||
save_report_table(tbl_fraud_by_month_gt, filename = "tbl_fraud_by_month.rds"),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
|
|
||||||
# ---- Exploratory Data Analysis (EDA) Layer ----
|
|
||||||
tar_target(
|
tar_target(
|
||||||
data_eda_m0,
|
data_eda_m0,
|
||||||
connect_baf(baf_primary_prefix, use_duckdb = TRUE) |>
|
connect_baf(baf_primary_prefix, use_duckdb = TRUE) |>
|
||||||
@@ -104,7 +82,7 @@ list(
|
|||||||
|
|
||||||
tar_target(
|
tar_target(
|
||||||
eda_recipe,
|
eda_recipe,
|
||||||
prepare_eda_recipe(data_eda_m0)
|
build_eda_recipe(data_eda_m0)
|
||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
tar_target(
|
||||||
@@ -113,46 +91,30 @@ list(
|
|||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
tar_target(
|
||||||
model_diag,
|
diag_fit,
|
||||||
train_diag_model(data_baked_eda_m0)
|
train_diag_model(data_baked_eda_m0)
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- EDA Figures ----
|
# ---- EDA Figures ----
|
||||||
tar_target(fig_var_imp, plot_var_imp(model_diag)),
|
tar_target(fig_var_imp, plot_var_imp(diag_fit)),
|
||||||
tar_target(fig_hexbin_interaction, plot_hexbin_interaction(data_baked_eda_m0)),
|
tar_target(fig_hexbin_interaction, plot_hexbin_interaction(data_baked_eda_m0)),
|
||||||
tar_target(fig_missingness, plot_missingness(data_eda_m0)),
|
tar_target(fig_missingness, plot_missingness(data_eda_m0)),
|
||||||
tar_target(fig_num_cor, plot_num_cor(data_eda_m0)),
|
tar_target(fig_num_cor, plot_num_cor(data_eda_m0)),
|
||||||
|
|
||||||
# ---- Saved EDA Figure Paths ----
|
# ---- Fraud Prevalence ----
|
||||||
tar_target(
|
tar_target(
|
||||||
fig_var_imp_path,
|
fig_fraud_by_month,
|
||||||
save_report_figure(fig_var_imp, "fig_var_imp.png"),
|
plot_fraud_by_month(baf_primary_prefix, bucket_name = "baf-fraud")
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
fig_hexbin_interaction_path,
|
|
||||||
save_report_figure(fig_hexbin_interaction, "fig_hexbin_interaction.png"),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
fig_missingness_path,
|
|
||||||
save_report_figure(fig_missingness, "fig_missingness.png"),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
fig_num_cor_path,
|
|
||||||
save_report_figure(fig_num_cor, "fig_num_cor.png"),
|
|
||||||
format = "file"
|
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- 05_model_input Generation ----
|
|
||||||
tar_target(
|
tar_target(
|
||||||
model_inputs_prefix,
|
fraud_by_month_summary,
|
||||||
generate_model_inputs(
|
compute_fraud_by_month(baf_primary_prefix)
|
||||||
feature_prefix = baf_feature_prefix,
|
),
|
||||||
out_prefix = "05_model_input",
|
|
||||||
bucket_name = "baf-fraud"
|
tar_target(
|
||||||
)
|
tbl_fraud_by_month_gt,
|
||||||
|
format_fraud_by_month_gt(fraud_by_month_summary)
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- Tournament Inputs ----
|
# ---- Tournament Inputs ----
|
||||||
@@ -179,103 +141,105 @@ list(
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- 1. Data Layer (The Tournament Results) ----
|
# ---- Hyperparameter Tuning ----
|
||||||
|
tar_target(
|
||||||
|
best_params,
|
||||||
|
tune_lgbm(imbalance_windows)
|
||||||
|
),
|
||||||
|
|
||||||
|
# ---- Tournament Results ----
|
||||||
tar_target(
|
tar_target(
|
||||||
tbl_strategy_showdown,
|
tbl_strategy_showdown,
|
||||||
{
|
{
|
||||||
# Force DAG to wait for the folders to be generated
|
force(baf_model_input_prefix)
|
||||||
force(model_inputs_prefix)
|
|
||||||
# Pass baf_feature_prefix so it tracks the latest layer
|
|
||||||
run_imbalance_tournament(imbalance_tasks, imbalance_windows, baf_feature_prefix)
|
run_imbalance_tournament(imbalance_tasks, imbalance_windows, baf_feature_prefix)
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- 2. Figure Layer ----
|
|
||||||
tar_target(
|
tar_target(
|
||||||
fig_strategy_showdown,
|
fig_strategy_showdown,
|
||||||
create_efficiency_plot(tbl_strategy_showdown)
|
plot_efficiency(tbl_strategy_showdown)
|
||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
|
||||||
fig_strategy_showdown_path,
|
|
||||||
save_report_figure(
|
|
||||||
fig_strategy_showdown,
|
|
||||||
filename = "fig_strategy_showdown.png",
|
|
||||||
out_dir = "reports/figures"
|
|
||||||
),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
|
|
||||||
# ---- 3. Table Layer (gt object) ----
|
|
||||||
tar_target(
|
tar_target(
|
||||||
tbl_strategy_showdown_gt,
|
tbl_strategy_showdown_gt,
|
||||||
format_class_imbalance_tourney_gt(tbl_strategy_showdown)
|
format_tournament_gt(tbl_strategy_showdown)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
# ---- Final Evaluation (Months 6-7) ----
|
||||||
tar_target(
|
tar_target(
|
||||||
tbl_strategy_showdown_path,
|
test_predictions,
|
||||||
save_report_table(
|
evaluate_final_model(params = best_params)
|
||||||
tbl_strategy_showdown_gt,
|
|
||||||
filename = "tbl_strategy_showdown.rds",
|
|
||||||
out_dir = "reports/tables"
|
|
||||||
),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
|
|
||||||
# ---- Final Production Evaluation ----
|
|
||||||
tar_target(
|
|
||||||
final_eval_data,
|
|
||||||
evaluate_final_model(params = winning_params)
|
|
||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
tar_target(
|
||||||
final_conf_mat,
|
final_conf_mat,
|
||||||
yardstick::conf_mat(final_eval_data, truth, pred_class)
|
yardstick::conf_mat(test_predictions, truth, pred_class)
|
||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
tar_target(
|
||||||
final_roc_curve,
|
final_roc_curve,
|
||||||
yardstick::roc_curve(final_eval_data, truth, prob)
|
yardstick::roc_curve(test_predictions, truth, prob)
|
||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
tar_target(
|
||||||
final_pr_curve,
|
final_pr_curve,
|
||||||
yardstick::pr_curve(final_eval_data, truth, prob)
|
yardstick::pr_curve(test_predictions, truth, prob)
|
||||||
),
|
),
|
||||||
|
|
||||||
# ---- Save Final Assets ----
|
|
||||||
tar_target(
|
tar_target(
|
||||||
fig_final_curves,
|
fig_final_curves,
|
||||||
{
|
{
|
||||||
p1 <- ggplot2::autoplot(final_roc_curve) + ggplot2::labs(title = "ROC Curve (Months 6-7)")
|
p1 <- ggplot2::autoplot(final_roc_curve) + ggplot2::labs(title = "ROC Curve (Months 6-7)")
|
||||||
p2 <- ggplot2::autoplot(final_pr_curve) + ggplot2::labs(title = "PR Curve (Months 6-7)")
|
p2 <- ggplot2::autoplot(final_pr_curve) + ggplot2::labs(title = "PR Curve (Months 6-7)")
|
||||||
cowplot::plot_grid(p1, p2)
|
cowplot::plot_grid(p1, p2)
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
|
|
||||||
tar_target(
|
|
||||||
fig_final_curves_path,
|
|
||||||
save_report_figure(fig_final_curves, "fig_final_curves.png"),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
|
|
||||||
tar_target(
|
|
||||||
tbl_final_conf_mat_path,
|
|
||||||
save_report_table(final_conf_mat, "tbl_final_conf_mat.rds", out_dir = "reports/tables"),
|
|
||||||
format = "file"
|
|
||||||
),
|
|
||||||
# ---- Generate and Save Heatmap ----
|
|
||||||
tar_target(
|
tar_target(
|
||||||
fig_final_conf_mat,
|
fig_final_conf_mat,
|
||||||
plot_conf_mat_heatmap(final_conf_mat)
|
plot_conf_mat_heatmap(final_conf_mat)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
# ---- Production Deployment ----
|
||||||
tar_target(
|
tar_target(
|
||||||
fig_final_conf_mat_path,
|
data_full,
|
||||||
save_report_figure(fig_final_conf_mat, "fig_final_conf_mat.png"),
|
connect_baf(baf_feature_prefix, use_duckdb = TRUE) |>
|
||||||
format = "file"
|
collect()
|
||||||
),
|
),
|
||||||
# ---- Report Dependency Update ----
|
|
||||||
|
tar_target(
|
||||||
|
prod_recipe,
|
||||||
|
build_baf_recipe(data_full)
|
||||||
|
),
|
||||||
|
|
||||||
|
tar_target(
|
||||||
|
production_model_uri,
|
||||||
|
train_production_model(
|
||||||
|
data = data_full,
|
||||||
|
recipe = prod_recipe,
|
||||||
|
best_params = best_params,
|
||||||
|
model_filename = "baf_lgbm_prod_v1.txt"
|
||||||
|
),
|
||||||
|
format = "rds"
|
||||||
|
),
|
||||||
|
|
||||||
|
# ---- Saved Figure Paths ----
|
||||||
|
tar_target(fig_fraud_by_month_path, save_report_figure(fig_fraud_by_month, "fig_fraud_by_month.png"), format = "file"),
|
||||||
|
tar_target(fig_var_imp_path, save_report_figure(fig_var_imp, "fig_var_imp.png"), format = "file"),
|
||||||
|
tar_target(fig_hexbin_interaction_path, save_report_figure(fig_hexbin_interaction, "fig_hexbin_interaction.png"), format = "file"),
|
||||||
|
tar_target(fig_missingness_path, save_report_figure(fig_missingness, "fig_missingness.png"), format = "file"),
|
||||||
|
tar_target(fig_num_cor_path, save_report_figure(fig_num_cor, "fig_num_cor.png"), format = "file"),
|
||||||
|
tar_target(fig_strategy_showdown_path, save_report_figure(fig_strategy_showdown, "fig_strategy_showdown.png", out_dir = "reports/figures"), format = "file"),
|
||||||
|
tar_target(fig_final_conf_mat_path, save_report_figure(fig_final_conf_mat, "fig_final_conf_mat.png"), format = "file"),
|
||||||
|
tar_target(fig_final_curves_path, save_report_figure(fig_final_curves, "fig_final_curves.png"), format = "file"),
|
||||||
|
|
||||||
|
# ---- Saved Table Paths ----
|
||||||
|
tar_target(tbl_fraud_by_month_path, save_report_table(tbl_fraud_by_month_gt, "tbl_fraud_by_month.rds"), format = "file"),
|
||||||
|
tar_target(tbl_strategy_showdown_path, save_report_table(tbl_strategy_showdown_gt, "tbl_strategy_showdown.rds", out_dir = "reports/tables"), format = "file"),
|
||||||
|
tar_target(tbl_final_conf_mat_path, save_report_table(final_conf_mat, "tbl_final_conf_mat.rds", out_dir = "reports/tables"), format = "file"),
|
||||||
|
|
||||||
|
# ---- Report Assembly ----
|
||||||
tar_target(
|
tar_target(
|
||||||
report_assets,
|
report_assets,
|
||||||
c(
|
c(
|
||||||
@@ -294,29 +258,5 @@ list(
|
|||||||
tar_quarto(
|
tar_quarto(
|
||||||
report_slides,
|
report_slides,
|
||||||
path = "index.qmd"
|
path = "index.qmd"
|
||||||
),
|
|
||||||
# production model deployment
|
|
||||||
tar_target(
|
|
||||||
data_full,
|
|
||||||
connect_baf(baf_feature_prefix, use_duckdb = TRUE) |>
|
|
||||||
collect()
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
production_recipe_blueprint,
|
|
||||||
build_baf_recipe(data_full)
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
winning_params,
|
|
||||||
tune_lgbm(imbalance_windows)
|
|
||||||
),
|
|
||||||
tar_target(
|
|
||||||
production_model_uri,
|
|
||||||
train_production_model(
|
|
||||||
data = data_full,
|
|
||||||
recipe = production_recipe_blueprint, # <--- Pass the untrained blueprint!
|
|
||||||
best_params = winning_params,
|
|
||||||
model_filename = "baf_lgbm_prod_v1.txt"
|
|
||||||
),
|
|
||||||
format = "rds"
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -1,14 +1,14 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/functions.R
|
% Please edit documentation in R/functions.R
|
||||||
\name{prepare_eda_recipe}
|
\name{build_eda_recipe}
|
||||||
\alias{prepare_eda_recipe}
|
\alias{build_eda_recipe}
|
||||||
\title{Prepare EDA Recipe}
|
\title{Build EDA Recipe}
|
||||||
\usage{
|
\usage{
|
||||||
prepare_eda_recipe(eda_data)
|
build_eda_recipe(eda_data)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{eda_data}{Raw EDA data}
|
\item{eda_data}{Raw EDA data}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Prepare EDA Recipe
|
Build EDA Recipe
|
||||||
}
|
}
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/functions.R
|
% Please edit documentation in R/functions.R
|
||||||
\name{format_class_imbalance_tourney_gt}
|
\name{format_tournament_gt}
|
||||||
\alias{format_class_imbalance_tourney_gt}
|
\alias{format_tournament_gt}
|
||||||
\title{Format Class Imbalance Tournament Table}
|
\title{Format Tournament Results Table}
|
||||||
\usage{
|
\usage{
|
||||||
format_class_imbalance_tourney_gt(results_df)
|
format_tournament_gt(results_df)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{results_df}{The tibble output from \code{run_imbalance_tournament}.}
|
\item{results_df}{The tibble output from \code{run_imbalance_tournament}.}
|
||||||
@@ -1,14 +1,14 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/functions.R
|
% Please edit documentation in R/functions.R
|
||||||
\name{create_efficiency_plot}
|
\name{plot_efficiency}
|
||||||
\alias{create_efficiency_plot}
|
\alias{plot_efficiency}
|
||||||
\title{Create Effectiveness vs Efficiency Plot}
|
\title{Plot Effectiveness vs Efficiency}
|
||||||
\usage{
|
\usage{
|
||||||
create_efficiency_plot(results_df)
|
plot_efficiency(results_df)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{results_df}{Tibble from run_imbalance_tournament}
|
\item{results_df}{Tibble from run_imbalance_tournament}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Create Effectiveness vs Efficiency Plot
|
Plot Effectiveness vs Efficiency
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user