Refactor: consistent naming across functions, targets, and pkgdown
Functions: prepare_eda_recipe -> build_eda_recipe,
create_efficiency_plot -> plot_efficiency,
format_class_imbalance_tourney_gt -> format_tournament_gt
Targets: model_inputs_prefix -> baf_model_input_prefix,
tbl_fraud_by_month_data -> fraud_by_month_summary,
model_diag -> diag_fit, winning_params -> best_params,
production_recipe_blueprint -> prod_recipe,
final_eval_data -> test_predictions
pkgdown: restructured reference index into 6 logical sections,
removed stale names and development comments.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
272
_targets.R
272
_targets.R
@@ -19,9 +19,9 @@ tar_option_set(
|
||||
"scales",
|
||||
"ggplot2",
|
||||
"quarto",
|
||||
"corrr",
|
||||
"recipes",
|
||||
"themis",
|
||||
"corrr",
|
||||
"recipes",
|
||||
"themis",
|
||||
"tidyselect"
|
||||
)
|
||||
)
|
||||
@@ -37,7 +37,7 @@ list(
|
||||
bucket_name = "baf-fraud"
|
||||
)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
baf_primary_prefix,
|
||||
clean_baf_base(
|
||||
@@ -49,7 +49,7 @@ list(
|
||||
verbose = TRUE
|
||||
)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
baf_feature_prefix,
|
||||
engineer_features(
|
||||
@@ -61,98 +61,60 @@ list(
|
||||
verbose = TRUE
|
||||
)
|
||||
),
|
||||
|
||||
# ---- Figure objects ----
|
||||
|
||||
# ---- 05_model_input Generation ----
|
||||
tar_target(
|
||||
fig_fraud_by_month,
|
||||
plot_fraud_by_month(baf_primary_prefix, bucket_name = "baf-fraud")
|
||||
baf_model_input_prefix,
|
||||
generate_model_inputs(
|
||||
feature_prefix = baf_feature_prefix,
|
||||
out_prefix = "05_model_input",
|
||||
bucket_name = "baf-fraud"
|
||||
)
|
||||
),
|
||||
|
||||
# ---- Saved figure path (file target) ----
|
||||
tar_target(
|
||||
fig_fraud_by_month_path,
|
||||
save_report_figure(
|
||||
fig_fraud_by_month,
|
||||
filename = "fig_fraud_by_month.png",
|
||||
out_dir = "reports/figures"
|
||||
),
|
||||
format = "file"
|
||||
),
|
||||
tar_target(
|
||||
tbl_fraud_by_month_data,
|
||||
compute_fraud_by_month(baf_primary_prefix)
|
||||
),
|
||||
|
||||
tar_target(
|
||||
tbl_fraud_by_month_gt,
|
||||
format_fraud_by_month_gt(tbl_fraud_by_month_data)
|
||||
),
|
||||
|
||||
tar_target(
|
||||
tbl_fraud_by_month_path,
|
||||
save_report_table(tbl_fraud_by_month_gt, filename = "tbl_fraud_by_month.rds"),
|
||||
format = "file"
|
||||
),
|
||||
|
||||
# ---- Exploratory Data Analysis (EDA) Layer ----
|
||||
|
||||
# ---- EDA Layer ----
|
||||
tar_target(
|
||||
data_eda_m0,
|
||||
connect_baf(baf_primary_prefix, use_duckdb = TRUE) |>
|
||||
filter(month == 0) |>
|
||||
collect()
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
eda_recipe,
|
||||
prepare_eda_recipe(data_eda_m0)
|
||||
build_eda_recipe(data_eda_m0)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
data_baked_eda_m0,
|
||||
bake(eda_recipe, new_data = data_eda_m0)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
model_diag,
|
||||
diag_fit,
|
||||
train_diag_model(data_baked_eda_m0)
|
||||
),
|
||||
|
||||
|
||||
# ---- EDA Figures ----
|
||||
tar_target(fig_var_imp, plot_var_imp(model_diag)),
|
||||
tar_target(fig_var_imp, plot_var_imp(diag_fit)),
|
||||
tar_target(fig_hexbin_interaction, plot_hexbin_interaction(data_baked_eda_m0)),
|
||||
tar_target(fig_missingness, plot_missingness(data_eda_m0)),
|
||||
tar_target(fig_num_cor, plot_num_cor(data_eda_m0)),
|
||||
|
||||
# ---- Saved EDA Figure Paths ----
|
||||
tar_target(fig_missingness, plot_missingness(data_eda_m0)),
|
||||
tar_target(fig_num_cor, plot_num_cor(data_eda_m0)),
|
||||
|
||||
# ---- Fraud Prevalence ----
|
||||
tar_target(
|
||||
fig_var_imp_path,
|
||||
save_report_figure(fig_var_imp, "fig_var_imp.png"),
|
||||
format = "file"
|
||||
fig_fraud_by_month,
|
||||
plot_fraud_by_month(baf_primary_prefix, bucket_name = "baf-fraud")
|
||||
),
|
||||
|
||||
tar_target(
|
||||
fig_hexbin_interaction_path,
|
||||
save_report_figure(fig_hexbin_interaction, "fig_hexbin_interaction.png"),
|
||||
format = "file"
|
||||
fraud_by_month_summary,
|
||||
compute_fraud_by_month(baf_primary_prefix)
|
||||
),
|
||||
|
||||
tar_target(
|
||||
fig_missingness_path,
|
||||
save_report_figure(fig_missingness, "fig_missingness.png"),
|
||||
format = "file"
|
||||
),
|
||||
tar_target(
|
||||
fig_num_cor_path,
|
||||
save_report_figure(fig_num_cor, "fig_num_cor.png"),
|
||||
format = "file"
|
||||
),
|
||||
|
||||
# ---- 05_model_input Generation ----
|
||||
tar_target(
|
||||
model_inputs_prefix,
|
||||
generate_model_inputs(
|
||||
feature_prefix = baf_feature_prefix,
|
||||
out_prefix = "05_model_input",
|
||||
bucket_name = "baf-fraud"
|
||||
)
|
||||
tbl_fraud_by_month_gt,
|
||||
format_fraud_by_month_gt(fraud_by_month_summary)
|
||||
),
|
||||
|
||||
# ---- Tournament Inputs ----
|
||||
@@ -161,14 +123,14 @@ list(
|
||||
tibble::tribble(
|
||||
~recipe_name, ~data_folder, ~scale_pos_weight,
|
||||
"Standard", "baseline", 1,
|
||||
"Weighted", "baseline", 4,
|
||||
"Weighted", "baseline", 4,
|
||||
"Under", "under", 1,
|
||||
"Smote", "smote", 1,
|
||||
"Adasyn", "adasyn", 1,
|
||||
"Tomek", "tomek", 1
|
||||
)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
imbalance_windows,
|
||||
tibble::tribble(
|
||||
@@ -178,110 +140,112 @@ list(
|
||||
"Window 3", c(2, 3, 4), 5
|
||||
)
|
||||
),
|
||||
|
||||
# ---- 1. Data Layer (The Tournament Results) ----
|
||||
|
||||
# ---- Hyperparameter Tuning ----
|
||||
tar_target(
|
||||
best_params,
|
||||
tune_lgbm(imbalance_windows)
|
||||
),
|
||||
|
||||
# ---- Tournament Results ----
|
||||
tar_target(
|
||||
tbl_strategy_showdown,
|
||||
{
|
||||
# Force DAG to wait for the folders to be generated
|
||||
force(model_inputs_prefix)
|
||||
# Pass baf_feature_prefix so it tracks the latest layer
|
||||
force(baf_model_input_prefix)
|
||||
run_imbalance_tournament(imbalance_tasks, imbalance_windows, baf_feature_prefix)
|
||||
}
|
||||
),
|
||||
|
||||
# ---- 2. Figure Layer ----
|
||||
|
||||
tar_target(
|
||||
fig_strategy_showdown,
|
||||
create_efficiency_plot(tbl_strategy_showdown)
|
||||
plot_efficiency(tbl_strategy_showdown)
|
||||
),
|
||||
|
||||
tar_target(
|
||||
fig_strategy_showdown_path,
|
||||
save_report_figure(
|
||||
fig_strategy_showdown,
|
||||
filename = "fig_strategy_showdown.png",
|
||||
out_dir = "reports/figures"
|
||||
),
|
||||
format = "file"
|
||||
),
|
||||
|
||||
# ---- 3. Table Layer (gt object) ----
|
||||
|
||||
tar_target(
|
||||
tbl_strategy_showdown_gt,
|
||||
format_class_imbalance_tourney_gt(tbl_strategy_showdown)
|
||||
format_tournament_gt(tbl_strategy_showdown)
|
||||
),
|
||||
|
||||
|
||||
# ---- Final Evaluation (Months 6-7) ----
|
||||
tar_target(
|
||||
tbl_strategy_showdown_path,
|
||||
save_report_table(
|
||||
tbl_strategy_showdown_gt,
|
||||
filename = "tbl_strategy_showdown.rds",
|
||||
out_dir = "reports/tables"
|
||||
),
|
||||
format = "file"
|
||||
test_predictions,
|
||||
evaluate_final_model(params = best_params)
|
||||
),
|
||||
|
||||
# ---- Final Production Evaluation ----
|
||||
|
||||
tar_target(
|
||||
final_eval_data,
|
||||
evaluate_final_model(params = winning_params)
|
||||
final_conf_mat,
|
||||
yardstick::conf_mat(test_predictions, truth, pred_class)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
final_conf_mat,
|
||||
yardstick::conf_mat(final_eval_data, truth, pred_class)
|
||||
final_roc_curve,
|
||||
yardstick::roc_curve(test_predictions, truth, prob)
|
||||
),
|
||||
|
||||
|
||||
tar_target(
|
||||
final_roc_curve,
|
||||
yardstick::roc_curve(final_eval_data, truth, prob)
|
||||
final_pr_curve,
|
||||
yardstick::pr_curve(test_predictions, truth, prob)
|
||||
),
|
||||
|
||||
tar_target(
|
||||
final_pr_curve,
|
||||
yardstick::pr_curve(final_eval_data, truth, prob)
|
||||
),
|
||||
|
||||
# ---- Save Final Assets ----
|
||||
|
||||
tar_target(
|
||||
fig_final_curves,
|
||||
{
|
||||
p1 <- ggplot2::autoplot(final_roc_curve) + ggplot2::labs(title = "ROC Curve (Months 6-7)")
|
||||
p2 <- ggplot2::autoplot(final_pr_curve) + ggplot2::labs(title = "PR Curve (Months 6-7)")
|
||||
p2 <- ggplot2::autoplot(final_pr_curve) + ggplot2::labs(title = "PR Curve (Months 6-7)")
|
||||
cowplot::plot_grid(p1, p2)
|
||||
}
|
||||
),
|
||||
|
||||
tar_target(
|
||||
fig_final_curves_path,
|
||||
save_report_figure(fig_final_curves, "fig_final_curves.png"),
|
||||
format = "file"
|
||||
),
|
||||
|
||||
tar_target(
|
||||
tbl_final_conf_mat_path,
|
||||
save_report_table(final_conf_mat, "tbl_final_conf_mat.rds", out_dir = "reports/tables"),
|
||||
format = "file"
|
||||
),
|
||||
# ---- Generate and Save Heatmap ----
|
||||
|
||||
tar_target(
|
||||
fig_final_conf_mat,
|
||||
plot_conf_mat_heatmap(final_conf_mat)
|
||||
),
|
||||
|
||||
|
||||
# ---- Production Deployment ----
|
||||
tar_target(
|
||||
fig_final_conf_mat_path,
|
||||
save_report_figure(fig_final_conf_mat, "fig_final_conf_mat.png"),
|
||||
format = "file"
|
||||
data_full,
|
||||
connect_baf(baf_feature_prefix, use_duckdb = TRUE) |>
|
||||
collect()
|
||||
),
|
||||
# ---- Report Dependency Update ----
|
||||
|
||||
tar_target(
|
||||
prod_recipe,
|
||||
build_baf_recipe(data_full)
|
||||
),
|
||||
|
||||
tar_target(
|
||||
production_model_uri,
|
||||
train_production_model(
|
||||
data = data_full,
|
||||
recipe = prod_recipe,
|
||||
best_params = best_params,
|
||||
model_filename = "baf_lgbm_prod_v1.txt"
|
||||
),
|
||||
format = "rds"
|
||||
),
|
||||
|
||||
# ---- Saved Figure Paths ----
|
||||
tar_target(fig_fraud_by_month_path, save_report_figure(fig_fraud_by_month, "fig_fraud_by_month.png"), format = "file"),
|
||||
tar_target(fig_var_imp_path, save_report_figure(fig_var_imp, "fig_var_imp.png"), format = "file"),
|
||||
tar_target(fig_hexbin_interaction_path, save_report_figure(fig_hexbin_interaction, "fig_hexbin_interaction.png"), format = "file"),
|
||||
tar_target(fig_missingness_path, save_report_figure(fig_missingness, "fig_missingness.png"), format = "file"),
|
||||
tar_target(fig_num_cor_path, save_report_figure(fig_num_cor, "fig_num_cor.png"), format = "file"),
|
||||
tar_target(fig_strategy_showdown_path, save_report_figure(fig_strategy_showdown, "fig_strategy_showdown.png", out_dir = "reports/figures"), format = "file"),
|
||||
tar_target(fig_final_conf_mat_path, save_report_figure(fig_final_conf_mat, "fig_final_conf_mat.png"), format = "file"),
|
||||
tar_target(fig_final_curves_path, save_report_figure(fig_final_curves, "fig_final_curves.png"), format = "file"),
|
||||
|
||||
# ---- Saved Table Paths ----
|
||||
tar_target(tbl_fraud_by_month_path, save_report_table(tbl_fraud_by_month_gt, "tbl_fraud_by_month.rds"), format = "file"),
|
||||
tar_target(tbl_strategy_showdown_path, save_report_table(tbl_strategy_showdown_gt, "tbl_strategy_showdown.rds", out_dir = "reports/tables"), format = "file"),
|
||||
tar_target(tbl_final_conf_mat_path, save_report_table(final_conf_mat, "tbl_final_conf_mat.rds", out_dir = "reports/tables"), format = "file"),
|
||||
|
||||
# ---- Report Assembly ----
|
||||
tar_target(
|
||||
report_assets,
|
||||
c(
|
||||
fig_fraud_by_month_path,
|
||||
tbl_fraud_by_month_path,
|
||||
fig_strategy_showdown_path,
|
||||
fig_strategy_showdown_path,
|
||||
tbl_strategy_showdown_path,
|
||||
fig_var_imp_path,
|
||||
fig_hexbin_interaction_path,
|
||||
@@ -290,33 +254,9 @@ list(
|
||||
),
|
||||
format = "file"
|
||||
),
|
||||
|
||||
|
||||
tar_quarto(
|
||||
report_slides,
|
||||
path = "index.qmd"
|
||||
),
|
||||
# production model deployment
|
||||
tar_target(
|
||||
data_full,
|
||||
connect_baf(baf_feature_prefix, use_duckdb = TRUE) |>
|
||||
collect()
|
||||
),
|
||||
tar_target(
|
||||
production_recipe_blueprint,
|
||||
build_baf_recipe(data_full)
|
||||
),
|
||||
tar_target(
|
||||
winning_params,
|
||||
tune_lgbm(imbalance_windows)
|
||||
),
|
||||
tar_target(
|
||||
production_model_uri,
|
||||
train_production_model(
|
||||
data = data_full,
|
||||
recipe = production_recipe_blueprint, # <--- Pass the untrained blueprint!
|
||||
best_params = winning_params,
|
||||
model_filename = "baf_lgbm_prod_v1.txt"
|
||||
),
|
||||
format = "rds"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user