# Pipeline definition for the `targets` package.
#
# The script attaches targets/tarchetypes, declares the packages that are
# loaded for every target, sources the project helpers, and finally evaluates
# to a list of target objects. Data moves through prefixes in the "baf-fraud"
# bucket: 01_raw -> 02_intermediate -> 03_primary -> 04_feature ->
# 05_model_input.
#
# NOTE(review): all helper functions called in the commands below
# (convert_to_parquet(), clean_baf_base(), ...) are presumably defined in
# ./R/functions.R; their behaviour is not visible from this file.

library(targets)
library(tarchetypes)

# Packages made available to every target's command. Order is kept as-is
# because attach order decides which package masks which.
tar_option_set(
  packages = c(
    "arrow",
    "bonsai",
    "duckdb",
    "glue",
    "gt",
    "here",
    "lightgbm",
    "lubridate",
    "tidymodels",
    "tidyverse",
    "cowplot",
    "colorspace",
    "readr",
    "scales",
    "ggplot2",
    "quarto",
    "corrr",
    "recipes",
    "themis",
    "tidyselect"
  )
)

# Project helper functions used by the target commands.
tar_source("./R/functions.R")

list(
  # ---- Data layers: raw -> parquet -> primary -> feature ----
  # Each step takes the previous step's target as its input prefix, which is
  # what chains them together in the dependency graph.
  tar_target(
    baf_parquet_prefix,
    convert_to_parquet(
      from_prefix = "01_raw",
      to_prefix = "02_intermediate",
      bucket_name = "baf-fraud"
    )
  ),
  tar_target(
    baf_primary_prefix,
    clean_baf_base(
      in_prefix = baf_parquet_prefix,
      out_prefix = "03_primary/variant=Base",
      bucket_name = "baf-fraud",
      partitioning = "month",
      existing_data_behavior = "delete_matching",
      verbose = TRUE
    )
  ),
  tar_target(
    baf_feature_prefix,
    engineer_features(
      in_prefix = baf_primary_prefix,
      out_prefix = "04_feature/variant=Base",
      bucket_name = "baf-fraud",
      partitioning = "month",
      existing_data_behavior = "delete_matching",
      verbose = TRUE
    )
  ),

  # ---- Figure objects ----
  tar_target(
    fig_fraud_by_month,
    plot_fraud_by_month(baf_primary_prefix, bucket_name = "baf-fraud")
  ),

  # ---- Saved figure path (file target) ----
  # format = "file": the command's return value is treated as file path(s)
  # that targets tracks.
  tar_target(
    fig_fraud_by_month_path,
    save_report_figure(
      fig_fraud_by_month,
      filename = "fig_fraud_by_month.png",
      out_dir = "reports/figures"
    ),
    format = "file"
  ),

  # ---- Fraud-by-month table: data -> gt object -> saved file ----
  tar_target(
    tbl_fraud_by_month_data,
    compute_fraud_by_month(baf_primary_prefix)
  ),
  tar_target(
    tbl_fraud_by_month_gt,
    format_fraud_by_month_gt(tbl_fraud_by_month_data)
  ),
  tar_target(
    tbl_fraud_by_month_path,
    save_report_table(
      tbl_fraud_by_month_gt,
      filename = "tbl_fraud_by_month.rds"
    ),
    format = "file"
  ),

  # ---- Exploratory Data Analysis (EDA) Layer ----
  # Only rows with month == 0 are collected into memory for EDA.
  tar_target(
    data_eda_m0,
    connect_baf(baf_primary_prefix, use_duckdb = TRUE) |>
      filter(month == 0) |>
      collect()
  ),
  tar_target(
    eda_recipe,
    prepare_eda_recipe(data_eda_m0)
  ),
  tar_target(
    data_baked_eda_m0,
    bake(eda_recipe, new_data = data_eda_m0)
  ),
  tar_target(
    model_diag,
    train_diag_model(data_baked_eda_m0)
  ),

  # ---- EDA Figures ----
  tar_target(
    fig_var_imp,
    plot_var_imp(model_diag)
  ),
  tar_target(
    fig_hexbin_interaction,
    plot_hexbin_interaction(data_baked_eda_m0)
  ),
  tar_target(
    fig_missingness,
    plot_missingness(data_eda_m0)
  ),
  tar_target(
    fig_num_cor,
    plot_num_cor(data_eda_m0)
  ),

  # ---- Saved EDA Figure Paths ----
  # NOTE(review): no out_dir is passed here, so these rely on whatever default
  # save_report_figure() defines -- confirm it matches "reports/figures".
  tar_target(
    fig_var_imp_path,
    save_report_figure(fig_var_imp, "fig_var_imp.png"),
    format = "file"
  ),
  tar_target(
    fig_hexbin_interaction_path,
    save_report_figure(fig_hexbin_interaction, "fig_hexbin_interaction.png"),
    format = "file"
  ),
  tar_target(
    fig_missingness_path,
    save_report_figure(fig_missingness, "fig_missingness.png"),
    format = "file"
  ),
  tar_target(
    fig_num_cor_path,
    save_report_figure(fig_num_cor, "fig_num_cor.png"),
    format = "file"
  ),

  # ---- 05_model_input Generation ----
  tar_target(
    model_inputs_prefix,
    generate_model_inputs(
      feature_prefix = baf_feature_prefix,
      out_prefix = "05_model_input",
      bucket_name = "baf-fraud"
    )
  ),

  # ---- Tournament Inputs ----
  # One row per imbalance-handling strategy: a label, the data folder it
  # reads, and the scale_pos_weight value paired with it.
  tar_target(
    imbalance_tasks,
    tibble::tribble(
      ~recipe_name, ~data_folder, ~scale_pos_weight,
      "Standard",   "baseline",   1,
      "Weighted",   "baseline",   4,
      "Under",      "under",      1,
      "Smote",      "smote",      1,
      "Adasyn",     "adasyn",     1,
      "Tomek",      "tomek",      1
    )
  ),
  # Rolling windows: three consecutive training months followed by the next
  # month as the test month.
  tar_target(
    imbalance_windows,
    tibble::tribble(
      ~window_id, ~train_months, ~test_month,
      "Window 1", c(0, 1, 2),    3,
      "Window 2", c(1, 2, 3),    4,
      "Window 3", c(2, 3, 4),    5
    )
  ),

  # ---- 1. Data Layer (The Tournament Results) ----
  tar_target(
    tbl_strategy_showdown,
    {
      # Referencing model_inputs_prefix makes this target wait until the
      # model-input folders have been generated.
      force(model_inputs_prefix)
      # baf_feature_prefix is passed so the target tracks the latest layer.
      run_imbalance_tournament(
        imbalance_tasks,
        imbalance_windows,
        baf_feature_prefix
      )
    }
  ),

  # ---- 2. Figure Layer ----
  tar_target(
    fig_strategy_showdown,
    create_efficiency_plot(tbl_strategy_showdown)
  ),
  tar_target(
    fig_strategy_showdown_path,
    save_report_figure(
      fig_strategy_showdown,
      filename = "fig_strategy_showdown.png",
      out_dir = "reports/figures"
    ),
    format = "file"
  ),

  # ---- 3. Table Layer (gt object) ----
  tar_target(
    tbl_strategy_showdown_gt,
    format_class_imbalance_tourney_gt(tbl_strategy_showdown)
  ),
  tar_target(
    tbl_strategy_showdown_path,
    save_report_table(
      tbl_strategy_showdown_gt,
      filename = "tbl_strategy_showdown.rds",
      out_dir = "reports/tables"
    ),
    format = "file"
  ),

  # ---- Final Production Evaluation ----
  # Depends on winning_params, which is defined further down; the position of
  # a target in this list does not affect the dependency graph.
  tar_target(
    final_eval_data,
    evaluate_final_model(params = winning_params)
  ),
  tar_target(
    final_conf_mat,
    yardstick::conf_mat(final_eval_data, truth, pred_class)
  ),
  tar_target(
    final_roc_curve,
    yardstick::roc_curve(final_eval_data, truth, prob)
  ),
  tar_target(
    final_pr_curve,
    yardstick::pr_curve(final_eval_data, truth, prob)
  ),

  # ---- Save Final Assets ----
  # ROC and PR curves side by side in a single figure.
  tar_target(
    fig_final_curves,
    {
      p1 <- ggplot2::autoplot(final_roc_curve) +
        ggplot2::labs(title = "ROC Curve (Months 6-7)")
      p2 <- ggplot2::autoplot(final_pr_curve) +
        ggplot2::labs(title = "PR Curve (Months 6-7)")
      cowplot::plot_grid(p1, p2)
    }
  ),
  tar_target(
    fig_final_curves_path,
    save_report_figure(fig_final_curves, "fig_final_curves.png"),
    format = "file"
  ),
  tar_target(
    tbl_final_conf_mat_path,
    save_report_table(
      final_conf_mat,
      "tbl_final_conf_mat.rds",
      out_dir = "reports/tables"
    ),
    format = "file"
  ),

  # ---- Generate and Save Heatmap ----
  tar_target(
    fig_final_conf_mat,
    plot_conf_mat_heatmap(final_conf_mat)
  ),
  tar_target(
    fig_final_conf_mat_path,
    save_report_figure(fig_final_conf_mat, "fig_final_conf_mat.png"),
    format = "file"
  ),

  # ---- Report Dependency Update ----
  # Bundles the saved figure/table paths into one file target.
  # NOTE(review): the final-evaluation assets (fig_final_curves_path,
  # tbl_final_conf_mat_path, fig_final_conf_mat_path) are not listed here --
  # confirm whether that is intentional.
  tar_target(
    report_assets,
    c(
      fig_fraud_by_month_path,
      tbl_fraud_by_month_path,
      fig_strategy_showdown_path,
      tbl_strategy_showdown_path,
      fig_var_imp_path,
      fig_hexbin_interaction_path,
      fig_missingness_path,
      fig_num_cor_path
    ),
    format = "file"
  ),

  # Renders the Quarto document index.qmd as a target.
  tar_quarto(
    report_slides,
    path = "index.qmd"
  ),

  # ---- Production model deployment ----
  # Collects the whole feature layer into memory (no month filter).
  tar_target(
    data_full,
    connect_baf(baf_feature_prefix, use_duckdb = TRUE) |>
      collect()
  ),
  tar_target(
    production_recipe_blueprint,
    build_baf_recipe(data_full)
  ),
  tar_target(
    winning_params,
    tune_lgbm(imbalance_windows)
  ),
  tar_target(
    production_model_uri,
    train_production_model(
      data = data_full,
      recipe = production_recipe_blueprint, # pass the untrained blueprint
      best_params = winning_params,
      model_filename = "baf_lgbm_prod_v1.txt"
    ),
    format = "rds"
  )
)