From 85bc257e7bc6fd250fd21b619b25e39bac1ad9ff Mon Sep 17 00:00:00 2001 From: Rob Wiederstein Date: Mon, 23 Feb 2026 09:38:54 -0500 Subject: [PATCH] Rename package from baflakehouse to bankfraud MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - DESCRIPTION: Package name and URL updated to /bank-fraud - R/baflakehouse-package.R → R/bankfraud-package.R - _pkgdown.yml: url and reference alias updated - deploy.yaml: TARGET_DIR updated to /var/www/docs/bank-fraud/ - deploy/baflakehouse.caddy: deleted (stale, superseded by rsync workflow) - tests and README updated Co-Authored-By: Claude Sonnet 4.6 --- .gitea/workflows/deploy.yaml | 2 +- DESCRIPTION | 4 ++-- Dockerfile | 13 ++++++++----- R/{baflakehouse-package.R => bankfraud-package.R} | 4 ++-- README.md | 6 +++--- _pkgdown.yml | 4 ++-- deploy/baflakehouse.caddy | 13 ------------- tests/testthat.R | 4 ++-- 8 files changed, 20 insertions(+), 30 deletions(-) rename R/{baflakehouse-package.R => bankfraud-package.R} (69%) delete mode 100644 deploy/baflakehouse.caddy diff --git a/.gitea/workflows/deploy.yaml b/.gitea/workflows/deploy.yaml index 487975f..5499e52 100644 --- a/.gitea/workflows/deploy.yaml +++ b/.gitea/workflows/deploy.yaml @@ -57,7 +57,7 @@ jobs: SSH_PRIVATE_KEY: ${{ secrets.DEPLOY_SSH_KEY }} SERVER_IP: ${{ secrets.DEPLOY_SERVER_IP }} SERVER_USER: ${{ secrets.DEPLOY_SERVER_USER }} - TARGET_DIR: /var/www/docs/baflakehouse/ + TARGET_DIR: /var/www/docs/bank-fraud/ run: | # Setup SSH key mkdir -p ~/.ssh diff --git a/DESCRIPTION b/DESCRIPTION index 8b42e50..8dc447d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: baflakehouse +Package: bankfraud Title: Lakehouse Workflow for the Bank Account Fraud Dataset Version: 0.0.0.9000 Authors@R: @@ -52,5 +52,5 @@ Suggests: testthat (>= 3.0.0), withr Config/testthat/edition: 3 -URL: https://docs.robwiederstein.org/baflakehouse +URL: https://docs.robwiederstein.org/bank-fraud BugReports: https://git.robwiederstein.org/rkw/bank-fraud-baf-lakehouse/issues diff --git a/Dockerfile b/Dockerfile index 4bc391c..6e78632 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rocker/verse:4.4 +FROM rocker/verse:4.5.2 # System dependencies for arrow, lightgbm, and ggplot2 (ragg/textshaping) # Quarto is pre-installed in rocker/verse @@ -25,13 +25,16 @@ WORKDIR /app COPY renv.lock .Rprofile ./ COPY renv/activate.R renv/settings.json renv/ -RUN Rscript -e "renv::restore()" +RUN Rscript -e "renv::restore(prompt = FALSE)" # Copy the full package source COPY . . -# Install the local package into the renv library -RUN Rscript -e "renv::install('.')" +# Install the local package into the renv library, then re-run restore so +# any package that renv skipped by finding it in the rocker system library +# (e.g. styler) ends up in the project library where renv can actually see it. +RUN Rscript -e "renv::install('.')" && \ + Rscript -e "renv::restore(prompt = FALSE)" # Non-secret default — override with --env at runtime if needed ENV BAF_BUCKET=lake @@ -41,5 +44,5 @@ ENV BAF_BUCKET=lake # --env BAF_ENDPOINT=172.19.0.1:9100 \ # --env BAF_KEY=... \ # --env BAF_SECRET=... \ -# baflakehouse +# bankfraud CMD ["Rscript", "deploy.R"] diff --git a/R/baflakehouse-package.R b/R/bankfraud-package.R similarity index 69% rename from R/baflakehouse-package.R rename to R/bankfraud-package.R index f5bb9f6..4941284 100644 --- a/R/baflakehouse-package.R +++ b/R/bankfraud-package.R @@ -1,9 +1,9 @@ -#' baflakehouse: Lakehouse Workflow for the Bank Account Fraud Dataset +#' bankfraud: Lakehouse Workflow for the Bank Account Fraud Dataset #' #' Tools to ingest the Bank Account Fraud (BAF) Base dataset into a MinIO/S3-backed #' lakehouse, clean encoded missing values, and produce reproducible reporting #' artifacts orchestrated with targets. #' #' @docType _PACKAGE -#' @name baflakehouse-package +#' @name bankfraud-package NULL diff --git a/README.md b/README.md index 43eec68..7b25484 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,18 @@ output: github_document --- -- [baflakehouse](#baflakehouse) +- [bankfraud](#bankfraud) - [About](#about) - [Results](#results) - [Clone](#clone) - [Acknowledgements](#acknowledgements) - [Citation](#citation) -# baflakehouse +# bankfraud ## About -The baflakehouse package is an end-to-end machine learning pipeline built to detect credit card fraud. Rather than relying on static local files, it implements a modern Lakehouse architecture. It ingests a massive 1-million-row dataset, partitions it into Parquet files via Apache Arrow, stores it on a MinIO object server, and trains a production-ready LightGBM model orchestrated entirely by the targets package. +The bankfraud package is an end-to-end machine learning pipeline built to detect credit card fraud. Rather than relying on static local files, it implements a modern Lakehouse architecture. It ingests a massive 1-million-row dataset, partitions it into Parquet files via Apache Arrow, stores it on a MinIO object server, and trains a production-ready LightGBM model orchestrated entirely by the targets package. Significance Financial fraud datasets suffer from extreme class imbalance, making traditional accuracy metrics highly misleading. This pipeline is engineered specifically to handle that imbalance without aggressive synthetic oversampling. diff --git a/_pkgdown.yml b/_pkgdown.yml index 89d3ae3..61ea947 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: https://docs.robwiederstein.org/baflakehouse +url: https://docs.robwiederstein.org/bank-fraud template: bootstrap: 5 @@ -17,7 +17,7 @@ reference: - title: "Data Ingestion & Lakehouse Setup" desc: "Functions for moving raw CSV data into the MinIO Lakehouse as partitioned Parquet." contents: - - baflakehouse-package + - bankfraud-package - convert_to_parquet - connect_baf - clean_baf_base diff --git a/deploy/baflakehouse.caddy b/deploy/baflakehouse.caddy deleted file mode 100644 index 30597e3..0000000 --- a/deploy/baflakehouse.caddy +++ /dev/null @@ -1,13 +0,0 @@ -# BAF Lakehouse pkgdown site -# Served at: https://docs.robwiederstein.org/baflakehouse -# -# handle_path strips the /baflakehouse prefix before handing off to the -# file server, so requests map correctly to the flat docs/ directory. -# -# NOTE: The path below must match the mount point inside the Caddy Docker -# container (i.e., wherever /data/projects/ is mounted in docker-compose.yml). - -handle_path /baflakehouse* { - root * /data/projects/bank-fraud-baf-lakehouse/docs - file_server -} diff --git a/tests/testthat.R b/tests/testthat.R index fabf3bc..3609ef7 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) -library(baflakehouse) +library(bankfraud) -test_check("baflakehouse") +test_check("bankfraud")