Files
stAndrews/data-raw/update_owners.R
Rob Wiederstein 43552a937e Add deployment pipeline and clean up repo
- Add Dockerfile, docker-compose.yml, .dockerignore, .env (port 3842)
- Add Caddyfile.snippet for analytics gateway import pattern
- Add .gitea/workflows/deploy.yaml for act_runner SSH deploy
- Untrack sensitive/data files (SCPA xlsx, owners.rds)
- Add renv lockfile and infrastructure files
- Reorganize data-raw scripts and add SarasotaCounty boundary data
- Move www assets to www/images/, add docs PDFs
2026-03-09 10:38:21 -04:00

83 lines
2.7 KiB
R

# update_owners.R
# Weekly update script. Downloads fresh SCPA property data, joins it to the
# stable geometry lookup by account_number, and saves data/owners.rds.
# The SCPA xlsx is re-downloaded (and overwritten) by this script on every
# run, so no manual file replacement is needed to refresh ownership data.
# Input:  data-raw/property/SCPA Public.xlsx (overwritten by the download)
#         data-raw/addresses/geometry_lookup.rds (static)
# Output: data/owners.rds
library(readxl)
library(janitor)
library(dplyr)
library(stringr)
library(sf)
# Subdivision codes retained by this script. They are kept as character
# strings and matched with %in% against the workbook's `subdivision` column.
subdivisions <- c(
  "8120",
  "8113",
  "8171",
  "8195",
  "8221",
  "8163",
  "8240",
  "8159",
  "8149",
  "8110",
  "8254",
  "8215",
  "8143"
)
# load geometry lookup (static) ----
# Maps account_number to a geometry column named `geom`; it is joined to the
# SCPA owner rows further down and the result is converted to an sf object.
geometry_lookup <- readRDS("./data-raw/addresses/geometry_lookup.rds")
# Fail fast, before the (large) SCPA download and workbook parse, if the
# lookup lacks the columns the join and st_as_sf() steps rely on.
stopifnot(
  "geometry_lookup.rds must contain an `account_number` column" =
    "account_number" %in% names(geometry_lookup),
  "geometry_lookup.rds must contain a `geom` column" =
    "geom" %in% names(geometry_lookup)
)
# download fresh scpa data ----
# The workbook is fetched into a temporary file first and only copied over the
# previous copy once the transfer has succeeded, so a failed or truncated
# download never destroys the last good workbook.
scpa_url <- "https://www.sc-pa.com/downloads/SCPA%20Public.xlsx"
scpa_dest <- "./data-raw/property/SCPA Public.xlsx"
scpa_tmp <- tempfile(fileext = ".xlsx")

# The xlsx is not tracked in git, so its directory may be absent on a fresh
# checkout.
dir.create(dirname(scpa_dest), showWarnings = FALSE, recursive = TRUE)

# R's default download timeout (60 s) is documented as often insufficient for
# large files; raise it for this transfer only and restore it afterwards.
old_opts <- options(timeout = max(600, getOption("timeout")))
download_status <- tryCatch(
  download.file(url = scpa_url, destfile = scpa_tmp, mode = "wb"),
  finally = options(old_opts)
)

# download.file() returns 0 on success and non-zero on failure.
if (!identical(as.integer(download_status), 0L) ||
  !file.exists(scpa_tmp) ||
  file.size(scpa_tmp) == 0) {
  unlink(scpa_tmp)
  stop("Download of SCPA data failed: ", scpa_url, call. = FALSE)
}

# file.copy() rather than file.rename(): the temp dir may live on a different
# filesystem than the project directory.
if (!file.copy(scpa_tmp, scpa_dest, overwrite = TRUE)) {
  unlink(scpa_tmp)
  stop("Could not write downloaded SCPA data to ", scpa_dest, call. = FALSE)
}
unlink(scpa_tmp)
# load and clean scpa data ----
# Read the workbook ONCE and keep only the rows for the subdivisions of
# interest. Both the owner table and the latest-sale report below are derived
# from this subset, so the (large) xlsx is not parsed a second time.
# Column names are normalised with janitor::make_clean_names().
scpa_subset <-
  readxl::read_xlsx(
    path = "./data-raw/property/SCPA Public.xlsx",
    .name_repair = janitor::make_clean_names
  ) |>
  filter(subdivision %in% subdivisions)

# Owner rows: drop records with no situs address or an address starting
# with "0", then derive a display label and a geocodable location string.
owners_raw <- scpa_subset |>
  rename(
    situs_address = situs_address_property_address,
    homestead = homestead_exemption_yes_or_no
  ) |>
  filter(
    !is.na(situs_address),
    !grepl("^0", situs_address)
  ) |>
  mutate(
    # label = leading house number plus the next two whitespace-separated
    # tokens of the situs address (anything after that, e.g. a unit suffix,
    # is discarded).
    # NOTE(review): an address that does not match this pattern yields an NA
    # label and therefore the location string "NA, Venice FL" -- confirm
    # whether downstream consumers expect that.
    label = str_trim(str_extract(situs_address, "^\\d+\\s+\\S+\\s+\\S+")),
    location = paste0(label, ", Venice FL")
  ) |>
  select(
    account_number, owner_1, owner_2, subdivision, homestead, label, location
  )

# join to geometry ----
# Inner join: owner rows without a geometry are dropped (reported below).
owners <- owners_raw |>
  inner_join(geometry_lookup, by = "account_number") |>
  st_as_sf(sf_column_name = "geom")

# report any unmatched records ----
# Count the unmatched owner rows directly instead of subtracting row counts:
# the difference nrow(owners_raw) - nrow(owners) is wrong (possibly negative)
# whenever the lookup holds more than one row per account_number. Only the key
# column of the lookup is consulted, so this works whether or not
# geometry_lookup is an sf object.
unmatched <- owners_raw |>
  filter(!account_number %in% geometry_lookup$account_number)
n_unmatched <- nrow(unmatched)
if (n_unmatched > 0) {
  cat("WARNING:", n_unmatched, "records had no matching geometry and were dropped.\n")
  print(unmatched)
}

# report most recent sale date in st. andrews ----
# One-row tibble holding the record with the newest last_sale_date among the
# selected subdivisions (not restricted by the address filters above).
# arrange() sorts NA dates last, so an NA is only returned when no row has a
# parsable date.
latest_sale <- scpa_subset |>
  select(account_number, owner_1, contains("sale")) |>
  filter(!is.na(account_number)) |>
  mutate(last_sale_date = as.Date(last_sale_date, format = "%m/%d/%Y")) |>
  arrange(desc(last_sale_date)) |>
  head(1)
# save output and report ----
# Attach the most recent sale date to the sf object as an attribute so it is
# stored inside the .rds together with the owner data.
attr(owners, "last_sale_date") <- latest_sale$last_sale_date
# owners.rds is not tracked in git, so data/ may not exist on a fresh checkout.
dir.create("./data", showWarnings = FALSE, recursive = TRUE)
saveRDS(owners, "./data/owners.rds")
# Report only after the write has succeeded, so "Owners written" is never
# printed for a file that failed to save.
cat("Owners written:", nrow(owners), "\n")
cat("Saved to data/owners.rds\n")
cat("Most recent sale date:", format(latest_sale$last_sale_date, "%B %d, %Y"), "\n")