Files
stAndrews/data-raw/update_sales.R
Rob Wiederstein 77dbe8cc7f
All checks were successful
Deploy stAndrews / deploy (push) Successful in 5s
Consolidate weekly refresh into single orchestrator script
refresh_all.R downloads both SCPA files once, then sources
update_owners.R and update_sales.R. Single cron job at 11pm Sunday.
2026-03-09 17:22:31 -04:00

54 lines
2.2 KiB
R

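# update_sales.R
# Pull the 10 most recent arm's-length sales in St. Andrews from
# SCPA_Parcels_Sales_CSV.zip (Sarasota.csv) and join them to geometry_lookup
# for coordinates. The zip is NOT downloaded here: this script reads the copy
# already on disk, which refresh_all.R downloads before sourcing this file.
# Input: data-raw/property/SCPA_Parcels_Sales_CSV.zip (refreshed by refresh_all.R)
#        data-raw/addresses/geometry_lookup.rds (static)
# Output: data/sales.rds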
library(readr)
library(dplyr)
library(stringr)
library(sf)
# Subdivision codes (matched against the SUBD column of Sarasota.csv) that
# this script treats as St. Andrews; sales in any other subdivision are
# discarded.
subdivisions <- c(
  "8120", "8113", "8171", "8195", "8221", "8163", "8240",
  "8159", "8149", "8110", "8254", "8215", "8143"
)

# Static parcel lookup. It is joined to the sales on `account_number`, and its
# `geom` column supplies the coordinates written to the output.
geometry_lookup <- readRDS(file = "./data-raw/addresses/geometry_lookup.rds")
# ── Load Sarasota.csv from cached zip ────────────────────────────────────────
# The zip is read from disk; nothing is downloaded in this script.
csv_con <- unz(
  "./data-raw/property/SCPA_Parcels_Sales_CSV.zip",
  "Parcel_Sales_CSV/Sarasota.csv"
)

# ── Load and filter ───────────────────────────────────────────────────────────
# Identifier columns are pinned to character so readr's type guessing can
# never alter them (e.g. strip the leading zero from codes such as "01").
# They are compared against strings below and ACCOUNT becomes the join key.
sales <-
  read_csv(
    csv_con,
    col_types = cols(
      ACCOUNT = col_character(),
      SUBD = col_character(),
      QUAL_CODE = col_character()
    ),
    show_col_types = FALSE
  ) |>
  # St. Andrews subdivisions only.
  filter(SUBD %in% subdivisions) |>
  # Qualification codes this script uses to select arm's-length sales.
  filter(QUAL_CODE %in% c("01", "03")) |>
  # A positive price and living area are required for price_per_sqft.
  filter(SALE_AMT > 0, LIVING > 0) |>
  mutate(
    listed_date = as.Date(SALE_DATE, format = "%m/%d/%Y"),
    address = str_squish(paste(LOCN, LOCS, LOCCITY, LOCSTATE, LOCZIP)),
    sqft = as.integer(LIVING),
    price = as.integer(SALE_AMT),
    price_per_sqft = round(price / sqft, 0),
    account_number = str_trim(ACCOUNT)
  ) |>
  # A sale whose date did not parse cannot be ranked by recency.
  filter(!is.na(listed_date)) |>
  # Trim to the output columns BEFORE joining so that raw CSV column names
  # cannot collide with columns of geometry_lookup.
  select(account_number, listed_date, address, sqft, price, price_per_sqft)

# ── Join geometry ─────────────────────────────────────────────────────────────
# Join first, then take the 10 most recent. Truncating before the join would
# silently return fewer than 10 rows whenever one of the newest sales has no
# match in geometry_lookup, even though older matched sales are available.
sales <- sales |>
  inner_join(geometry_lookup, by = "account_number") |>
  arrange(desc(listed_date)) |>
  slice_head(n = 10) |>
  st_as_sf(sf_column_name = "geom") |>
  mutate(
    # st_coordinates() returns a matrix; column 1 is taken as longitude (X)
    # and column 2 as latitude (Y). Assumes `geom` holds one point per row.
    longitude = st_coordinates(geom)[, 1],
    latitude = st_coordinates(geom)[, 2]
  ) |>
  st_drop_geometry() |>
  select(listed_date, address, sqft, price, price_per_sqft, latitude, longitude)

# Write first, then report, so the log line only appears after a successful
# write.
saveRDS(sales, "./data/sales.rds")
cat("Sales written:", nrow(sales), "\n")