# update_sales.R
#
# Pull the 10 most recent arm's-length sales in St. Andrews from
# SCPA_Parcels_Sales_CSV.zip (Sarasota.csv). Joins to geometry_lookup
# for coordinates. Downloads the zip fresh each run.
#
# Input:  data-raw/addresses/geometry_lookup.rds (static)
# Output: data/sales.rds

library(readr)
library(dplyr)
library(stringr)
library(sf)

# SUBD codes kept by the subdivision filter below.
subdivisions <- c(
  "8120", "8113", "8171", "8195", "8221", "8163", "8240",
  "8159", "8149", "8110", "8254", "8215", "8143"
)

# Number of most-recent sales written to the output.
n_sales <- 10L

geometry_lookup <- readRDS("./data-raw/addresses/geometry_lookup.rds")

# ── Download and extract Sarasota.csv ─────────────────────────────────────────

zip_path <- "./data-raw/property/SCPA_Parcels_Sales_CSV.zip"
dir.create(dirname(zip_path), showWarnings = FALSE, recursive = TRUE)

# Raise the download timeout to at least 300 s, but never lower a larger
# value that the session already has configured.
options(timeout = max(300, getOption("timeout")))

download_status <- download.file(
  url = "https://www.sc-pa.com/downloads/SCPA_Parcels_Sales_CSV.zip",
  destfile = zip_path,
  mode = "wb"
)
# download.file() returns 0 on success; stop here rather than parse a
# missing or partial archive.
if (download_status != 0L) {
  stop(
    "Download of SCPA_Parcels_Sales_CSV.zip failed (status ",
    download_status, ")",
    call. = FALSE
  )
}

csv_con <- unz(zip_path, "Parcel_Sales_CSV/Sarasota.csv")

# ── Load and filter ───────────────────────────────────────────────────────────

sales <- read_csv(
  csv_con,
  # Pin the code/key columns to character so the %in% matches and the join
  # key do not depend on readr's type guessing; everything else is guessed
  # as before.
  col_types = cols(
    SUBD = col_character(),
    QUAL_CODE = col_character(),
    ACCOUNT = col_character(),
    .default = col_guess()
  ),
  show_col_types = FALSE
) |>
  filter(
    SUBD %in% subdivisions,
    QUAL_CODE %in% c("01", "03"),
    SALE_AMT > 0,
    LIVING > 0
  ) |>
  mutate(
    listed_date = as.Date(SALE_DATE, format = "%m/%d/%Y"),
    address = str_squish(paste(LOCN, LOCS, LOCCITY, LOCSTATE, LOCZIP)),
    sqft = as.integer(LIVING),
    price = as.integer(SALE_AMT),
    price_per_sqft = round(price / sqft, 0),
    account_number = str_trim(ACCOUNT)
  ) |>
  # Keep only parcels that have geometry BEFORE taking the top rows;
  # otherwise the inner join below silently shrinks the result to fewer
  # than n_sales whenever a recent sale has no entry in the lookup.
  filter(account_number %in% geometry_lookup$account_number) |>
  arrange(desc(listed_date)) |>
  slice_head(n = n_sales) |>
  select(account_number, listed_date, address, sqft, price, price_per_sqft)

# ── Join geometry ─────────────────────────────────────────────────────────────

sales <- sales |>
  inner_join(geometry_lookup, by = "account_number") |>
  st_as_sf(sf_column_name = "geom")

# Extract the coordinate matrix once; column 1 is used as longitude and
# column 2 as latitude.
coords <- st_coordinates(sales)

sales <- sales |>
  st_drop_geometry() |>
  mutate(
    longitude = coords[, 1],
    latitude = coords[, 2]
  ) |>
  select(listed_date, address, sqft, price, price_per_sqft, latitude, longitude)

# ── Write output ──────────────────────────────────────────────────────────────

dir.create("./data", showWarnings = FALSE, recursive = TRUE)
saveRDS(sales, "./data/sales.rds")

# Report only after the file has actually been written.
cat("Sales written:", nrow(sales), "\n")