Note: this patch was recovered from a copy whose line breaks and indentation
were collapsed. Indentation of context/removed lines is reconstructed, so apply
with `git apply --ignore-whitespace`. The `index` lines and the binary
data/listings.rds entry are carried over unchanged from the original patch.

diff --git a/data-raw/update_listings.R b/data-raw/update_listings.R
index a5b0ba2..3d37460 100644
--- a/data-raw/update_listings.R
+++ b/data-raw/update_listings.R
@@ -51,13 +51,76 @@ in_plat <- lengths(sf::st_within(listings_sf, plats)) > 0
 listings <- listings_raw[in_plat, ]
 cat("After plat clip:", nrow(listings), "listings\n")
 
+# ── Override RentCast coordinates with building footprint geometry ────────────
+# RentCast geocoding is approximate. Our owners data uses building centroids
+# from Sarasota County GIS footprints — far more accurate. Match on house
+# number + street name and substitute when found.
+owners_sf <- readRDS("./data/owners.rds")
+
+# Build a "<house number> <STREET>" key from a free-form address. Owners and
+# listings go through the same normalisation so their keys are comparable:
+#   "878 CHALMERS DR, Venice FL"                  -> "878 CHALMERS DR"
+#   "878 Chalmers Dr, Unit 878, Venice, FL 34293" -> "878 CHALMERS DR"
+#   "878 Chalmers Dr Apt 2"                       -> "878 CHALMERS DR"
+# Addresses that do not start with a house number yield NA and never match.
+make_match_key <- function(address) {
+  # Keep only the text before the first comma (drops unit / city / state).
+  key <- toupper(trimws(sub(",.*$", "", as.character(address))))
+  key <- gsub("\\s+", " ", key)
+  # Drop a trailing unit designator that was not comma-separated.
+  key <- sub(
+    "^(\\d+ \\S.*?) (?:(?:APT|UNIT|STE|SUITE)\\b\\.? ?|# ?)\\S+$",
+    "\\1", key,
+    perl = TRUE
+  )
+  key[!grepl("^\\d+ \\S", key)] <- NA_character_
+  key
+}
+
+# One lon/lat pair per owner record. Geometries that are not already points
+# are reduced to centroids (st_coordinates() would otherwise return one row
+# per vertex), then reprojected to EPSG:4326 so the values are on the same
+# footing as the RentCast latitude/longitude they replace (assumed WGS84).
+footprint_geom <- sf::st_geometry(owners_sf)
+if (!all(sf::st_geometry_type(footprint_geom) == "POINT")) {
+  footprint_geom <- sf::st_centroid(footprint_geom)
+}
+if (!is.na(sf::st_crs(footprint_geom))) {
+  footprint_geom <- sf::st_transform(footprint_geom, 4326)
+}
+footprint_xy <- sf::st_coordinates(footprint_geom)
+stopifnot(nrow(footprint_xy) == nrow(owners_sf))
+
+owners_coords <- data.frame(
+  match_key = make_match_key(owners_sf$location),
+  lon = unname(footprint_xy[, "X"]),
+  lat = unname(footprint_xy[, "Y"])
+)
+usable <- !is.na(owners_coords$match_key) &
+  is.finite(owners_coords$lon) & is.finite(owners_coords$lat)
+owners_coords <- owners_coords[usable, ]
+
+# match() uses the first owner record for each key and keeps listings in
+# their original order with exactly one row per listing; a keyed merge()
+# would re-sort the rows.
+listing_key <- make_match_key(listings$addressLine1)
+owner_row <- match(listing_key, owners_coords$match_key)
+
+# For matched rows, replace RentCast lat/lng with footprint centroid coords
+has_geom <- !is.na(owner_row)
+if (any(has_geom)) {
+  listings$longitude[has_geom] <- owners_coords$lon[owner_row[has_geom]]
+  listings$latitude[has_geom] <- owners_coords$lat[owner_row[has_geom]]
+  cat("Coordinates corrected from building footprints:", sum(has_geom), "listing(s)\n")
+}
+
 # ── Select and clean columns ──────────────────────────────────────────────────
 listings <- listings |>
   transmute(
-    listed_date = as.Date(listedDate),
-    address = formattedAddress,
-    sqft = as.numeric(squareFootage),
-    price = as.numeric(price),
+    listed_date    = as.Date(listedDate),
+    address        = formattedAddress,
+    sqft           = as.numeric(squareFootage),
+    price          = as.numeric(price),
     price_per_sqft = round(price / sqft, 0),
     latitude,
     longitude
diff --git a/data/listings.rds b/data/listings.rds
index 23e5cbd..e97d3c4 100644
Binary files a/data/listings.rds and b/data/listings.rds differ