From d953edce545792c5d3987ddff3ab59d511e15208 Mon Sep 17 00:00:00 2001 From: Rob Wiederstein Date: Thu, 16 Apr 2026 06:25:34 -0400 Subject: [PATCH] Fix listing coordinates using building footprint geometry RentCast geocoding is approximate and placed 878 Chalmers in the lake. Override RentCast lat/lng with building centroid coords from owners.rds when address matches, giving accurate per-structure placement. --- data-raw/update_listings.R | 46 +++++++++++++++++++++++++++++++++---- data/listings.rds | Bin 637 -> 693 bytes 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/data-raw/update_listings.R b/data-raw/update_listings.R index a5b0ba2..3d37460 100644 --- a/data-raw/update_listings.R +++ b/data-raw/update_listings.R @@ -51,13 +51,51 @@ in_plat <- lengths(sf::st_within(listings_sf, plats)) > 0 listings <- listings_raw[in_plat, ] cat("After plat clip:", nrow(listings), "listings\n") +# ── Override RentCast coordinates with building footprint geometry ──────────── +# RentCast geocoding is approximate. Our owners data uses building centroids +# from Sarasota County GIS footprints — far more accurate. Match on house +# number + street name and substitute when found. +owners_sf <- readRDS("./data/owners.rds") + +# Extract house number from owners location field (e.g. "878 CHALMERS DR, Venice FL") +owners_coords <- owners_sf |> + mutate( + house_num = trimws(sub("^(\\d+).*", "\\1", location)), + street_raw = trimws(sub("^\\d+\\s+(.*),.*$", "\\1", location)), + match_key = paste(house_num, toupper(street_raw)) + ) |> + select(match_key, geom) |> + distinct(match_key, .keep_all = TRUE) + +# Extract house number + street from RentCast address +# e.g. "878 Chalmers Dr, Unit 878, Venice, FL 34293" -> "878 CHALMERS DR" +listings <- listings |> + mutate( + house_num = sub("^(\\d+)\\s.*", "\\1", addressLine1), + street_raw = gsub("[^A-Za-z ]", "", sub("^\\d+\\s+(\\S+\\s+\\S+).*", "\\1", addressLine1)), + match_key = paste(house_num, toupper(trimws(street_raw))) + ) + +matched <- merge(listings, owners_coords, by = "match_key", all.x = TRUE) + +# For matched rows, replace RentCast lat/lng with footprint centroid coords +has_geom <- !is.na(matched$geom) +if (any(has_geom)) { + coords <- sf::st_coordinates(sf::st_as_sf(matched[has_geom, ], sf_column_name = "geom")) + matched$longitude[has_geom] <- coords[, "X"] + matched$latitude[has_geom] <- coords[, "Y"] + cat("Coordinates corrected from building footprints:", sum(has_geom), "listing(s)\n") +} + +listings <- matched + # ── Select and clean columns ────────────────────────────────────────────────── listings <- listings |> transmute( - listed_date = as.Date(listedDate), - address = formattedAddress, - sqft = as.numeric(squareFootage), - price = as.numeric(price), + listed_date = as.Date(listedDate), + address = formattedAddress, + sqft = as.numeric(squareFootage), + price = as.numeric(price), price_per_sqft = round(price / sqft, 0), latitude, longitude diff --git a/data/listings.rds b/data/listings.rds index 23e5cbdccbde5ffbcd8d440dbe8c8beea9d0929c..e97d3c47ad4ceb617d4a3870c55eb6cac3727fc7 100644 GIT binary patch literal 693 zcmV;m0!sZKiwFP!000001B>8dU|?WoU}0fqU}gm}8CXL@+;lB~V!}WUJCNoB(%cSL zgc?Ay2wK1aLh~Mg&}iD zbrjru6pT%bERCUBbSw>w6dY4hGV{_)^79m&OQ3oT4UBQ>(XccyRtQNfDoM;kSYl*` zQ@yUCp{YW6ett?)QD#bdDv}-pL!6p4EzHd^EHN}N!l^;Q(!f-~DX|C`V3lBh;569S zM8P*ducRoy7|AV`cnmf)A;w^!H!ZPx6JJ;=TLQx-xSSaCf$lKEua1EMk~HV6fh4IZ zkObv0%K<{qJOQESC_u$w@*NO*225N7l@Fz1<~xcgZ2)l`dDerIx}$Jg0fdH_<0!1#k0 z&q+XWzk{{#vL9-1R|Dz)K34_(fplRAgHfo1wb0z`o&9}4e)g{T3yXmC`%kj0#z5NM z^5Ujv4%Q-K9J`_BxQLU;65M8M2H5HejuG&VjQNIU*$(~Aev zF)|4q*MW4Vv8ME9AniQgM$QvRv+R!AGZRQ_H!pk13#7TkIz@DVboLJKJ?t>^fqd@h zzW6ujMMGXM7X5P=pR#f51lFe3_zfGG&d z;RAEx3sQ^Xk(6-cB$i~Bl%~K`apvUbr9%Wzox@p_U#KsP4pr4<3S0D1oa7|0X5cm)6e_--+8 literal 637 zcmV-@0)qV?iwFP!000001C5lyZ_-d4#~-5;8Nvu`>cPWPV=|R3Ed`<3rMQKYUKHjI zGf>w8b*06&%i?V(qc@YysAP%%fM)8&D={(AqnU|E7w5&q#ETw`armR}Ra%v02l({+ zeZSwYzxR0j=0^}hLBudY6dZ;}G+ZtfC-dMXL4<%YU_^YESnaL$85aqo9l`_1;npv* zzV(&rPgH-Pd`112jz)+Xg0BE#P`LN&Hl;4bwe)K3X)8W&_))$6vQ z;~6`J%erb~j^$H$PSMpxC54L@FrP^aJk2?q&*kw0b!ROPzj$zi#HNvHF<+&?ej*z~{zEU;O8UNYgpZaP~ZSeVb? zi-vBShUMu+*rPC)*-v5Ertsfw?sfbp1vs0_EBmPreWdq}Lx^0>b#jr!jmLy-k?`Sb z>SOBDxb|(r2d+Qi*OYGk!l6y)iWkOCJaw47)gmNy3W-3~w|?aKHdz<^9FJ`fE_A&0 z#X9Y)M*b0Eoh*wDx(}K94eBp#6RK|fs^8bDzW&AOGo-4~Nzile~sCvklTIL91aTL!A%?pCuN zJ$n6p%mAKQiryRZXgAK)4*wnvy3|zM(Ga3$L{lwWkt=d2*; z5-k&M5EU^;R9cEz@q`@IBwMwYWj89U8G4-<{&vEqu`=bh6ot=F?^oz{yWdG*po{!d XYy^6grfMdnfZF{H0V(;Gcm)6el`cZ`