feat: implement images in listings comparison
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.routers import comparisons, listings, rankings
|
||||
from app.routers import comparisons, images, listings, rankings
|
||||
|
||||
app = FastAPI(
|
||||
title="House ELO Ranking",
|
||||
@@ -20,6 +20,7 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
app.include_router(comparisons.router, prefix="/api", tags=["comparisons"])
|
||||
app.include_router(images.router, prefix="/api", tags=["images"])
|
||||
app.include_router(listings.router, prefix="/api", tags=["listings"])
|
||||
app.include_router(rankings.router, prefix="/api", tags=["rankings"])
|
||||
|
||||
|
||||
98
backend/app/routers/images.py
Normal file
98
backend/app/routers/images.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""Image proxy – scrape Funda listing pages for photo URLs."""
|
||||
|
||||
import re
|
||||
import time
|
||||
import urllib.request
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
|
||||
# Router that carries the image endpoints defined in this module.
router = APIRouter()
|
||||
|
||||
# How long a cached scrape result stays valid, in seconds (one hour).
_CACHE_TTL = 60 * 60

# Process-local cache keyed by listing global_id; each value is a
# (time stored, image URLs) pair.
_cache: dict[str, tuple[float, list[str]]] = dict()
|
||||
|
||||
|
||||
def _scrape_images(url: str) -> list[str]:
|
||||
"""Fetch a Funda listing page and extract image URLs."""
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
),
|
||||
"Accept": "text/html,application/xhtml+xml",
|
||||
},
|
||||
)
|
||||
try:
|
||||
resp = urllib.request.urlopen(req, timeout=10)
|
||||
html = resp.read().decode("utf-8", errors="replace")
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
images: list[str] = []
|
||||
seen_bases: set[str] = set()
|
||||
|
||||
# Pattern 1: valentina_media images (main property photos)
|
||||
for match in re.finditer(
|
||||
r"https://cloud\.funda\.nl/valentina_media/(\d+/\d+/\d+)(?:\.jpg|_\d+x\d+\.jpg)",
|
||||
html,
|
||||
):
|
||||
base = match.group(1)
|
||||
if base not in seen_bases:
|
||||
seen_bases.add(base)
|
||||
images.append(
|
||||
f"https://cloud.funda.nl/valentina_media/{base}.jpg?options=width=720"
|
||||
)
|
||||
|
||||
# Pattern 2: listing-management images (newer uploads)
|
||||
for match in re.finditer(
|
||||
r"https://cloud\.funda\.nl/listing-management/([0-9a-f-]{36})",
|
||||
html,
|
||||
):
|
||||
uuid = match.group(1)
|
||||
if uuid not in seen_bases:
|
||||
seen_bases.add(uuid)
|
||||
images.append(
|
||||
f"https://cloud.funda.nl/listing-management/{uuid}?options=width=720"
|
||||
)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
@router.get("/listings/{global_id}/images")
def get_listing_images(
    global_id: str,
    db: Session = Depends(get_db),
) -> dict[str, list[str]]:
    """Return image URLs for a listing, scraped from its Funda page.

    Results are kept in the module-level ``_cache``. A non-empty result is
    reused for ``_CACHE_TTL`` seconds. An empty result is reused only
    briefly, because ``_scrape_images`` also returns an empty list when
    the download fails and a transient failure should not hide a
    listing's photos for the full TTL.

    Args:
        global_id: Identifier of the listing, matched against the
            ``global_id`` column of the configured listings table.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A dict with a single ``"images"`` key holding the image URLs.

    Raises:
        HTTPException: 404 when no listing row matches ``global_id`` or
            the row has no URL.
    """
    # Empty results may stem from a failed fetch, so retry them sooner.
    empty_result_ttl = min(_CACHE_TTL, 60)

    now = time.time()

    # Single lookup instead of "in" followed by indexing.
    entry = _cache.get(global_id)
    if entry is not None:
        stored_at, cached = entry
        ttl = _CACHE_TTL if cached else empty_result_ttl
        if now - stored_at < ttl:
            return {"images": cached}

    # Schema and table names come from application settings; the
    # caller-supplied identifier is passed as a bound parameter.
    query = text(
        f"SELECT url FROM {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} "
        "WHERE global_id = :gid"
    )
    row = db.execute(query, {"gid": global_id}).first()

    if not row or not row.url:
        raise HTTPException(status_code=404, detail="Listing not found")

    images = _scrape_images(row.url)
    _cache[global_id] = (now, images)

    return {"images": images}
|
||||
Reference in New Issue
Block a user