fix: read listing images from stored Funda data; limit rankings to sampled listings
This commit is contained in:
@@ -1,9 +1,4 @@
|
||||
"""Image proxy – scrape Funda listing pages for photo URLs."""
|
||||
|
||||
import re
|
||||
import time
|
||||
import urllib.request
|
||||
from typing import Optional
|
||||
"""Image endpoints – retrieve photo URLs from the raw Funda data."""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy import text
|
||||
@@ -14,85 +9,23 @@ from app.database import get_db
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Simple in-memory cache: global_id → (timestamp, image_urls)
|
||||
_cache: dict[str, tuple[float, list[str]]] = {}
|
||||
_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _scrape_images(url: str) -> list[str]:
    """Fetch a Funda listing page and extract de-duplicated image URLs.

    Two kinds of image reference are recognised in the page HTML:

    * ``valentina_media/<n>/<n>/<n>`` paths (with or without a
      ``_<w>x<h>`` size suffix), and
    * ``listing-management/<36-char hex/dash id>`` paths.

    Each distinct image is returned once, normalised to a
    ``?options=width=720`` URL, in order of first appearance: all
    ``valentina_media`` matches first, then ``listing-management`` matches.

    Args:
        url: Address of the listing page to download.

    Returns:
        The normalised image URLs, or an empty list when the page cannot be
        fetched (malformed URL, network error, timeout, ...).
    """
    try:
        # Building the Request is inside the ``try`` because it raises
        # ValueError for a malformed URL; that should yield "no images",
        # just like any other fetch failure.
        request = urllib.request.Request(
            url,
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
                ),
                "Accept": "text/html,application/xhtml+xml",
            },
        )
        # ``with`` guarantees the response (and its socket) is closed.
        with urllib.request.urlopen(request, timeout=10) as response:
            html = response.read().decode("utf-8", errors="replace")
    except Exception:
        # Deliberately best-effort: any failure to fetch means "no images".
        return []

    # (regex with one capture group identifying the image, output template)
    sources = (
        (
            r"https://cloud\.funda\.nl/valentina_media/(\d+/\d+/\d+)(?:\.jpg|_\d+x\d+\.jpg)",
            "https://cloud.funda.nl/valentina_media/{}.jpg?options=width=720",
        ),
        (
            r"https://cloud\.funda\.nl/listing-management/([0-9a-f-]{36})",
            "https://cloud.funda.nl/listing-management/{}?options=width=720",
        ),
    )

    images: list[str] = []
    seen_keys: set[str] = set()
    for pattern, template in sources:
        for match in re.finditer(pattern, html):
            key = match.group(1)
            if key in seen_keys:
                continue  # same photo referenced again (e.g. another size)
            seen_keys.add(key)
            images.append(template.format(key))

    return images
|
||||
|
||||
|
||||
@router.get("/listings/{global_id}/images")
def get_listing_images(
    global_id: str,
    db: Session = Depends(get_db),
) -> dict[str, list[str]]:
    """Return image URLs for a listing from the raw Funda JSON data.

    Reads the ``photo_urls`` entry of ``raw_json`` for the matching row in
    ``raw_funda.listing_details``.

    Args:
        global_id: Identifier of the listing, taken from the request path.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        ``{"images": [...]}`` with the stored photo URLs. The list is empty
        when no row matches ``global_id`` or the row has no photo URLs.
    """
    # NOTE(review): this span showed the pre- and post-commit code merged
    # together; this is the post-commit (database-backed) implementation.
    row = db.execute(
        text(
            "SELECT raw_json->'photo_urls' AS photo_urls "
            "FROM raw_funda.listing_details "
            "WHERE global_id = :gid"
        ),
        # Bound parameter: the path value is never interpolated into the SQL.
        {"gid": global_id},
    ).first()

    # An unknown listing and a listing without photos both give an empty list.
    if not row or not row.photo_urls:
        return {"images": []}

    # Presumably the driver decodes the JSON array to a Python sequence;
    # list() copies it into a plain list for the response — TODO confirm.
    return {"images": list(row.photo_urls)}
|
||||
|
||||
@@ -4,6 +4,7 @@ from fastapi import APIRouter, Depends
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.queries import LISTING_SELECT, row_to_listing
|
||||
from app.schemas import RankingResponse
|
||||
@@ -18,8 +19,14 @@ def get_rankings(
|
||||
offset: int = 0,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""Return listings ranked by ELO rating (highest first)."""
|
||||
query = LISTING_SELECT
|
||||
"""Return listings ranked by ELO rating (highest first).
|
||||
|
||||
Only listings in the stable sample (elo.sample_listings) are shown.
|
||||
"""
|
||||
query = LISTING_SELECT + f"""
|
||||
INNER JOIN {settings.ELO_SCHEMA}.sample_listings s
|
||||
ON l.global_id = s.global_id
|
||||
"""
|
||||
params: dict = {"limit": limit, "offset": offset}
|
||||
|
||||
if status and status != "all":
|
||||
|
||||
Reference in New Issue
Block a user