chore: sql linting

This commit is contained in:
Stijnvandenbroek
2026-03-05 17:23:41 +00:00
parent 7902a5e354
commit 8ada3eff12
12 changed files with 103 additions and 164 deletions

View File

@@ -19,7 +19,7 @@ repos:
entry: uv run sqlfluff lint --dialect postgres entry: uv run sqlfluff lint --dialect postgres
language: system language: system
types: [sql] types: [sql]
files: ^dbt/models/ files: ^(dbt/models/|data_platform/assets/ingestion/)
- id: prettier - id: prettier
name: prettier name: prettier

View File

@@ -22,3 +22,18 @@ tab_space_size = 4
[sqlfluff:rules:layout.long_lines] [sqlfluff:rules:layout.long_lines]
ignore_comment_lines = true ignore_comment_lines = true
[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = lower
[sqlfluff:rules:capitalisation.identifiers]
capitalisation_policy = lower
[sqlfluff:rules:capitalisation.functions]
extended_capitalisation_policy = lower
[sqlfluff:rules:capitalisation.literals]
capitalisation_policy = lower
[sqlfluff:rules:capitalisation.types]
extended_capitalisation_policy = lower

View File

@@ -113,13 +113,6 @@ def funda_search_results(
conn.execute( conn.execute(
text(render_sql(_SQL_DIR, "ddl/create_search_results.sql", schema=_SCHEMA)) text(render_sql(_SQL_DIR, "ddl/create_search_results.sql", schema=_SCHEMA))
) )
conn.execute(
text(
render_sql(
_SQL_DIR, "ddl/migrate_search_constraint.sql", schema=_SCHEMA
)
)
)
rows = [] rows = []
for listing in all_listings: for listing in all_listings:
@@ -213,13 +206,6 @@ def funda_listing_details(
conn.execute( conn.execute(
text(render_sql(_SQL_DIR, "ddl/create_listing_details.sql", schema=_SCHEMA)) text(render_sql(_SQL_DIR, "ddl/create_listing_details.sql", schema=_SCHEMA))
) )
conn.execute(
text(
render_sql(
_SQL_DIR, "ddl/migrate_details_constraint.sql", schema=_SCHEMA
)
)
)
with engine.connect() as conn: with engine.connect() as conn:
if config.fetch_all: if config.fetch_all:
@@ -363,13 +349,6 @@ def funda_price_history(
conn.execute( conn.execute(
text(render_sql(_SQL_DIR, "ddl/create_price_history.sql", schema=_SCHEMA)) text(render_sql(_SQL_DIR, "ddl/create_price_history.sql", schema=_SCHEMA))
) )
conn.execute(
text(
render_sql(
_SQL_DIR, "ddl/migrate_price_history_constraint.sql", schema=_SCHEMA
)
)
)
with engine.connect() as conn: with engine.connect() as conn:
if config.fetch_all: if config.fetch_all:

View File

@@ -1,43 +1,43 @@
CREATE TABLE IF NOT EXISTS {{ schema }}.listing_details ( create table if not exists {{ schema }}.listing_details (
global_id TEXT, global_id text,
tiny_id TEXT, tiny_id text,
title TEXT, title text,
city TEXT, city text,
postcode TEXT, postcode text,
province TEXT, province text,
neighbourhood TEXT, neighbourhood text,
municipality TEXT, municipality text,
price BIGINT, price bigint,
price_formatted TEXT, price_formatted text,
status TEXT, status text,
offering_type TEXT, offering_type text,
object_type TEXT, object_type text,
house_type TEXT, house_type text,
construction_type TEXT, construction_type text,
construction_year TEXT, construction_year text,
energy_label TEXT, energy_label text,
living_area INT, living_area int,
plot_area INT, plot_area int,
bedrooms INT, bedrooms int,
rooms INT, rooms int,
description TEXT, description text,
publication_date TEXT, publication_date text,
latitude DOUBLE PRECISION, latitude double precision,
longitude DOUBLE PRECISION, longitude double precision,
has_garden BOOLEAN, has_garden boolean,
has_balcony BOOLEAN, has_balcony boolean,
has_solar_panels BOOLEAN, has_solar_panels boolean,
has_heat_pump BOOLEAN, has_heat_pump boolean,
has_roof_terrace BOOLEAN, has_roof_terrace boolean,
is_energy_efficient BOOLEAN, is_energy_efficient boolean,
is_monument BOOLEAN, is_monument boolean,
url TEXT, url text,
photo_count INT, photo_count int,
views INT, views int,
saves INT, saves int,
raw_json JSONB, raw_json jsonb,
ingested_at TIMESTAMPTZ DEFAULT now(), ingested_at timestamptz default now(),
last_fetched_at TIMESTAMPTZ DEFAULT now(), last_fetched_at timestamptz default now(),
is_stale BOOLEAN DEFAULT FALSE, is_stale boolean default false,
UNIQUE (global_id, status) unique (global_id, status)
); );

View File

@@ -1,11 +1,11 @@
CREATE TABLE IF NOT EXISTS {{ schema }}.price_history ( create table if not exists {{ schema }}.price_history (
global_id TEXT, global_id text,
price BIGINT, price bigint,
human_price TEXT, human_price text,
date TEXT, date text,
timestamp TEXT, timestamp text,
source TEXT, source text,
status TEXT, status text,
ingested_at TIMESTAMPTZ DEFAULT now(), ingested_at timestamptz default now(),
UNIQUE (global_id, date, source, status) unique (global_id, date, source, status)
); );

View File

@@ -1,25 +1,25 @@
CREATE TABLE IF NOT EXISTS {{ schema }}.search_results ( create table if not exists {{ schema }}.search_results (
global_id TEXT, global_id text,
title TEXT, title text,
city TEXT, city text,
postcode TEXT, postcode text,
province TEXT, province text,
neighbourhood TEXT, neighbourhood text,
price BIGINT, price bigint,
living_area INT, living_area int,
plot_area INT, plot_area int,
bedrooms INT, bedrooms int,
rooms INT, rooms int,
energy_label TEXT, energy_label text,
object_type TEXT, object_type text,
offering_type TEXT, offering_type text,
construction_type TEXT, construction_type text,
publish_date TEXT, publish_date text,
broker_id TEXT, broker_id text,
broker_name TEXT, broker_name text,
raw_json JSONB, raw_json jsonb,
ingested_at TIMESTAMPTZ DEFAULT now(), ingested_at timestamptz default now(),
last_seen_at TIMESTAMPTZ DEFAULT now(), last_seen_at timestamptz default now(),
is_active BOOLEAN DEFAULT TRUE, is_active boolean default true,
UNIQUE (global_id) unique (global_id)
); );

View File

@@ -1,18 +0,0 @@
-- Deduplicate listing_details and add UNIQUE (global_id, status) if the table
-- does not have a unique constraint yet.
--
-- {{ schema }} is a template placeholder substituted before execution.
-- The guard looks for ANY unique constraint (contype = 'u') on the table, so
-- once the constraint exists the whole block is a no-op on later runs.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = '{{ schema }}.listing_details'::regclass
          AND contype = 'u'
    ) THEN
        -- Keep exactly one row per (global_id, status): the most recently
        -- ingested one. Ranking with a ctid tie-break (instead of comparing
        -- ingested_at with "<") also removes duplicates whose ingested_at is
        -- equal or NULL; otherwise those survive and ADD UNIQUE below fails.
        -- PARTITION BY groups NULL status values together, matching the
        -- previous IS NOT DISTINCT FROM comparison. Rows with a NULL global_id
        -- are left untouched, as before (they cannot violate the constraint).
        DELETE FROM {{ schema }}.listing_details
        WHERE ctid IN (
            SELECT ranked.row_ctid
            FROM (
                SELECT
                    ctid AS row_ctid,
                    row_number() OVER (
                        PARTITION BY global_id, status
                        ORDER BY ingested_at DESC NULLS LAST, ctid DESC
                    ) AS rn
                FROM {{ schema }}.listing_details
                WHERE global_id IS NOT NULL
            ) AS ranked
            WHERE ranked.rn > 1
        );

        ALTER TABLE {{ schema }}.listing_details
            ADD UNIQUE (global_id, status);
    END IF;
END $$;

View File

@@ -1,20 +0,0 @@
-- Deduplicate price_history and add UNIQUE (global_id, date, source, status)
-- if the table does not have a unique constraint yet.
--
-- {{ schema }} is a template placeholder substituted before execution.
-- The guard looks for ANY unique constraint (contype = 'u') on the table, so
-- once the constraint exists the whole block is a no-op on later runs.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = '{{ schema }}.price_history'::regclass
          AND contype = 'u'
    ) THEN
        -- Keep exactly one row per (global_id, date, source, status): the most
        -- recently ingested one. Ranking with a ctid tie-break (instead of
        -- comparing ingested_at with "<") also removes duplicates whose
        -- ingested_at is equal or NULL; otherwise those survive and ADD UNIQUE
        -- below fails. PARTITION BY groups NULL date/source/status values
        -- together, matching the previous IS NOT DISTINCT FROM comparisons.
        -- Rows with a NULL global_id are left untouched, as before.
        DELETE FROM {{ schema }}.price_history
        WHERE ctid IN (
            SELECT ranked.row_ctid
            FROM (
                SELECT
                    ctid AS row_ctid,
                    row_number() OVER (
                        PARTITION BY global_id, date, source, status
                        ORDER BY ingested_at DESC NULLS LAST, ctid DESC
                    ) AS rn
                FROM {{ schema }}.price_history
                WHERE global_id IS NOT NULL
            ) AS ranked
            WHERE ranked.rn > 1
        );

        ALTER TABLE {{ schema }}.price_history
            ADD UNIQUE (global_id, date, source, status);
    END IF;
END $$;

View File

@@ -1,17 +0,0 @@
-- Deduplicate search_results and add UNIQUE (global_id) if the table does not
-- have a unique constraint yet.
--
-- {{ schema }} is a template placeholder substituted before execution.
-- The guard looks for ANY unique constraint (contype = 'u') on the table, so
-- once the constraint exists the whole block is a no-op on later runs.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = '{{ schema }}.search_results'::regclass
          AND contype = 'u'
    ) THEN
        -- Keep exactly one row per global_id: the most recently ingested one.
        -- Ranking with a ctid tie-break (instead of comparing ingested_at with
        -- "<") also removes duplicates whose ingested_at is equal or NULL;
        -- otherwise those survive and ADD UNIQUE below fails.
        -- Rows with a NULL global_id are left untouched, as before (they
        -- cannot violate the constraint).
        DELETE FROM {{ schema }}.search_results
        WHERE ctid IN (
            SELECT ranked.row_ctid
            FROM (
                SELECT
                    ctid AS row_ctid,
                    row_number() OVER (
                        PARTITION BY global_id
                        ORDER BY ingested_at DESC NULLS LAST, ctid DESC
                    ) AS rn
                FROM {{ schema }}.search_results
                WHERE global_id IS NOT NULL
            ) AS ranked
            WHERE ranked.rn > 1
        );

        ALTER TABLE {{ schema }}.search_results
            ADD UNIQUE (global_id);
    END IF;
END $$;

View File

@@ -1,4 +1,4 @@
INSERT INTO {{ schema }}.listing_details ( insert into {{ schema }}.listing_details (
global_id, tiny_id, title, city, postcode, province, global_id, tiny_id, title, city, postcode, province,
neighbourhood, municipality, price, price_formatted, neighbourhood, municipality, price, price_formatted,
status, offering_type, object_type, house_type, status, offering_type, object_type, house_type,
@@ -9,7 +9,7 @@ INSERT INTO {{ schema }}.listing_details (
has_roof_terrace, is_energy_efficient, is_monument, has_roof_terrace, is_energy_efficient, is_monument,
url, photo_count, views, saves, raw_json url, photo_count, views, saves, raw_json
) )
VALUES ( values (
:global_id, :tiny_id, :title, :city, :postcode, :province, :global_id, :tiny_id, :title, :city, :postcode, :province,
:neighbourhood, :municipality, :price, :price_formatted, :neighbourhood, :municipality, :price, :price_formatted,
:status, :offering_type, :object_type, :house_type, :status, :offering_type, :object_type, :house_type,
@@ -20,7 +20,7 @@ VALUES (
:has_roof_terrace, :is_energy_efficient, :is_monument, :has_roof_terrace, :is_energy_efficient, :is_monument,
:url, :photo_count, :views, :saves, :raw_json :url, :photo_count, :views, :saves, :raw_json
) )
ON CONFLICT (global_id, status) DO UPDATE SET on conflict (global_id, status) do update set
tiny_id = excluded.tiny_id, tiny_id = excluded.tiny_id,
title = excluded.title, title = excluded.title,
city = excluded.city, city = excluded.city,
@@ -58,4 +58,4 @@ ON CONFLICT (global_id, status) DO UPDATE SET
raw_json = excluded.raw_json, raw_json = excluded.raw_json,
ingested_at = now(), ingested_at = now(),
last_fetched_at = now(), last_fetched_at = now(),
is_stale = FALSE is_stale = false

View File

@@ -1,10 +1,10 @@
INSERT INTO {{ schema }}.price_history ( insert into {{ schema }}.price_history (
global_id, price, human_price, date, timestamp, source, status global_id, price, human_price, date, timestamp, source, status
) )
VALUES ( values (
:global_id, :price, :human_price, :date, :timestamp, :source, :status :global_id, :price, :human_price, :date, :timestamp, :source, :status
) )
ON CONFLICT (global_id, date, source, status) DO UPDATE SET on conflict (global_id, date, source, status) do update set
price = excluded.price, price = excluded.price,
human_price = excluded.human_price, human_price = excluded.human_price,
timestamp = excluded.timestamp, timestamp = excluded.timestamp,

View File

@@ -1,16 +1,16 @@
INSERT INTO {{ schema }}.search_results ( insert into {{ schema }}.search_results (
global_id, title, city, postcode, province, neighbourhood, global_id, title, city, postcode, province, neighbourhood,
price, living_area, plot_area, bedrooms, rooms, energy_label, price, living_area, plot_area, bedrooms, rooms, energy_label,
object_type, offering_type, construction_type, publish_date, object_type, offering_type, construction_type, publish_date,
broker_id, broker_name, raw_json broker_id, broker_name, raw_json
) )
VALUES ( values (
:global_id, :title, :city, :postcode, :province, :neighbourhood, :global_id, :title, :city, :postcode, :province, :neighbourhood,
:price, :living_area, :plot_area, :bedrooms, :rooms, :energy_label, :price, :living_area, :plot_area, :bedrooms, :rooms, :energy_label,
:object_type, :offering_type, :construction_type, :publish_date, :object_type, :offering_type, :construction_type, :publish_date,
:broker_id, :broker_name, :raw_json :broker_id, :broker_name, :raw_json
) )
ON CONFLICT (global_id) DO UPDATE SET on conflict (global_id) do update set
title = excluded.title, title = excluded.title,
city = excluded.city, city = excluded.city,
postcode = excluded.postcode, postcode = excluded.postcode,
@@ -31,4 +31,4 @@ ON CONFLICT (global_id) DO UPDATE SET
raw_json = excluded.raw_json, raw_json = excluded.raw_json,
ingested_at = now(), ingested_at = now(),
last_seen_at = now(), last_seen_at = now(),
is_active = TRUE is_active = true