From 8ada3eff1236c88d057803377035896804e09e9f Mon Sep 17 00:00:00 2001 From: Stijnvandenbroek Date: Thu, 5 Mar 2026 17:23:41 +0000 Subject: [PATCH] chore: sql linting --- .pre-commit-config.yaml | 2 +- .sqlfluff | 15 ++++ data_platform/assets/ingestion/funda/funda.py | 21 ----- .../funda/sql/ddl/create_listing_details.sql | 84 +++++++++---------- .../funda/sql/ddl/create_price_history.sql | 20 ++--- .../funda/sql/ddl/create_search_results.sql | 48 +++++------ .../sql/ddl/migrate_details_constraint.sql | 18 ---- .../ddl/migrate_price_history_constraint.sql | 20 ----- .../sql/ddl/migrate_search_constraint.sql | 17 ---- .../funda/sql/dml/insert_listing_details.sql | 8 +- .../funda/sql/dml/insert_price_history.sql | 6 +- .../funda/sql/dml/insert_search_results.sql | 8 +- 12 files changed, 103 insertions(+), 164 deletions(-) delete mode 100644 data_platform/assets/ingestion/funda/sql/ddl/migrate_details_constraint.sql delete mode 100644 data_platform/assets/ingestion/funda/sql/ddl/migrate_price_history_constraint.sql delete mode 100644 data_platform/assets/ingestion/funda/sql/ddl/migrate_search_constraint.sql diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bc10ffd..0a56025 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: entry: uv run sqlfluff lint --dialect postgres language: system types: [sql] - files: ^dbt/models/ + files: ^(dbt/models/|data_platform/assets/ingestion/) - id: prettier name: prettier diff --git a/.sqlfluff b/.sqlfluff index d938c01..a30b678 100644 --- a/.sqlfluff +++ b/.sqlfluff @@ -22,3 +22,18 @@ tab_space_size = 4 [sqlfluff:rules:layout.long_lines] ignore_comment_lines = true + +[sqlfluff:rules:capitalisation.keywords] +capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.identifiers] +capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.functions] +extended_capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.literals] +capitalisation_policy = lower + +[sqlfluff:rules:capitalisation.types] +extended_capitalisation_policy = lower diff --git a/data_platform/assets/ingestion/funda/funda.py b/data_platform/assets/ingestion/funda/funda.py index c57009c..8bb4335 100644 --- a/data_platform/assets/ingestion/funda/funda.py +++ b/data_platform/assets/ingestion/funda/funda.py @@ -113,13 +113,6 @@ def funda_search_results( conn.execute( text(render_sql(_SQL_DIR, "ddl/create_search_results.sql", schema=_SCHEMA)) ) - conn.execute( - text( - render_sql( - _SQL_DIR, "ddl/migrate_search_constraint.sql", schema=_SCHEMA - ) - ) - ) rows = [] for listing in all_listings: @@ -213,13 +206,6 @@ def funda_listing_details( conn.execute( text(render_sql(_SQL_DIR, "ddl/create_listing_details.sql", schema=_SCHEMA)) ) - conn.execute( - text( - render_sql( - _SQL_DIR, "ddl/migrate_details_constraint.sql", schema=_SCHEMA - ) - ) - ) with engine.connect() as conn: if config.fetch_all: @@ -363,13 +349,6 @@ def funda_price_history( conn.execute( text(render_sql(_SQL_DIR, "ddl/create_price_history.sql", schema=_SCHEMA)) ) - conn.execute( - text( - render_sql( - _SQL_DIR, "ddl/migrate_price_history_constraint.sql", schema=_SCHEMA - ) - ) - ) with engine.connect() as conn: if config.fetch_all: diff --git a/data_platform/assets/ingestion/funda/sql/ddl/create_listing_details.sql b/data_platform/assets/ingestion/funda/sql/ddl/create_listing_details.sql index 59a7a7c..94abb07 100644 --- a/data_platform/assets/ingestion/funda/sql/ddl/create_listing_details.sql +++ b/data_platform/assets/ingestion/funda/sql/ddl/create_listing_details.sql @@ -1,43 +1,43 @@ -CREATE TABLE IF NOT EXISTS {{ schema }}.listing_details ( - global_id TEXT, - tiny_id TEXT, - title TEXT, - city TEXT, - postcode TEXT, - province TEXT, - neighbourhood TEXT, - municipality TEXT, - price BIGINT, - price_formatted TEXT, - status TEXT, - offering_type TEXT, - object_type TEXT, - house_type TEXT, - construction_type TEXT, - construction_year TEXT, - energy_label TEXT, - living_area INT, - plot_area INT, - bedrooms INT, - rooms INT, - description TEXT, - publication_date TEXT, - latitude DOUBLE PRECISION, - longitude DOUBLE PRECISION, - has_garden BOOLEAN, - has_balcony BOOLEAN, - has_solar_panels BOOLEAN, - has_heat_pump BOOLEAN, - has_roof_terrace BOOLEAN, - is_energy_efficient BOOLEAN, - is_monument BOOLEAN, - url TEXT, - photo_count INT, - views INT, - saves INT, - raw_json JSONB, - ingested_at TIMESTAMPTZ DEFAULT now(), - last_fetched_at TIMESTAMPTZ DEFAULT now(), - is_stale BOOLEAN DEFAULT FALSE, - UNIQUE (global_id, status) +create table if not exists {{ schema }}.listing_details ( + global_id text, + tiny_id text, + title text, + city text, + postcode text, + province text, + neighbourhood text, + municipality text, + price bigint, + price_formatted text, + status text, + offering_type text, + object_type text, + house_type text, + construction_type text, + construction_year text, + energy_label text, + living_area int, + plot_area int, + bedrooms int, + rooms int, + description text, + publication_date text, + latitude double precision, + longitude double precision, + has_garden boolean, + has_balcony boolean, + has_solar_panels boolean, + has_heat_pump boolean, + has_roof_terrace boolean, + is_energy_efficient boolean, + is_monument boolean, + url text, + photo_count int, + views int, + saves int, + raw_json jsonb, + ingested_at timestamptz default now(), + last_fetched_at timestamptz default now(), + is_stale boolean default false, + unique (global_id, status) ); diff --git a/data_platform/assets/ingestion/funda/sql/ddl/create_price_history.sql b/data_platform/assets/ingestion/funda/sql/ddl/create_price_history.sql index 3332d0e..52a9c93 100644 --- a/data_platform/assets/ingestion/funda/sql/ddl/create_price_history.sql +++ b/data_platform/assets/ingestion/funda/sql/ddl/create_price_history.sql @@ -1,11 +1,11 @@ -CREATE TABLE IF NOT EXISTS {{ schema }}.price_history ( - global_id TEXT, - price BIGINT, - human_price TEXT, - date TEXT, - timestamp TEXT, - source TEXT, - status TEXT, - ingested_at TIMESTAMPTZ DEFAULT now(), - UNIQUE (global_id, date, source, status) +create table if not exists {{ schema }}.price_history ( + global_id text, + price bigint, + human_price text, + date text, + timestamp text, + source text, + status text, + ingested_at timestamptz default now(), + unique (global_id, date, source, status) ); diff --git a/data_platform/assets/ingestion/funda/sql/ddl/create_search_results.sql b/data_platform/assets/ingestion/funda/sql/ddl/create_search_results.sql index cca03f8..eb5755d 100644 --- a/data_platform/assets/ingestion/funda/sql/ddl/create_search_results.sql +++ b/data_platform/assets/ingestion/funda/sql/ddl/create_search_results.sql @@ -1,25 +1,25 @@ -CREATE TABLE IF NOT EXISTS {{ schema }}.search_results ( - global_id TEXT, - title TEXT, - city TEXT, - postcode TEXT, - province TEXT, - neighbourhood TEXT, - price BIGINT, - living_area INT, - plot_area INT, - bedrooms INT, - rooms INT, - energy_label TEXT, - object_type TEXT, - offering_type TEXT, - construction_type TEXT, - publish_date TEXT, - broker_id TEXT, - broker_name TEXT, - raw_json JSONB, - ingested_at TIMESTAMPTZ DEFAULT now(), - last_seen_at TIMESTAMPTZ DEFAULT now(), - is_active BOOLEAN DEFAULT TRUE, - UNIQUE (global_id) +create table if not exists {{ schema }}.search_results ( + global_id text, + title text, + city text, + postcode text, + province text, + neighbourhood text, + price bigint, + living_area int, + plot_area int, + bedrooms int, + rooms int, + energy_label text, + object_type text, + offering_type text, + construction_type text, + publish_date text, + broker_id text, + broker_name text, + raw_json jsonb, + ingested_at timestamptz default now(), + last_seen_at timestamptz default now(), + is_active boolean default true, + unique (global_id) ); diff --git a/data_platform/assets/ingestion/funda/sql/ddl/migrate_details_constraint.sql b/data_platform/assets/ingestion/funda/sql/ddl/migrate_details_constraint.sql deleted file mode 100644 index 1263152..0000000 --- a/data_platform/assets/ingestion/funda/sql/ddl/migrate_details_constraint.sql +++ /dev/null @@ -1,18 +0,0 @@ --- Deduplicate and add UNIQUE constraint to listing_details if it doesn't exist yet. -DO $$ -BEGIN - IF NOT EXISTS ( - SELECT 1 FROM pg_constraint - WHERE conrelid = '{{ schema }}.listing_details'::regclass - AND contype = 'u' - ) THEN - DELETE FROM {{ schema }}.listing_details a - USING {{ schema }}.listing_details b - WHERE a.global_id = b.global_id - AND a.status IS NOT DISTINCT FROM b.status - AND a.ingested_at < b.ingested_at; - - ALTER TABLE {{ schema }}.listing_details - ADD UNIQUE (global_id, status); - END IF; -END $$; diff --git a/data_platform/assets/ingestion/funda/sql/ddl/migrate_price_history_constraint.sql b/data_platform/assets/ingestion/funda/sql/ddl/migrate_price_history_constraint.sql deleted file mode 100644 index 8294b71..0000000 --- a/data_platform/assets/ingestion/funda/sql/ddl/migrate_price_history_constraint.sql +++ /dev/null @@ -1,20 +0,0 @@ --- Deduplicate and add UNIQUE constraint to price_history if it doesn't exist yet. -DO $$ -BEGIN - IF NOT EXISTS ( - SELECT 1 FROM pg_constraint - WHERE conrelid = '{{ schema }}.price_history'::regclass - AND contype = 'u' - ) THEN - DELETE FROM {{ schema }}.price_history a - USING {{ schema }}.price_history b - WHERE a.global_id = b.global_id - AND a.date IS NOT DISTINCT FROM b.date - AND a.source IS NOT DISTINCT FROM b.source - AND a.status IS NOT DISTINCT FROM b.status - AND a.ingested_at < b.ingested_at; - - ALTER TABLE {{ schema }}.price_history - ADD UNIQUE (global_id, date, source, status); - END IF; -END $$; diff --git a/data_platform/assets/ingestion/funda/sql/ddl/migrate_search_constraint.sql b/data_platform/assets/ingestion/funda/sql/ddl/migrate_search_constraint.sql deleted file mode 100644 index 0cfc840..0000000 --- a/data_platform/assets/ingestion/funda/sql/ddl/migrate_search_constraint.sql +++ /dev/null @@ -1,17 +0,0 @@ --- Deduplicate and add UNIQUE constraint to search_results if it doesn't exist yet. -DO $$ -BEGIN - IF NOT EXISTS ( - SELECT 1 FROM pg_constraint - WHERE conrelid = '{{ schema }}.search_results'::regclass - AND contype = 'u' - ) THEN - DELETE FROM {{ schema }}.search_results a - USING {{ schema }}.search_results b - WHERE a.global_id = b.global_id - AND a.ingested_at < b.ingested_at; - - ALTER TABLE {{ schema }}.search_results - ADD UNIQUE (global_id); - END IF; -END $$; diff --git a/data_platform/assets/ingestion/funda/sql/dml/insert_listing_details.sql b/data_platform/assets/ingestion/funda/sql/dml/insert_listing_details.sql index 434600d..3f6470f 100644 --- a/data_platform/assets/ingestion/funda/sql/dml/insert_listing_details.sql +++ b/data_platform/assets/ingestion/funda/sql/dml/insert_listing_details.sql @@ -1,4 +1,4 @@ -INSERT INTO {{ schema }}.listing_details ( +insert into {{ schema }}.listing_details ( global_id, tiny_id, title, city, postcode, province, neighbourhood, municipality, price, price_formatted, status, offering_type, object_type, house_type, @@ -9,7 +9,7 @@ INSERT INTO {{ schema }}.listing_details ( has_roof_terrace, is_energy_efficient, is_monument, url, photo_count, views, saves, raw_json ) -VALUES ( +values ( :global_id, :tiny_id, :title, :city, :postcode, :province, :neighbourhood, :municipality, :price, :price_formatted, :status, :offering_type, :object_type, :house_type, @@ -20,7 +20,7 @@ VALUES ( :has_roof_terrace, :is_energy_efficient, :is_monument, :url, :photo_count, :views, :saves, :raw_json ) -ON CONFLICT (global_id, status) DO UPDATE SET +on conflict (global_id, status) do update set tiny_id = excluded.tiny_id, title = excluded.title, city = excluded.city, @@ -58,4 +58,4 @@ ON CONFLICT (global_id, status) DO UPDATE SET raw_json = excluded.raw_json, ingested_at = now(), last_fetched_at = now(), - is_stale = FALSE + is_stale = false diff --git a/data_platform/assets/ingestion/funda/sql/dml/insert_price_history.sql b/data_platform/assets/ingestion/funda/sql/dml/insert_price_history.sql index f324647..be6784b 100644 --- a/data_platform/assets/ingestion/funda/sql/dml/insert_price_history.sql +++ b/data_platform/assets/ingestion/funda/sql/dml/insert_price_history.sql @@ -1,10 +1,10 @@ -INSERT INTO {{ schema }}.price_history ( +insert into {{ schema }}.price_history ( global_id, price, human_price, date, timestamp, source, status ) -VALUES ( +values ( :global_id, :price, :human_price, :date, :timestamp, :source, :status ) -ON CONFLICT (global_id, date, source, status) DO UPDATE SET +on conflict (global_id, date, source, status) do update set price = excluded.price, human_price = excluded.human_price, timestamp = excluded.timestamp, diff --git a/data_platform/assets/ingestion/funda/sql/dml/insert_search_results.sql b/data_platform/assets/ingestion/funda/sql/dml/insert_search_results.sql index 87fd786..aff501b 100644 --- a/data_platform/assets/ingestion/funda/sql/dml/insert_search_results.sql +++ b/data_platform/assets/ingestion/funda/sql/dml/insert_search_results.sql @@ -1,16 +1,16 @@ -INSERT INTO {{ schema }}.search_results ( +insert into {{ schema }}.search_results ( global_id, title, city, postcode, province, neighbourhood, price, living_area, plot_area, bedrooms, rooms, energy_label, object_type, offering_type, construction_type, publish_date, broker_id, broker_name, raw_json ) -VALUES ( +values ( :global_id, :title, :city, :postcode, :province, :neighbourhood, :price, :living_area, :plot_area, :bedrooms, :rooms, :energy_label, :object_type, :offering_type, :construction_type, :publish_date, :broker_id, :broker_name, :raw_json ) -ON CONFLICT (global_id) DO UPDATE SET +on conflict (global_id) do update set title = excluded.title, city = excluded.city, postcode = excluded.postcode, @@ -31,4 +31,4 @@ ON CONFLICT (global_id) DO UPDATE SET raw_json = excluded.raw_json, ingested_at = now(), last_seen_at = now(), - is_active = TRUE + is_active = true