store and register scrape

2025-07-29 17:22:42 +02:00
parent 0539dd9f7e
commit 02db619c6d
4 changed files with 72 additions and 26 deletions
--- a/apps/stocks/src/utils.py
+++ b/apps/stocks/src/utils.py
@@ -0,0 +1,19 @@
+import re
+from collections.abc import Iterator
+from datetime import date, datetime
+
+from bs4 import BeautifulSoup
+
+
+def extract_date(page_source: str) -> Iterator[date]:
+    """Yield the first date found in the ``<div>`` that follows the page header.
+
+    The text of the next ``<div>`` sibling of the first ``<header>`` element
+    is searched for dates written like ``29th July 2025`` or ``1 Jan 2024``.
+    Candidates are tried in order of appearance and the first one that is a
+    real calendar date is yielded.
+
+    Args:
+        page_source: HTML source of the page to inspect.
+
+    Yields:
+        At most one value: the parsed ``datetime`` (a ``date`` subclass, with
+        the time set to midnight). Nothing is yielded when the header, the
+        following div, or a valid date cannot be found.
+    """
+    soup = BeautifulSoup(page_source, "html.parser")
+
+    # Next <div> sibling of the first <header> element; bail out if either is missing.
+    header = soup.find("header")
+    div = header.find_next_sibling("div") if header else None
+    if not div:
+        return
+
+    pattern = r"(\d{1,2})(st|nd|rd|th)?\s+([A-Za-z]+)\s+(\d{4})"
+    for match in re.finditer(pattern, div.text):
+        day, _, month, year = match.groups()
+        # The month group matches any word, so text such as "5 items 2024", an
+        # abbreviated month, or an impossible day would make a bare "%B" parse
+        # raise ValueError. Try the full month name, then the abbreviation,
+        # and fall through to the next candidate when neither is a valid date.
+        # NOTE: %B / %b follow the current locale's month names.
+        for fmt in ("%d %B %Y", "%d %b %Y"):
+            try:
+                parsed = datetime.strptime(f"{day} {month} {year}", fmt)
+            except ValueError:
+                continue
+            yield parsed
+            return