store and register scrape
This commit is contained in:
19
apps/stocks/src/utils.py
Normal file
19
apps/stocks/src/utils.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import re
|
||||
from collections.abc import Iterator
|
||||
from datetime import date, datetime
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# Day (optionally with an ordinal suffix), a month word, and a four-digit year,
# e.g. "3rd March 2024". The month group accepts any alphabetic word, so every
# hit still has to be validated by ``datetime.strptime``.
_DATE_PATTERN = re.compile(r"(\d{1,2})(st|nd|rd|th)?\s+([A-Za-z]+)\s+(\d{4})")

# Accepted month spellings: full name first, then the abbreviated form.
_DATE_FORMATS = ("%d %B %Y", "%d %b %Y")


def extract_date(page_source: str) -> Iterator[date]:
    """Yield the date written in the first ``<div>`` sibling after ``<header>``.

    The page is parsed with BeautifulSoup's ``html.parser``. The text of the
    first ``<div>`` sibling following the first ``<header>`` element is scanned
    for ``<day>[st|nd|rd|th] <month> <year>`` candidates, and the first
    candidate that is a real calendar date is yielded.

    Args:
        page_source: Raw HTML of the page.

    Yields:
        At most one value: the parsed date as a ``datetime`` (a ``date``
        subclass) with the time fields left at their ``strptime`` defaults.
        Nothing is yielded when the header, the sibling ``<div>`` or a
        parsable date is missing.
    """
    soup = BeautifulSoup(page_source, "html.parser")

    # Guard clauses: without the header or its sibling <div> there is nothing to scan.
    if not (header := soup.find("header")):
        return
    if not (div := header.find_next_sibling("div")):
        return

    for match in _DATE_PATTERN.finditer(div.text):
        day, _, month, year = match.groups()
        candidate = f"{day} {month} {year}"
        for fmt in _DATE_FORMATS:
            try:
                parsed = datetime.strptime(candidate, fmt)
            except ValueError:
                # Not a month name in this format, or an impossible day/year
                # (e.g. "3 items 2024", "31 February 2024"): try the next
                # format, then the next candidate, instead of crashing.
                continue
            yield parsed
            return
|
||||
Reference in New Issue
Block a user