Update stocks scraper

This commit is contained in:
2025-11-09 18:30:10 +01:00
parent 3f99f354de
commit b15aaaa0dc

View File

@@ -8,31 +8,33 @@ async def scrape(url: str) -> str:
await page.goto(url, timeout=60000) await page.goto(url, timeout=60000)
# Wait until at least one toggle button is present # Wait until buttons are available
await page.wait_for_selector(".toggle-btn", timeout=20000) await page.wait_for_selector('div[role="button"][aria-expanded]', timeout=20000)
# Set zoom # Zoom out for full view
await page.evaluate("document.body.style.zoom='50%'") await page.evaluate("document.body.style.zoom='50%'")
# Find all toggle buttons # Find collapsible buttons
toggle_buttons = await page.query_selector_all(".toggle-btn") toggle_buttons = await page.query_selector_all(
print(f"Found {len(toggle_buttons)} toggle buttons") 'div[role="button"][aria-expanded]'
)
print(f"Found {len(toggle_buttons)} expandable buttons")
for i, btn in enumerate(toggle_buttons): for i, btn in enumerate(toggle_buttons):
try: try:
# Ensure it's visible and enabled aria_expanded = await btn.get_attribute("aria-expanded")
if await btn.is_visible() and await btn.is_enabled(): if aria_expanded == "false":
await btn.click() if await btn.is_visible() and await btn.is_enabled():
await page.wait_for_timeout(1000) await btn.click()
await page.wait_for_timeout(1000)
if i == len(toggle_buttons) - 1: if i == len(toggle_buttons) - 1:
break break
# Scroll down gradually # Scroll gradually
scroll_step = 500 scroll_step = 500
total_height = await page.evaluate("() => document.body.scrollHeight") total_height = await page.evaluate("() => document.body.scrollHeight")
current_position = 0 current_position = 0
while current_position < total_height: while current_position < total_height:
await page.evaluate(f"window.scrollTo(0, {current_position});") await page.evaluate(f"window.scrollTo(0, {current_position});")
await page.wait_for_timeout(100) await page.wait_for_timeout(100)
@@ -44,17 +46,14 @@ async def scrape(url: str) -> str:
except Exception as e: except Exception as e:
print(f"Skipped button due to error: {e}") print(f"Skipped button due to error: {e}")
# Get the page content # Capture expanded HTML
page_source = await page.content() page_source = await page.content()
# Close the browser
await browser.close() await browser.close()
# Continue scraping logic here... # Save to file
print("Scraping done")
# Save the page content to a file
with open("/cache/scraped_page.html", "w") as fp: with open("/cache/scraped_page.html", "w") as fp:
fp.write(page_source) fp.write(page_source)
print("Scraping done")
return page_source return page_source