update stocks scraper
@@ -8,31 +8,33 @@ async def scrape(url: str) -> str:
         await page.goto(url, timeout=60000)
 
-        # Wait until at least one toggle button is present
-        await page.wait_for_selector(".toggle-btn", timeout=20000)
+        # Wait until buttons are available
+        await page.wait_for_selector('div[role="button"][aria-expanded]', timeout=20000)
 
-        # Set zoom
+        # Zoom out for full view
         await page.evaluate("document.body.style.zoom='50%'")
 
-        # Find all toggle buttons
-        toggle_buttons = await page.query_selector_all(".toggle-btn")
-        print(f"Found {len(toggle_buttons)} toggle buttons")
+        # Find collapsible buttons
+        toggle_buttons = await page.query_selector_all(
+            'div[role="button"][aria-expanded]'
+        )
+        print(f"Found {len(toggle_buttons)} expandable buttons")
 
         for i, btn in enumerate(toggle_buttons):
             try:
-                # Ensure it's visible and enabled
-                if await btn.is_visible() and await btn.is_enabled():
-                    await btn.click()
-                    await page.wait_for_timeout(1000)
+                aria_expanded = await btn.get_attribute("aria-expanded")
+                if aria_expanded == "false":
+                    if await btn.is_visible() and await btn.is_enabled():
+                        await btn.click()
+                        await page.wait_for_timeout(1000)
 
                 if i == len(toggle_buttons) - 1:
                     break
 
-                # Scroll down gradually
+                # Scroll gradually
                 scroll_step = 500
                 total_height = await page.evaluate("() => document.body.scrollHeight")
                 current_position = 0
 
                 while current_position < total_height:
                     await page.evaluate(f"window.scrollTo(0, {current_position});")
                     await page.wait_for_timeout(100)
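A side note on the new guard: since the updated selector already keys on the aria-expanded attribute, the collapsed-only check could instead be baked into the selector itself. A minimal alternative sketch, assuming Playwright's async API and the page object from the surrounding function; this is not what the commit does:

# Alternative sketch: select only collapsed buttons up front, so no per-button
# get_attribute() check is needed. element_handles() snapshots the matches,
# since clicking flips aria-expanded to "true" and would otherwise shrink a
# live locator mid-loop. Assumes Playwright's async API; illustrative only.
buttons = await page.locator('div[role="button"][aria-expanded="false"]').element_handles()
for btn in buttons:
    if await btn.is_visible() and await btn.is_enabled():
        await btn.click()
        await page.wait_for_timeout(1000)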
@@ -44,17 +46,14 @@ async def scrape(url: str) -> str:
             except Exception as e:
                 print(f"Skipped button due to error: {e}")
 
-        # Get the page content
+        # Capture expanded HTML
         page_source = await page.content()
 
         # Close the browser
         await browser.close()
 
-        # Continue scraping logic here...
-        print("Scraping done")
-
-        # Save the page content to a file
+        # Save to file
         with open("/cache/scraped_page.html", "w") as fp:
             fp.write(page_source)
 
+        print("Scraping done")
+
         return page_source
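For context, here is the function as it reads after this commit, stitched together from both hunks. This is a minimal runnable sketch, not the verbatim file: the Playwright launch code, the headless flag, the asyncio.run entry point, and the current_position += scroll_step increment (which falls in the lines elided between the two hunks) are all assumptions.

import asyncio
from playwright.async_api import async_playwright


async def scrape(url: str) -> str:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)  # assumed launch setup
        page = await browser.new_page()
        await page.goto(url, timeout=60000)

        # Wait until buttons are available
        await page.wait_for_selector('div[role="button"][aria-expanded]', timeout=20000)

        # Zoom out for full view
        await page.evaluate("document.body.style.zoom='50%'")

        # Find collapsible buttons
        toggle_buttons = await page.query_selector_all(
            'div[role="button"][aria-expanded]'
        )
        print(f"Found {len(toggle_buttons)} expandable buttons")

        for i, btn in enumerate(toggle_buttons):
            try:
                # Only click buttons that are still collapsed
                if await btn.get_attribute("aria-expanded") == "false":
                    if await btn.is_visible() and await btn.is_enabled():
                        await btn.click()
                        await page.wait_for_timeout(1000)

                if i == len(toggle_buttons) - 1:
                    break  # per the diff, no scroll pass after the last button

                # Scroll gradually so lazy-loaded content renders
                scroll_step = 500
                total_height = await page.evaluate("() => document.body.scrollHeight")
                current_position = 0
                while current_position < total_height:
                    await page.evaluate(f"window.scrollTo(0, {current_position});")
                    await page.wait_for_timeout(100)
                    current_position += scroll_step  # assumed; elided between hunks
            except Exception as e:
                print(f"Skipped button due to error: {e}")

        # Capture expanded HTML
        page_source = await page.content()
        await browser.close()

        # Save to file
        with open("/cache/scraped_page.html", "w") as fp:
            fp.write(page_source)

        print("Scraping done")
        return page_source


if __name__ == "__main__":
    asyncio.run(scrape("https://example.com"))  # placeholder URL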