diff --git a/apps/stocks/src/utils/scraper.py b/apps/stocks/src/utils/scraper.py
index 405c0b9..c8daf93 100644
--- a/apps/stocks/src/utils/scraper.py
+++ b/apps/stocks/src/utils/scraper.py
@@ -8,31 +8,33 @@ async def scrape(url: str) -> str:
         await page.goto(url, timeout=60000)
 
-        # Wait until at least one toggle button is present
-        await page.wait_for_selector(".toggle-btn", timeout=20000)
+        # Wait until buttons are available
+        await page.wait_for_selector('div[role="button"][aria-expanded]', timeout=20000)
 
-        # Set zoom
+        # Zoom out for full view
         await page.evaluate("document.body.style.zoom='50%'")
 
-        # Find all toggle buttons
-        toggle_buttons = await page.query_selector_all(".toggle-btn")
-        print(f"Found {len(toggle_buttons)} toggle buttons")
+        # Find collapsible buttons
+        toggle_buttons = await page.query_selector_all(
+            'div[role="button"][aria-expanded]'
+        )
+        print(f"Found {len(toggle_buttons)} expandable buttons")
 
         for i, btn in enumerate(toggle_buttons):
             try:
-                # Ensure it's visible and enabled
-                if await btn.is_visible() and await btn.is_enabled():
-                    await btn.click()
-                    await page.wait_for_timeout(1000)
+                aria_expanded = await btn.get_attribute("aria-expanded")
+                if aria_expanded == "false":
+                    if await btn.is_visible() and await btn.is_enabled():
+                        await btn.click()
+                        await page.wait_for_timeout(1000)
 
                 if i == len(toggle_buttons) - 1:
                     break
 
-                # Scroll down gradually
+                # Scroll gradually
                 scroll_step = 500
                 total_height = await page.evaluate("() => document.body.scrollHeight")
                 current_position = 0
-
                 while current_position < total_height:
                     await page.evaluate(f"window.scrollTo(0, {current_position});")
                     await page.wait_for_timeout(100)
@@ -44,17 +46,14 @@ async def scrape(url: str) -> str:
             except Exception as e:
                 print(f"Skipped button due to error: {e}")
 
-        # Get the page content
+        # Capture expanded HTML
        page_source = await page.content()
-
-        # Close the browser
        await browser.close()
 
-        # Continue scraping logic here...
-        print("Scraping done")
-
-        # Save the page content to a file
+        # Save to file
        with open("/cache/scraped_page.html", "w") as fp:
            fp.write(page_source)
 
+        print("Scraping done")
+
+        return page_source