update stocks scraper
@@ -8,31 +8,33 @@ async def scrape(url: str) -> str:
 
     await page.goto(url, timeout=60000)
 
-    # Wait until at least one toggle button is present
-    await page.wait_for_selector(".toggle-btn", timeout=20000)
+    # Wait until buttons are available
+    await page.wait_for_selector('div[role="button"][aria-expanded]', timeout=20000)
 
-    # Set zoom
+    # Zoom out for full view
     await page.evaluate("document.body.style.zoom='50%'")
 
-    # Find all toggle buttons
-    toggle_buttons = await page.query_selector_all(".toggle-btn")
-    print(f"Found {len(toggle_buttons)} toggle buttons")
+    # Find collapsible buttons
+    toggle_buttons = await page.query_selector_all(
+        'div[role="button"][aria-expanded]'
+    )
+    print(f"Found {len(toggle_buttons)} expandable buttons")
 
     for i, btn in enumerate(toggle_buttons):
         try:
-            # Ensure it's visible and enabled
-            if await btn.is_visible() and await btn.is_enabled():
-                await btn.click()
-                await page.wait_for_timeout(1000)
+            aria_expanded = await btn.get_attribute("aria-expanded")
+            if aria_expanded == "false":
+                if await btn.is_visible() and await btn.is_enabled():
+                    await btn.click()
+                    await page.wait_for_timeout(1000)
 
             if i == len(toggle_buttons) - 1:
                 break
 
-            # Scroll down gradually
+            # Scroll gradually
             scroll_step = 500
             total_height = await page.evaluate("() => document.body.scrollHeight")
             current_position = 0
 
             while current_position < total_height:
                 await page.evaluate(f"window.scrollTo(0, {current_position});")
                 await page.wait_for_timeout(100)
@@ -44,17 +46,14 @@ async def scrape(url: str) -> str:
         except Exception as e:
             print(f"Skipped button due to error: {e}")
 
-    # Get the page content
+    # Capture expanded HTML
     page_source = await page.content()
 
-    # Close the browser
     await browser.close()
 
-    # Continue scraping logic here...
-    print("Scraping done")
-
-    # Save the page content to a file
+    # Save to file
     with open("/cache/scraped_page.html", "w") as fp:
         fp.write(page_source)
 
+    print("Scraping done")
     return page_source
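
For reference, lines 1-7 of the file are elided by the hunks above, so the setup that creates page and browser is not shown in this commit. Below is a minimal sketch of the presumed context, assuming Playwright's async API with a Chromium browser; the launch options, the invocation, and the current_position += scroll_step bookkeeping inside the elided part of the scroll loop are assumptions, not part of the diff.

# Hypothetical reconstruction of the elided setup -- not part of the commit.
import asyncio

from playwright.async_api import async_playwright

async def scrape(url: str) -> str:
    pw = await async_playwright().start()
    browser = await pw.chromium.launch(headless=True)  # launch options assumed
    page = await browser.new_page()
    # ... body as shown in the hunks above; the lines elided between the two
    # hunks presumably advance current_position by scroll_step ...

if __name__ == "__main__":
    asyncio.run(scrape("https://example.com"))  # hypothetical invocation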