处理 CAPTCHA 事件
了解 解锁浏览器 如何捕获和处理 CAPTCHA 事件。
基于事件的方法(推荐)
工作原理
示例代码
import sys
from playwright.sync_api import sync_playwright
# Configuration

# Remote browser endpoint handed to connect_over_cdp() in run(). The
# <username>:<password> part is a placeholder (presumably to be replaced
# with real credentials before running).
UB_BROWSER_URL = "wss://<username>:<password>@ubc.oxylabs.io"

# Page opened by run().
TARGET_URL = (
    "https://www.indeed.com/cmp/Bank-of-the-West/reviews"
    "?lang=any&fcountry=ALL&sort=date"
)

# `source` value that run() matches on window "message" events.
EXTENSION_NAME = "oxylabs-runtime"

# Message `type` values that run() treats as "solve finished" and
# "solve failed" respectively.
CAPTCHA_SOLVE_END = "oxylabs-captcha-solve-end"
CAPTCHA_SOLVE_ERROR = "oxylabs-captcha-solve-error"

# Seconds to wait for one of the statuses above; run() converts this to
# milliseconds.
CAPTCHA_CHECK_TIMEOUT_SEC = 60
def run():
    """Open TARGET_URL in the remote browser and wait for the CAPTCHA result.

    Connects to the browser at UB_BROWSER_URL over CDP, installs an init
    script that records the ``type`` of every window ``message`` event whose
    ``source`` equals EXTENSION_NAME, navigates to TARGET_URL, and then waits
    until the recorded status equals CAPTCHA_SOLVE_END. After that it waits
    for the reviews header element and saves ``page_screenshot.jpg``.

    The browser connection is closed on every exit path, including
    unexpected failures during navigation or scraping.

    Raises:
        SystemExit: with code 1 when the wait fails, i.e. the status became
            CAPTCHA_SOLVE_ERROR or CAPTCHA_CHECK_TIMEOUT_SEC elapsed.
    """
    with sync_playwright() as p:
        print('正在打开浏览器页面...')

        # Connect to the remote browser over CDP.
        browser = p.chromium.connect_over_cdp(UB_BROWSER_URL)
        try:
            # Take the default context and open a new page in it.
            ctx = browser.contexts[0]
            page = ctx.new_page()

            # This must be registered before navigation so the listener is
            # already in place when the target page starts loading.
            ctx.add_init_script(f"""
                window.addEventListener("message", (event) => {{
                    if (event.data && event.data.source === "{EXTENSION_NAME}") {{
                        window.__extensionStatus = event.data.type;
                    }}
                }});
            """)

            print('正在打开目标网站...')
            page.goto(TARGET_URL, wait_until='domcontentloaded')

            try:
                # Wait for the extension status to change. Throwing inside
                # the predicate makes the wait fail as soon as the error
                # status is seen instead of running into the timeout.
                page.wait_for_function(
                    """
                    ([solveEnd, solveError]) => {
                        const status = window.__extensionStatus;
                        if (status === solveError) {
                            throw new Error("CAPTCHA 解决失败");
                        }
                        return status === solveEnd;
                    }
                    """,
                    arg=[CAPTCHA_SOLVE_END, CAPTCHA_SOLVE_ERROR],
                    timeout=CAPTCHA_CHECK_TIMEOUT_SEC * 1000
                )
            except Exception as err:
                print(f'CAPTCHA 解决期间出错: {err}')
                # SystemExit propagates through the ``finally`` below, so
                # the browser is still closed before the process exits.
                sys.exit(1)

            print('CAPTCHA 已成功解决,继续抓取...')

            page.wait_for_selector('h1[data-testid="PageHeader-title-reviews"]')
            page.screenshot(path='page_screenshot.jpg')
            page.close()
        finally:
            # Single cleanup point: release the remote browser session no
            # matter which step above failed.
            browser.close()

        print('完成。')
if __name__ == "__main__":
    run()
控制台日志方法(传统)
最后更新于
这有帮助吗?

