import os
import requests
from dotenv import load_dotenv
from langchain_openai import OpenAI
from langchain_core.prompts import PromptTemplate
load_dotenv()
def scrape_website(url, timeout=180):
    """Scrape a website through the Oxylabs Web Scraper API.

    Sends a ``universal`` source query with parsing enabled to the Oxylabs
    realtime endpoint, authenticating with the ``OXYLABS_USERNAME`` and
    ``OXYLABS_PASSWORD`` environment variables.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the HTTP request before giving up.
            The default is deliberately generous because the request blocks
            until the scrape has finished.

    Returns:
        The ``content`` field of the first result converted with ``str()``,
        or ``None`` when credentials are missing, the request fails, the
        response is not HTTP 200, or the response carries no usable content.
        Every failure is reported on stdout rather than raised.
    """
    username = os.getenv("OXYLABS_USERNAME")
    password = os.getenv("OXYLABS_PASSWORD")
    if not username or not password:
        # Without this check the request would be sent with "None" credentials.
        print(
            "Failed to scrape website: "
            "OXYLABS_USERNAME and OXYLABS_PASSWORD must be set."
        )
        return None

    payload = {
        "source": "universal",
        "url": url,
        "parse": True,
    }
    try:
        response = requests.post(
            "https://realtime.oxylabs.io/v1/queries",
            auth=(username, password),
            json=payload,
            # requests never times out on its own; an explicit limit keeps a
            # stalled connection from hanging the caller forever.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"Failed to scrape website: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to scrape website: {response.text}")
        return None

    try:
        content = response.json()["results"][0]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # ValueError covers a body that is not valid JSON; the others cover
        # a JSON document that lacks the expected results/content layout.
        print(f"Failed to scrape website: unexpected response format ({exc!r})")
        return None

    if not content:
        # str(None) would yield the truthy string "None" and be mistaken
        # for real page content by callers.
        print("Failed to scrape website: response contained no content.")
        return None
    return str(content)
def process_content(content):
    """Summarize scraped website content with a LangChain prompt/LLM chain.

    Args:
        content: Text of the scraped page. A falsy value (``None`` or an
            empty string) short-circuits without calling the model.

    Returns:
        The value returned by invoking the chain, or ``None`` when there is
        no content to process (a notice is printed in that case).
    """
    if content:
        template = PromptTemplate.from_template(
            "Analyze the following website content and summarize key points: {content}"
        )
        # The API key is read from the environment at call time.
        llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        pipeline = template | llm
        return pipeline.invoke({"content": content})

    print("No content to process.")
    return None
def main(url):
    """Scrape ``url`` and print an analysis of the scraped content.

    Progress messages are printed along the way. When scraping yields
    nothing, a notice is printed and the analysis step is skipped.

    Args:
        url: Address of the page to scrape and analyze.
    """
    print("Scraping website...")
    page_text = scrape_website(url)
    if not page_text:
        print("No content scraped.")
        return

    print("Processing scraped content with LangChain...")
    summary = process_content(page_text)
    print("\nProcessed Analysis:\n", summary)
if __name__ == "__main__":
    # Script entry point: run the scrape-and-analyze pipeline on a sample URL.
    main("https://sandbox.oxylabs.io/products/1")