JavaScript 渲染
如果您要抓取的页面需要通过 JavaScript 动态将所有必要数据加载到 DOM 中,您可以在请求中包含一个 render 参数,而不是手动设置和使用无头浏览器。带有此参数的请求将被完全渲染,数据将根据指定的参数存储为 HTML 文件或 PNG 截图之一。
HTML
将 render 参数设置为 html 以获取渲染页面的原始输出。
PNG(截图)
将 render 参数设置为 png 以获取渲染页面的 Base64 编码截图。
请求示例
curl --user "user:pass" \
'https://realtime.oxylabs.io/v1/queries' \
-H "Content-Type: application/json" \
-d '{"source": "universal", "url": "https://www.example.com", "render": "html"}'
import requests
from pprint import pprint
# 构建负载(payload)。
payload = {
'source': 'universal',
'url': 'https://www.example.com',
'render': 'html',
}
# 获取响应。
response = requests.request(
'POST',
'https://realtime.oxylabs.io/v1/queries',
auth=('user', 'pass1'),
json=payload,
)
# 与返回作业状态和结果 URL 的响应不同,此调用将直接返回
# 带结果的 JSON 响应。
pprint(response.json())
<?php
$params = [
'source' => 'universal',
'url' => 'https://www.example.com',
'render' => 'html',
];
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://realtime.oxylabs.io/v1/queries");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($params));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERPWD, "user" . ":" . "pass1");
$headers = array();
$headers[] = "Content-Type: application/json";
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close($ch);
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
/// <summary>
/// Sample console client that posts a rendering job to the Realtime endpoint
/// and prints the raw response body.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the JSON payload, sends it with HTTP Basic authentication and
    /// writes the response text to standard output.
    /// </summary>
    /// <returns>A task that completes once the response has been printed.</returns>
    static async Task Main()
    {
        const string Username = "YOUR_USERNAME";
        const string Password = "YOUR_PASSWORD";

        // Collection-initializer entries are { key, value } pairs separated by a comma.
        var parameters = new Dictionary<string, string>()
        {
            { "source", "universal" },
            { "url", "https://www.example.com" },
            { "render", "html" },
        };

        using var client = new HttpClient();
        client.BaseAddress = new Uri("https://realtime.oxylabs.io");
        // Rendered jobs are slow: this guide asks for a 180 s client-side timeout,
        // while HttpClient gives up after 100 s by default.
        client.Timeout = TimeSpan.FromSeconds(180);

        using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries");
        requestMessage.Content = JsonContent.Create(parameters);

        // HTTP Basic credentials: base64("username:password").
        var authenticationString = $"{Username}:{Password}";
        var base64EncodedAuthenticationString = Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(authenticationString));
        requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

        using var response = await client.SendAsync(requestMessage);
        var contents = await response.Content.ReadAsStringAsync();
        Console.WriteLine(contents);
    }
}
}
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
func main() {
const Username = "YOUR_USERNAME"
const Password = "YOUR_PASSWORD"
payload := map[string]string{
"source": "universal",
"url": "https://www.example.com",
"render": "html",
}
jsonValue, _ := json.Marshal(payload)
client := &http.Client{}
request, _ := http.NewRequest("POST",
"https://realtime.oxylabs.io/v1/queries",
bytes.NewBuffer(jsonValue),
)
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseText, _ := ioutil.ReadAll(response.Body)
fmt.Println(string(responseText))
}
package org.example;
import okhttp3.*;
import org.json.JSONObject;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Authorization";
public static final String USERNAME = "YOUR_USERNAME";
public static final String PASSWORD = "YOUR_PASSWORD";
public void run() {
JSONObject jsonObject = new JSONObject();
jsonObject.put("source", "universal");
jsonObject.put("url", "https://www.example.com");
jsonObject.put("render": "html");
Authenticator authenticator = (route, response) -> {
String credential = Credentials.basic(USERNAME, PASSWORD);
return response
.request()
.newBuilder()
.header(AUTHORIZATION_HEADER, credential)
.build();
};
var client = new OkHttpClient.Builder()
.authenticator(authenticator)
.build();
var mediaType = MediaType.parse("application/json; charset=utf-8");
var body = RequestBody.create(jsonObject.toString(), mediaType);
var request = new Request.Builder()
.url("https://realtime.oxylabs.io/v1/queries")
.post(body)
.build();
try (var response = client.newCall(request).execute()) {
assert response.body() != null;
System.out.println(response.body().string());
} catch (Exception exception) {
System.out.println("Error: " + exception.getMessage());
}
System.exit(0);
}
/**
 * Entry point: runs a fresh {@code Main} instance on its own thread.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    Runnable job = new Main();
    Thread worker = new Thread(job);
    worker.start();
}
}
import fetch from 'node-fetch';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const body = {
'source': 'universal',
'url': 'https://www.example.com',
'render': 'html'
};
const response = await fetch('https://realtime.oxylabs.io/v1/queries', {
method: 'post',
body: JSON.stringify(body),
headers: {
'Content-Type': 'application/json',
'Authorization': 'Basic ' + Buffer.from(`${username}:${password}`).toString('base64'),
}
});
console.log(await response.json());
# 您提交的整个字符串必须进行 URL 编码。
https://realtime.oxylabs.io/v1/queries?source=universal&url=https%3A%2F%2Fwww.example.com%2F&render=html&access_token=12345abcde
{
"source": "universal",
"url": "https://www.example.com",
"render": "html"
}
JavaScript 渲染需要更多时间来抓取页面。如果使用 Realtime 或 Proxy Endpoint 集成方法,请在客户端将超时设置为 180 秒。
为确保最低流量消耗,我们的系统在页面渲染期间不会加载不必要的资源。
在特定页面强制渲染
为成功抓取,某些特定域的页面类型由于其动态内容需要渲染。我们的系统会自动对这些页面强制渲染,即使用户没有明确设置。
请注意,渲染的任务相比未渲染的任务会消耗更多流量。
我们希望用户在抓取以下页面时充分意识到这一点:
此方法提供了最佳的抓取体验,确保从这些具有挑战性的页面获取数据的准确性和可靠性。
如果您希望禁用渲染,可以通过在请求中添加以下参数来实现:
"render": ""
浏览器指令
使用我们的无头浏览器,您还可以执行各种 浏览器指令 ,例如点击、滚动、输入、等待等。阅读更多:
浏览器指令
最后更新于
这有帮助吗?

