JavaScript 渲染
如果您要抓取的页面需要通过JavaScript将所需数据动态加载到DOM中,您无需自行搭建和使用无头浏览器,只需在请求中包含“X-Oxylabs-Render: html”标头即可。所有包含此标头的请求都会被完整渲染,渲染结果将以HTML文件或PNG屏幕截图的形式返回(取决于所传递的参数值)。
此参数有两个可用值:
- html(已渲染页面的HTML)
- png(可保存为PNG的原始字节)
代码示例
# Request https://ip.oxylabs.io through the proxy with the rendering header set to "html".
curl --insecure --verbose \
  --proxy unblock.oxylabs.io:60000 \
  --proxy-user user:pass1 \
  --header "X-Oxylabs-Render: html" \
  "https://ip.oxylabs.io"
import requests
# Route both HTTP and HTTPS traffic through the proxy.
# Don't forget to replace "user" and "pass1" with your real username and password.
proxies = {
    'http': 'http://user:pass1@unblock.oxylabs.io:60000',
    'https': 'http://user:pass1@unblock.oxylabs.io:60000',
}
# Header that requests the rendered page's HTML.
headers = {
    "X-Oxylabs-Render": "html"
}
response = requests.get(
    'https://ip.oxylabs.io',
    verify=False,  # It is required to ignore certificate
    proxies=proxies,
    headers=headers,
)
# Print result page to stdout
print(response.text)
# Save returned HTML to result.html file
with open('result.html', 'w') as f:
    f.write(response.text)
<?php
// Fetch https://ip.oxylabs.io through the proxy with the rendering header set,
// then print the response body followed by any cURL error.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL            => "https://ip.oxylabs.io",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_PROXY          => 'unblock.oxylabs.io:60000',
    CURLOPT_PROXYUSERPWD   => "YOUR_USERNAME" . ":" . "YOUR_PASSWORD",
    // Certificate and host name verification are both switched off.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_HTTPHEADER     => ['X-Oxylabs-Render: html'],
]);
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
?>
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
    class Program
    {
        /// <summary>
        /// Sends a GET request to https://ip.oxylabs.io through the proxy with the
        /// X-Oxylabs-Render header set to "html" and writes the response body to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            var webProxy = new WebProxy
            {
                Address = new Uri("http://unblock.oxylabs.io:60000"),
                BypassProxyOnLocal = false,
                UseDefaultCredentials = false,
                Credentials = new NetworkCredential(
                    userName: "YOUR_USERNAME",
                    password: "YOUR_PASSWORD"
                )
            };
            var httpClientHandler = new HttpClientHandler
            {
                Proxy = webProxy,
            };
            // We recommend accepting our certificate instead of allowing insecure (http) traffic
            httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
            // The callback returns true unconditionally, so every server certificate is accepted.
            httpClientHandler.ServerCertificateCustomValidationCallback =
                (httpRequestMessage, cert, certChain, policyErrors) => true;

            // Dispose the client (and, via disposeHandler, its handler) once the request is done.
            using (var client = new HttpClient(handler: httpClientHandler, disposeHandler: true))
            {
                // Add custom header
                client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "html");

                client.BaseAddress = new Uri("https://ip.oxylabs.io");
                using (var requestMessage = new HttpRequestMessage(HttpMethod.Get, ""))
                using (var response = await client.SendAsync(requestMessage))
                {
                    var contents = await response.Content.ReadAsStringAsync();
                    Console.WriteLine(contents);
                }
            }
        }
    }
}
package main
import (
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)
// main sends a GET request for https://ip.oxylabs.io through the proxy with the
// X-Oxylabs-Render header set to "html" and prints the response body.
// Any failure is printed and the program returns early.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	// Proxy URL with the credentials embedded as user:password@host:port.
	proxyUrl, err := url.Parse(
		fmt.Sprintf(
			"http://%s:%s@unblock.oxylabs.io:60000",
			Username,
			Password,
		),
	)
	if err != nil {
		fmt.Println("invalid proxy URL:", err)
		return
	}
	customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}

	// We recommend accepting our certificate instead of allowing insecure (http) traffic
	customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

	client := &http.Client{Transport: customTransport}
	request, err := http.NewRequest("GET",
		"https://ip.oxylabs.io",
		nil,
	)
	if err != nil {
		fmt.Println("could not build request:", err)
		return
	}

	// Add custom header
	request.Header.Add("X-Oxylabs-Render", "html")

	request.SetBasicAuth(Username, Password)
	response, err := client.Do(request)
	if err != nil {
		// response is nil on error, so it must not be touched below.
		fmt.Println("request failed:", err)
		return
	}
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("could not read response body:", err)
		return
	}
	fmt.Println(string(responseText))
}
import okhttp3.Authenticator;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import javax.net.ssl.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Proxy-Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";
    /**
     * Sends a GET request to https://ip.oxylabs.io through the proxy with the
     * X-Oxylabs-Render header set to "html" and prints the response body.
     * Exits the JVM with status 0 afterwards, or with status 1 after printing
     * the stack trace if the call throws.
     */
    public void run() {
        // Adds a Proxy-Authorization header carrying HTTP Basic credentials to the request.
        Authenticator proxyAuth = (route, challenge) -> {
            String basicCredentials = Credentials.basic(USERNAME, PASSWORD);
            return challenge.request().newBuilder()
                    .header(AUTHORIZATION_HEADER, basicCredentials)
                    .build();
        };

        OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
        // We recommend accepting our certificate instead of allowing insecure (http) traffic
        this.disableSSLCertificateChecking(clientBuilder);

        InetSocketAddress proxyAddress = new InetSocketAddress("unblock.oxylabs.io", 60000);
        OkHttpClient client = clientBuilder
                .proxy(new Proxy(Proxy.Type.HTTP, proxyAddress))
                .proxyAuthenticator(proxyAuth)
                .build();

        Request request = new Request.Builder()
                .url("https://ip.oxylabs.io")
                .addHeader("X-Oxylabs-Render", "html")
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }
    /**
     * Configures the given builder to accept any server certificate and any host name.
     * The same permissive socket factory and host name verifier are also installed as the
     * HttpsURLConnection defaults. If the TLS context cannot be set up, the stack trace
     * is printed and the JVM exits with status 1.
     *
     * @param builder the OkHttp client builder to configure
     */
    private void disableSSLCertificateChecking(OkHttpClient.Builder builder) {
        // Trust manager whose checks are empty, so no certificate chain is ever rejected.
        X509TrustManager trustEverything = new X509TrustManager() {
            @Override
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                return new java.security.cert.X509Certificate[0];
            }

            @Override
            public void checkServerTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }

            @Override
            public void checkClientTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }
        };
        TrustManager[] trustManagers = {trustEverything};

        try {
            HttpsURLConnection.setDefaultHostnameVerifier((host, sslSession) -> true);
            SSLContext permissiveContext = SSLContext.getInstance("TLS");
            permissiveContext.init(null, trustManagers, new java.security.SecureRandom());
            SSLSocketFactory socketFactory = permissiveContext.getSocketFactory();
            HttpsURLConnection.setDefaultSSLSocketFactory(socketFactory);
            builder.sslSocketFactory(socketFactory, trustEverything);
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        builder.hostnameVerifier((hostname, session) -> true);
    }
    /** Entry point: runs a new Main instance on its own thread. */
    public static void main(String[] args) {
        Thread worker = new Thread(new Main());
        worker.start();
    }
}
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent'
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

// Proxy agent with the credentials embedded in the proxy URL. They are
// percent-encoded so that characters such as "@" or ":" cannot break the URL.
const agent = createHttpsProxyAgent(
  `http://${encodeURIComponent(username)}:${encodeURIComponent(password)}@unblock.oxylabs.io:60000`
);

// We recommend accepting our certificate instead of allowing insecure (http) traffic
// process.env values are strings; assigning a number relies on a deprecated implicit conversion.
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = '0';

// Header that requests the rendered page's HTML.
const headers = {
  'X-Oxylabs-Render': 'html',
};

const response = await fetch('https://ip.oxylabs.io', {
  method: 'get',
  headers,
  agent,
});
console.log(await response.text());
抓取网站的HTML
在这个示例中,我们将渲染YouTube的主页,并抓取页面内容。通常情况下,如果使用网页解锁器但不启用JavaScript渲染,则YouTube的主页如下所示:

如下列示例所示,添加"X-Oxylabs-Render: html"标头将启用JavaScript渲染,并返回渲染后页面的HTML:
# Request https://youtube.com through the proxy with the rendering header set to "html".
curl --insecure --verbose \
  --proxy unblock.oxylabs.io:60000 \
  --proxy-user user:pass1 \
  --header "X-Oxylabs-Render: html" \
  "https://youtube.com"
import requests
# Route both HTTP and HTTPS traffic through the proxy.
# Don't forget to replace "user" and "pass1" with your real username and password.
proxies = {
    'http': 'http://user:pass1@unblock.oxylabs.io:60000',
    'https': 'http://user:pass1@unblock.oxylabs.io:60000',
}
# Header that requests the rendered page's HTML.
headers = {
    "X-Oxylabs-Render": "html"
}
response = requests.get(
    'https://youtube.com',
    verify=False,  # It is required to ignore certificate
    proxies=proxies,
    headers=headers,
)
# Print result page to stdout
print(response.text)
# Save returned HTML to result.html file
with open('result.html', 'w') as f:
    f.write(response.text)
<?php
// Fetch https://youtube.com through the proxy with the rendering header set,
// then print the response body followed by any cURL error.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL            => "https://youtube.com",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_PROXY          => 'unblock.oxylabs.io:60000',
    CURLOPT_PROXYUSERPWD   => "YOUR_USERNAME" . ":" . "YOUR_PASSWORD",
    // Certificate and host name verification are both switched off.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_HTTPHEADER     => ['X-Oxylabs-Render: html'],
]);
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
?>
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
    class Program
    {
        /// <summary>
        /// Sends a GET request to https://youtube.com through the proxy with the
        /// X-Oxylabs-Render header set to "html" and writes the response body to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            var webProxy = new WebProxy
            {
                Address = new Uri("http://unblock.oxylabs.io:60000"),
                BypassProxyOnLocal = false,
                UseDefaultCredentials = false,
                Credentials = new NetworkCredential(
                    userName: "YOUR_USERNAME",
                    password: "YOUR_PASSWORD"
                )
            };
            var httpClientHandler = new HttpClientHandler
            {
                Proxy = webProxy,
            };
            // We recommend accepting our certificate instead of allowing insecure (http) traffic
            httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
            // The callback returns true unconditionally, so every server certificate is accepted.
            httpClientHandler.ServerCertificateCustomValidationCallback =
                (httpRequestMessage, cert, certChain, policyErrors) => true;

            // Dispose the client (and, via disposeHandler, its handler) once the request is done.
            using (var client = new HttpClient(handler: httpClientHandler, disposeHandler: true))
            {
                // Add custom header
                client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "html");

                client.BaseAddress = new Uri("https://youtube.com");
                using (var requestMessage = new HttpRequestMessage(HttpMethod.Get, ""))
                using (var response = await client.SendAsync(requestMessage))
                {
                    var contents = await response.Content.ReadAsStringAsync();
                    Console.WriteLine(contents);
                }
            }
        }
    }
}
package main
import (
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)
// main sends a GET request for https://youtube.com through the proxy with the
// X-Oxylabs-Render header set to "html" and prints the response body.
// Any failure is printed and the program returns early.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	// Proxy URL with the credentials embedded as user:password@host:port.
	proxyUrl, err := url.Parse(
		fmt.Sprintf(
			"http://%s:%s@unblock.oxylabs.io:60000",
			Username,
			Password,
		),
	)
	if err != nil {
		fmt.Println("invalid proxy URL:", err)
		return
	}
	customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}

	// We recommend accepting our certificate instead of allowing insecure (http) traffic
	customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

	client := &http.Client{Transport: customTransport}
	request, err := http.NewRequest("GET",
		"https://youtube.com",
		nil,
	)
	if err != nil {
		fmt.Println("could not build request:", err)
		return
	}

	// Add custom header
	request.Header.Add("X-Oxylabs-Render", "html")

	request.SetBasicAuth(Username, Password)
	response, err := client.Do(request)
	if err != nil {
		// response is nil on error, so it must not be touched below.
		fmt.Println("request failed:", err)
		return
	}
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("could not read response body:", err)
		return
	}
	fmt.Println(string(responseText))
}
import okhttp3.Authenticator;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import javax.net.ssl.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Proxy-Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";
    /**
     * Sends a GET request to https://youtube.com through the proxy with the
     * X-Oxylabs-Render header set to "html" and prints the response body.
     * Exits the JVM with status 0 afterwards, or with status 1 after printing
     * the stack trace if the call throws.
     */
    public void run() {
        // Adds a Proxy-Authorization header carrying HTTP Basic credentials to the request.
        Authenticator proxyAuth = (route, challenge) -> {
            String basicCredentials = Credentials.basic(USERNAME, PASSWORD);
            return challenge.request().newBuilder()
                    .header(AUTHORIZATION_HEADER, basicCredentials)
                    .build();
        };

        OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
        // We recommend accepting our certificate instead of allowing insecure (http) traffic
        this.disableSSLCertificateChecking(clientBuilder);

        InetSocketAddress proxyAddress = new InetSocketAddress("unblock.oxylabs.io", 60000);
        OkHttpClient client = clientBuilder
                .proxy(new Proxy(Proxy.Type.HTTP, proxyAddress))
                .proxyAuthenticator(proxyAuth)
                .build();

        Request request = new Request.Builder()
                .url("https://youtube.com")
                .addHeader("X-Oxylabs-Render", "html")
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }
    /**
     * Configures the given builder to accept any server certificate and any host name.
     * The same permissive socket factory and host name verifier are also installed as the
     * HttpsURLConnection defaults. If the TLS context cannot be set up, the stack trace
     * is printed and the JVM exits with status 1.
     *
     * @param builder the OkHttp client builder to configure
     */
    private void disableSSLCertificateChecking(OkHttpClient.Builder builder) {
        // Trust manager whose checks are empty, so no certificate chain is ever rejected.
        X509TrustManager trustEverything = new X509TrustManager() {
            @Override
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                return new java.security.cert.X509Certificate[0];
            }

            @Override
            public void checkServerTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }

            @Override
            public void checkClientTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }
        };
        TrustManager[] trustManagers = {trustEverything};

        try {
            HttpsURLConnection.setDefaultHostnameVerifier((host, sslSession) -> true);
            SSLContext permissiveContext = SSLContext.getInstance("TLS");
            permissiveContext.init(null, trustManagers, new java.security.SecureRandom());
            SSLSocketFactory socketFactory = permissiveContext.getSocketFactory();
            HttpsURLConnection.setDefaultSSLSocketFactory(socketFactory);
            builder.sslSocketFactory(socketFactory, trustEverything);
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        builder.hostnameVerifier((hostname, session) -> true);
    }
    /** Entry point: runs a new Main instance on its own thread. */
    public static void main(String[] args) {
        Thread worker = new Thread(new Main());
        worker.start();
    }
}
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent'
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

// Proxy agent with the credentials embedded in the proxy URL. They are
// percent-encoded so that characters such as "@" or ":" cannot break the URL.
const agent = createHttpsProxyAgent(
  `http://${encodeURIComponent(username)}:${encodeURIComponent(password)}@unblock.oxylabs.io:60000`
);

// We recommend accepting our certificate instead of allowing insecure (http) traffic
// process.env values are strings; assigning a number relies on a deprecated implicit conversion.
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = '0';

// Header that requests the rendered page's HTML.
const headers = {
  'X-Oxylabs-Render': 'html',
};

const response = await fetch('https://youtube.com', {
  method: 'get',
  headers,
  agent,
});
console.log(await response.text());
在浏览器中打开的HTML文件应该如下所示:

获得一个完全渲染的页面的屏幕截图 
如需获取PNG格式的截图而不是页面HTML,则需要提供"X-Oxylabs-Render: png"标头。
# Save the screenshot bytes to rendered_page.png. ">" overwrites the file, so the
# new image is never appended to the contents of an older one.
curl -k -v -x unblock.oxylabs.io:60000 \
-U user:pass1 "https://youtube.com" \
-H "X-Oxylabs-Render: png" > rendered_page.png
import requests
# Route both HTTP and HTTPS traffic through the proxy.
# Don't forget to replace "user" and "pass1" with your real username and password.
proxies = {
    'http': 'http://user:pass1@unblock.oxylabs.io:60000',
    'https': 'http://user:pass1@unblock.oxylabs.io:60000',
}
# Header that requests a PNG screenshot instead of the page HTML.
headers = {
    "X-Oxylabs-Render": "png"
}
response = requests.get(
    'https://youtube.com',
    verify=False,  # It is required to ignore certificate
    proxies=proxies,
    headers=headers,
)
# Save screenshot as PNG file
with open("rendered_page.png", 'wb') as f:
  f.write(response.content)
<?php
// Request a PNG screenshot of https://youtube.com through the proxy and save it
// as rendered_page.png.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://youtube.com");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, 'unblock.oxylabs.io:60000');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "YOUR_USERNAME" . ":" . "YOUR_PASSWORD");
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt_array($ch, array(
    CURLOPT_HTTPHEADER  => array(
        'X-Oxylabs-Render: png'
    )
));
$result = curl_exec($ch);
if (curl_errno($ch)) {
    // The request failed, so there is no image data: report the error and keep any
    // existing rendered_page.png instead of replacing it with an empty file.
    echo 'Error:' . curl_error($ch);
    curl_close($ch);
} else {
    curl_close($ch);
    // Mode 'wb' creates the file or truncates an existing one, so no unlink() is needed.
    $fp = fopen('rendered_page.png', 'wb');
    fwrite($fp, $result);
    fclose($fp);
}
?>
using System;
using System.Net;
using System.Net.Http;
using System.Drawing;
using System.IO;
using System.Threading.Tasks;
namespace OxyApi
{
    class Program
    {
        /// <summary>
        /// Sends a GET request to https://youtube.com through the proxy with the
        /// X-Oxylabs-Render header set to "png" and saves the response bytes as rendered_page.png.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            var webProxy = new WebProxy
            {
                Address = new Uri("http://unblock.oxylabs.io:60000"),
                BypassProxyOnLocal = false,
                UseDefaultCredentials = false,
                Credentials = new NetworkCredential(
                    userName: "YOUR_USERNAME",
                    password: "YOUR_PASSWORD"
                )
            };
            var httpClientHandler = new HttpClientHandler
            {
                Proxy = webProxy,
            };
            // We recommend accepting our certificate instead of allowing insecure (http) traffic
            httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
            // The callback returns true unconditionally, so every server certificate is accepted.
            httpClientHandler.ServerCertificateCustomValidationCallback =
                (httpRequestMessage, cert, certChain, policyErrors) => true;

            // Dispose the client (and, via disposeHandler, its handler) once the request is done.
            using (var client = new HttpClient(handler: httpClientHandler, disposeHandler: true))
            {
                // Add custom header
                client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "png");

                client.BaseAddress = new Uri("https://youtube.com");
                using (var requestMessage = new HttpRequestMessage(HttpMethod.Get, ""))
                using (var response = await client.SendAsync(requestMessage))
                {
                    // Write the returned bytes to disk unchanged; no image decoding or
                    // re-encoding step is involved.
                    byte[] bytes = await response.Content.ReadAsByteArrayAsync();
                    File.WriteAllBytes("rendered_page.png", bytes);
                }
            }
        }
    }
}
package main
import (
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)
// main sends a GET request for https://youtube.com through the proxy with the
// X-Oxylabs-Render header set to "png" and saves the response bytes as
// rendered_page.png. Any failure is printed and the program returns early.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	// Proxy URL with the credentials embedded as user:password@host:port.
	proxyUrl, err := url.Parse(
		fmt.Sprintf(
			"http://%s:%s@unblock.oxylabs.io:60000",
			Username,
			Password,
		),
	)
	if err != nil {
		fmt.Println("invalid proxy URL:", err)
		return
	}
	customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}

	// We recommend accepting our certificate instead of allowing insecure (http) traffic
	customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

	client := &http.Client{Transport: customTransport}
	request, err := http.NewRequest("GET",
		"https://youtube.com",
		nil,
	)
	if err != nil {
		fmt.Println("could not build request:", err)
		return
	}

	// Add custom header
	request.Header.Add("X-Oxylabs-Render", "png")

	request.SetBasicAuth(Username, Password)
	response, err := client.Do(request)
	if err != nil {
		// response is nil on error, so it must not be touched below.
		fmt.Println("request failed:", err)
		return
	}
	defer response.Body.Close()

	responseData, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("could not read response body:", err)
		return
	}
	if err := ioutil.WriteFile("rendered_page.png", responseData, 0666); err != nil {
		fmt.Println("could not write rendered_page.png:", err)
		return
	}
	// fmt is used here because the snippet's import list does not include "log".
	fmt.Println("Image was saved")
}
import fs from 'fs';
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

// Proxy agent with the credentials embedded in the proxy URL. They are
// percent-encoded so that characters such as "@" or ":" cannot break the URL.
const agent = createHttpsProxyAgent(
  `http://${encodeURIComponent(username)}:${encodeURIComponent(password)}@unblock.oxylabs.io:60000`
);

// We recommend accepting our certificate instead of allowing insecure (http) traffic
// process.env values are strings; assigning a number relies on a deprecated implicit conversion.
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = '0';

// Header that requests a PNG screenshot instead of the page HTML.
const headers = {
  'X-Oxylabs-Render': 'png',
};

const response = await fetch('https://youtube.com', {
  method: 'get',
  headers,
  agent,
});

// Wait until the file stream has finished, so that code after this point runs
// only once rendered_page.png is completely written; stream errors reject.
await new Promise((resolve, reject) => {
  const file = fs.createWriteStream('./rendered_page.png');
  response.body.on('error', reject);
  file.on('error', reject);
  file.on('finish', resolve);
  response.body.pipe(file);
});
console.log('Image was saved');
响应将包含一个图像的原始字节,可以保存为PNG格式,并如下方示例打开:

Last updated
Was this helpful?

