JavaScript 渲染
Last updated
Last updated
如果您要抓取的页面需要通过JavaScript将所需数据动态加载到DOM中,您无需自行搭建和使用无头浏览器,只需在请求中包含“X-Oxylabs-Render: html”标头即可。
所有包含此标头的请求都将被完整渲染,所有数据将存储在一个HTML文件或PNG屏幕截图中(取决于所传递的标头值)。
此标头有两个可用值:
html
(已渲染页面的HTML)
png
(可保存为PNG的原始字节)
# Request https://ip.oxylabs.io through the proxy with the
# "X-Oxylabs-Render: html" header set.
curl --insecure --verbose \
  --proxy unblock.oxylabs.io:60000 \
  --proxy-user user:pass1 \
  --header "X-Oxylabs-Render: html" \
  "https://ip.oxylabs.io"
import requests
# Define proxy dict. Don't forget to put your real user and pass here as well.
proxies = {
    'http': 'http://user:pass1@unblock.oxylabs.io:60000',
    'https': 'http://user:pass1@unblock.oxylabs.io:60000',
}

# Header sent with the request; "html" is one of the two documented values.
headers = {
    "X-Oxylabs-Render": "html"
}

response = requests.get(
    'https://ip.oxylabs.io',
    verify=False,  # It is required to ignore certificate
    proxies=proxies,
    headers=headers,
)

# Print result page to stdout
print(response.text)

# Save returned HTML to result.html file.
# An explicit encoding avoids UnicodeEncodeError on platforms whose default
# text encoding cannot represent every character of the page.
with open('result.html', 'w', encoding='utf-8') as f:
    f.write(response.text)
<?php
// Request https://ip.oxylabs.io through the proxy with the
// "X-Oxylabs-Render: html" header and print the response body.
$curl = curl_init();

curl_setopt_array($curl, array(
    CURLOPT_URL => "https://ip.oxylabs.io",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_PROXY => 'unblock.oxylabs.io:60000',
    CURLOPT_PROXYUSERPWD => "YOUR_USERNAME" . ":" . "YOUR_PASSWORD",
    // Peer and host certificate verification are both switched off.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_HTTPHEADER => array(
        'X-Oxylabs-Render: html'
    ),
));

$body = curl_exec($curl);
echo $body;

if (curl_errno($curl)) {
    echo 'Error:' . curl_error($curl);
}

curl_close($curl);
?>
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Console sample: requests https://ip.oxylabs.io through the proxy with the
    /// X-Oxylabs-Render header set to "html" and prints the response body.
    /// </summary>
    class Program
    {
        static async Task Main(string[] args)
        {
            var proxyCredentials = new NetworkCredential(
                userName: "YOUR_USERNAME",
                password: "YOUR_PASSWORD"
            );
            var webProxy = new WebProxy
            {
                Address = new Uri($"http://unblock.oxylabs.io:60000"),
                BypassProxyOnLocal = false,
                UseDefaultCredentials = false,
                Credentials = proxyCredentials
            };

            // We recommend accepting our certificate instead of allowing insecure (http) traffic
            var httpClientHandler = new HttpClientHandler
            {
                Proxy = webProxy,
                ClientCertificateOptions = ClientCertificateOption.Manual,
                // The validation callback accepts every server certificate.
                ServerCertificateCustomValidationCallback =
                    (requestMessage, certificate, chain, policyErrors) => true
            };

            var client = new HttpClient(handler: httpClientHandler, disposeHandler: true)
            {
                BaseAddress = new Uri("https://ip.oxylabs.io")
            };

            // Add custom header
            client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "html");

            // An empty relative URI resolves to BaseAddress itself.
            var response = await client.SendAsync(new HttpRequestMessage(HttpMethod.Get, ""));
            var body = await response.Content.ReadAsStringAsync();
            Console.WriteLine(body);
        }
    }
}
package main

import (
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
)

// main requests https://ip.oxylabs.io through the proxy with the
// X-Oxylabs-Render header set to "html" and prints the response body.
// Any failure is logged and terminates the program with a non-zero status.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	proxyUrl, err := url.Parse(
		fmt.Sprintf(
			"http://%s:%s@unblock.oxylabs.io:60000",
			Username,
			Password,
		),
	)
	if err != nil {
		log.Fatalf("invalid proxy URL: %v", err)
	}
	customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}

	// We recommend accepting our certificate instead of allowing insecure (http) traffic
	customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

	client := &http.Client{Transport: customTransport}
	request, err := http.NewRequest("GET",
		"https://ip.oxylabs.io",
		nil,
	)
	if err != nil {
		log.Fatalf("building request failed: %v", err)
	}

	// Add custom header
	request.Header.Add("X-Oxylabs-Render", "html")
	request.SetBasicAuth(Username, Password)

	// On error the response is nil, so it must not be dereferenced.
	response, err := client.Do(request)
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}

	responseText, err := ioutil.ReadAll(response.Body)
	// Close explicitly rather than with defer: log.Fatalf exits without running deferred calls.
	response.Body.Close()
	if err != nil {
		log.Fatalf("reading response body failed: %v", err)
	}
	fmt.Println(string(responseText))
}
import okhttp3.Authenticator;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import javax.net.ssl.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
/**
 * Sample client that requests https://ip.oxylabs.io through the
 * unblock.oxylabs.io:60000 proxy with the "X-Oxylabs-Render: html" header
 * and prints the response body to standard output.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Proxy-Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    /** Runs the sample on a new thread. */
    public static void main(String[] args) {
        new Thread(new Main()).start();
    }

    /**
     * Builds the proxied client, sends the GET request and prints the body.
     * Exits the JVM with status 1 if an exception is caught, otherwise 0.
     */
    @Override
    public void run() {
        // Adds the Proxy-Authorization header with basic credentials to the request.
        Authenticator proxyAuthenticator = (route, response) ->
                response.request()
                        .newBuilder()
                        .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                        .build();

        OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
        // We recommend accepting our certificate instead of allowing insecure (http) traffic
        this.disableSSLCertificateChecking(clientBuilder);

        InetSocketAddress proxyAddress = new InetSocketAddress("unblock.oxylabs.io", 60000);
        OkHttpClient client = clientBuilder
                .proxy(new Proxy(Proxy.Type.HTTP, proxyAddress))
                .proxyAuthenticator(proxyAuthenticator)
                .build();

        Request request = new Request.Builder()
                .url("https://ip.oxylabs.io")
                .addHeader("X-Oxylabs-Render", "html")
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Configures the builder with a trust manager whose checks are empty and a
     * hostname verifier that always returns true, and installs the same
     * defaults on HttpsURLConnection. Exits with status 1 if the SSL context
     * cannot be set up.
     */
    private void disableSSLCertificateChecking(OkHttpClient.Builder builder) {
        X509TrustManager trustEverything = new X509TrustManager() {
            @Override
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                return new java.security.cert.X509Certificate[]{};
            }

            @Override
            public void checkServerTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }

            @Override
            public void checkClientTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }
        };
        TrustManager[] trustManagers = new TrustManager[]{trustEverything};

        try {
            HttpsURLConnection.setDefaultHostnameVerifier((s, sslSession) -> true);
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, trustManagers, new java.security.SecureRandom());
            HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
            builder.sslSocketFactory(sslContext.getSocketFactory(), trustEverything);
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        builder.hostnameVerifier((hostname, session) -> true);
    }
}
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent'
// Proxy credentials; replace the placeholders with your own.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

const proxyUrl = `http://${username}:${password}@unblock.oxylabs.io:60000`;
const agent = createHttpsProxyAgent(proxyUrl);

// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env.NODE_TLS_REJECT_UNAUTHORIZED = 0;

const headers = { 'X-Oxylabs-Render': 'html' };

// Send the request through the proxy agent and print the response body.
const response = await fetch('https://ip.oxylabs.io', {
  method: 'get',
  headers,
  agent,
});
const body = await response.text();
console.log(body);
在这个示例中,我们将渲染YouTube的主页并抓取页面内容。通常情况下,如果使用网页解锁器但未启用JavaScript渲染,YouTube的主页如下所示:
如下列示例所示,添加"X-Oxylabs-Render: html"标头将启用JavaScript渲染,并返回渲染后页面的HTML:
# Request https://youtube.com through the proxy with the
# "X-Oxylabs-Render: html" header set.
curl --insecure --verbose \
  --proxy unblock.oxylabs.io:60000 \
  --proxy-user user:pass1 \
  --header "X-Oxylabs-Render: html" \
  "https://youtube.com"
import requests
# Define proxy dict. Don't forget to put your real user and pass here as well.
proxies = {
    'http': 'http://user:pass1@unblock.oxylabs.io:60000',
    'https': 'http://user:pass1@unblock.oxylabs.io:60000',
}

# Header sent with the request; "html" is one of the two documented values.
headers = {
    "X-Oxylabs-Render": "html"
}

response = requests.get(
    'https://youtube.com',
    verify=False,  # It is required to ignore certificate
    proxies=proxies,
    headers=headers,
)

# Print result page to stdout
print(response.text)

# Save returned HTML to result.html file.
# An explicit encoding avoids UnicodeEncodeError on platforms whose default
# text encoding cannot represent every character of the page.
with open('result.html', 'w', encoding='utf-8') as f:
    f.write(response.text)
<?php
// Request https://youtube.com through the proxy with the
// "X-Oxylabs-Render: html" header and print the response body.
$curl = curl_init();

curl_setopt_array($curl, array(
    CURLOPT_URL => "https://youtube.com",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_PROXY => 'unblock.oxylabs.io:60000',
    CURLOPT_PROXYUSERPWD => "YOUR_USERNAME" . ":" . "YOUR_PASSWORD",
    // Peer and host certificate verification are both switched off.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => false,
    CURLOPT_HTTPHEADER => array(
        'X-Oxylabs-Render: html'
    ),
));

$body = curl_exec($curl);
echo $body;

if (curl_errno($curl)) {
    echo 'Error:' . curl_error($curl);
}

curl_close($curl);
?>
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Console sample: requests https://youtube.com through the proxy with the
    /// X-Oxylabs-Render header set to "html" and prints the response body.
    /// </summary>
    class Program
    {
        static async Task Main(string[] args)
        {
            var proxyCredentials = new NetworkCredential(
                userName: "YOUR_USERNAME",
                password: "YOUR_PASSWORD"
            );
            var webProxy = new WebProxy
            {
                Address = new Uri($"http://unblock.oxylabs.io:60000"),
                BypassProxyOnLocal = false,
                UseDefaultCredentials = false,
                Credentials = proxyCredentials
            };

            // We recommend accepting our certificate instead of allowing insecure (http) traffic
            var httpClientHandler = new HttpClientHandler
            {
                Proxy = webProxy,
                ClientCertificateOptions = ClientCertificateOption.Manual,
                // The validation callback accepts every server certificate.
                ServerCertificateCustomValidationCallback =
                    (requestMessage, certificate, chain, policyErrors) => true
            };

            var client = new HttpClient(handler: httpClientHandler, disposeHandler: true)
            {
                BaseAddress = new Uri("https://youtube.com")
            };

            // Add custom header
            client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "html");

            // An empty relative URI resolves to BaseAddress itself.
            var response = await client.SendAsync(new HttpRequestMessage(HttpMethod.Get, ""));
            var body = await response.Content.ReadAsStringAsync();
            Console.WriteLine(body);
        }
    }
}
package main

import (
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
)

// main requests https://youtube.com through the proxy with the
// X-Oxylabs-Render header set to "html" and prints the response body.
// Any failure is logged and terminates the program with a non-zero status.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	proxyUrl, err := url.Parse(
		fmt.Sprintf(
			"http://%s:%s@unblock.oxylabs.io:60000",
			Username,
			Password,
		),
	)
	if err != nil {
		log.Fatalf("invalid proxy URL: %v", err)
	}
	customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}

	// We recommend accepting our certificate instead of allowing insecure (http) traffic
	customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

	client := &http.Client{Transport: customTransport}
	request, err := http.NewRequest("GET",
		"https://youtube.com",
		nil,
	)
	if err != nil {
		log.Fatalf("building request failed: %v", err)
	}

	// Add custom header
	request.Header.Add("X-Oxylabs-Render", "html")
	request.SetBasicAuth(Username, Password)

	// On error the response is nil, so it must not be dereferenced.
	response, err := client.Do(request)
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}

	responseText, err := ioutil.ReadAll(response.Body)
	// Close explicitly rather than with defer: log.Fatalf exits without running deferred calls.
	response.Body.Close()
	if err != nil {
		log.Fatalf("reading response body failed: %v", err)
	}
	fmt.Println(string(responseText))
}
import okhttp3.Authenticator;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import javax.net.ssl.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
/**
 * Sample client that requests https://youtube.com through the
 * unblock.oxylabs.io:60000 proxy with the "X-Oxylabs-Render: html" header
 * and prints the response body to standard output.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Proxy-Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    /** Runs the sample on a new thread. */
    public static void main(String[] args) {
        new Thread(new Main()).start();
    }

    /**
     * Builds the proxied client, sends the GET request and prints the body.
     * Exits the JVM with status 1 if an exception is caught, otherwise 0.
     */
    @Override
    public void run() {
        // Adds the Proxy-Authorization header with basic credentials to the request.
        Authenticator proxyAuthenticator = (route, response) ->
                response.request()
                        .newBuilder()
                        .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                        .build();

        OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
        // We recommend accepting our certificate instead of allowing insecure (http) traffic
        this.disableSSLCertificateChecking(clientBuilder);

        InetSocketAddress proxyAddress = new InetSocketAddress("unblock.oxylabs.io", 60000);
        OkHttpClient client = clientBuilder
                .proxy(new Proxy(Proxy.Type.HTTP, proxyAddress))
                .proxyAuthenticator(proxyAuthenticator)
                .build();

        Request request = new Request.Builder()
                .url("https://youtube.com")
                .addHeader("X-Oxylabs-Render", "html")
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Configures the builder with a trust manager whose checks are empty and a
     * hostname verifier that always returns true, and installs the same
     * defaults on HttpsURLConnection. Exits with status 1 if the SSL context
     * cannot be set up.
     */
    private void disableSSLCertificateChecking(OkHttpClient.Builder builder) {
        X509TrustManager trustEverything = new X509TrustManager() {
            @Override
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                return new java.security.cert.X509Certificate[]{};
            }

            @Override
            public void checkServerTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }

            @Override
            public void checkClientTrusted(java.security.cert.X509Certificate[] chain, String authType) {
            }
        };
        TrustManager[] trustManagers = new TrustManager[]{trustEverything};

        try {
            HttpsURLConnection.setDefaultHostnameVerifier((s, sslSession) -> true);
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, trustManagers, new java.security.SecureRandom());
            HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
            builder.sslSocketFactory(sslContext.getSocketFactory(), trustEverything);
        } catch (Exception exception) {
            exception.printStackTrace();
            System.exit(1);
        }
        builder.hostnameVerifier((hostname, session) -> true);
    }
}
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent'
// Proxy credentials; replace the placeholders with your own.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

const proxyUrl = `http://${username}:${password}@unblock.oxylabs.io:60000`;
const agent = createHttpsProxyAgent(proxyUrl);

// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env.NODE_TLS_REJECT_UNAUTHORIZED = 0;

const headers = { 'X-Oxylabs-Render': 'html' };

// Send the request through the proxy agent and print the response body.
const response = await fetch('https://youtube.com', {
  method: 'get',
  headers,
  agent,
});
const body = await response.text();
console.log(body);
在浏览器中打开的HTML文件应该如下所示:
如需获取PNG格式的截图而不是页面HTML,则需要提供"X-Oxylabs-Render: png"标头。
# Request https://youtube.com with the "X-Oxylabs-Render: png" header and
# save the response body as rendered_page.png.
# Redirect with ">" (truncate), not ">>" (append): appending to an existing
# rendered_page.png would leave a corrupt image after a second run.
curl -k -v -x unblock.oxylabs.io:60000 \
  -U user:pass1 "https://youtube.com" \
  -H "X-Oxylabs-Render: png" > rendered_page.png
import requests
# Define proxy dict. Don't forget to put your real user and pass here as well.
proxies = {
    'http': 'http://user:pass1@unblock.oxylabs.io:60000',
    'https': 'http://user:pass1@unblock.oxylabs.io:60000',
}

# Header sent with the request; "png" is one of the two documented values.
headers = {
    "X-Oxylabs-Render": "png"
}

response = requests.get(
    'https://youtube.com',
    verify=False,  # It is required to ignore certificate
    proxies=proxies,
    headers=headers,
)

# Save screenshot as PNG file (binary mode: the body is raw bytes, not text)
with open("rendered_page.png", 'wb') as f:
    f.write(response.content)
<?php
// Request https://youtube.com with the "X-Oxylabs-Render: png" header and
// save the response body as rendered_page.png.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://youtube.com");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, 'unblock.oxylabs.io:60000');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "YOUR_USERNAME" . ":" . "YOUR_PASSWORD");
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt_array($ch, array(
    CURLOPT_HTTPHEADER => array(
        'X-Oxylabs-Render: png'
    )
));

$result = curl_exec($ch);

// Stop on a failed transfer: $result is false in that case, and writing it
// out would replace any existing screenshot with an empty file.
if ($result === false || curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
    curl_close($ch);
    exit(1);
}
curl_close($ch);

// file_put_contents creates the file or overwrites an existing one.
if (file_put_contents('rendered_page.png', $result) === false) {
    echo 'Error: could not write rendered_page.png';
    exit(1);
}
?>
using System;
using System.Net;
using System.Net.Http;
using System.Drawing;
using System.IO;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Console sample: requests https://youtube.com through the proxy with the
    /// X-Oxylabs-Render header set to "png" and saves the response body as
    /// rendered_page.png.
    /// </summary>
    class Program
    {
        static async Task Main(string[] args)
        {
            var webProxy = new WebProxy
            {
                Address = new Uri($"http://unblock.oxylabs.io:60000"),
                BypassProxyOnLocal = false,
                UseDefaultCredentials = false,
                Credentials = new NetworkCredential(
                    userName: "YOUR_USERNAME",
                    password: "YOUR_PASSWORD"
                )
            };

            var httpClientHandler = new HttpClientHandler
            {
                Proxy = webProxy,
            };

            // We recommend accepting our certificate instead of allowing insecure (http) traffic
            httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
            httpClientHandler.ServerCertificateCustomValidationCallback =
                (httpRequestMessage, cert, certChain, policyErrors) =>
                {
                    return true;
                };

            var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);

            // Add custom header
            client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "png");

            Uri baseUri = new Uri("https://youtube.com");
            client.BaseAddress = baseUri;

            var requestMessage = new HttpRequestMessage(HttpMethod.Get, "");
            var response = await client.SendAsync(requestMessage);

            byte[] bytes = await response.Content.ReadAsByteArrayAsync();

            // Write the received bytes straight to disk. Decoding and re-encoding
            // them through System.Drawing.Image is unnecessary and depends on
            // System.Drawing.Common, which is Windows-only on .NET 6 and later.
            File.WriteAllBytes("rendered_page.png", bytes);
        }
    }
}
package main

import (
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"net/url"
)

// main requests https://youtube.com through the proxy with the
// X-Oxylabs-Render header set to "png" and writes the response body to
// rendered_page.png. Any failure is logged and terminates the program with a
// non-zero status.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	proxyUrl, err := url.Parse(
		fmt.Sprintf(
			"http://%s:%s@unblock.oxylabs.io:60000",
			Username,
			Password,
		),
	)
	if err != nil {
		log.Fatalf("invalid proxy URL: %v", err)
	}
	customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}

	// We recommend accepting our certificate instead of allowing insecure (http) traffic
	customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}

	client := &http.Client{Transport: customTransport}
	request, err := http.NewRequest("GET",
		"https://youtube.com",
		nil,
	)
	if err != nil {
		log.Fatalf("building request failed: %v", err)
	}

	// Add custom header
	request.Header.Add("X-Oxylabs-Render", "png")
	request.SetBasicAuth(Username, Password)

	// On error the response is nil, so it must not be dereferenced.
	response, err := client.Do(request)
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}

	responseData, err := ioutil.ReadAll(response.Body)
	response.Body.Close()
	if err != nil {
		log.Fatalf("reading response body failed: %v", err)
	}

	// Report success only when the file was actually written.
	if err := ioutil.WriteFile("rendered_page.png", responseData, 0666); err != nil {
		log.Fatalf("writing rendered_page.png failed: %v", err)
	}
	log.Println("Image was saved")
}
import fs from 'fs';
import { pipeline } from 'stream/promises';
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent';

const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

const agent = createHttpsProxyAgent(
  `http://${username}:${password}@unblock.oxylabs.io:60000`
);

// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = 0;

const headers = {
  'X-Oxylabs-Render': 'png',
};

const response = await fetch('https://youtube.com', {
  method: 'get',
  headers: headers,
  agent: agent,
});

// pipeline() resolves only after the write stream has finished and rejects if
// either stream errors, so the success message is no longer printed while the
// file is still being written.
await pipeline(response.body, fs.createWriteStream('./rendered_page.png'));

console.log('Image was saved');
响应将包含一个图像的原始字节,可以保存为PNG格式,并如下方示例打开: