JavaScript rendering
Last updated
Last updated
If the page you wish to scrape requires loading JavaScript to dynamically load all required data into the DOM, instead of setting up and using a headless browser by yourself, you can include the "X-Oxylabs-Render: html"
header with your requests. All requests with this header included will be fully rendered, and all data will be stored in an HTML file or PNG screenshot (depending on the passed parameter).
JavaScript rendering takes more time to scrape the page. When using JavaScript rendering, set client side timeout value to 180 seconds.
To ensure lowest traffic consumption, our system does not load unnecessary assets during page rendering.
This parameter has two available values:
html
(HTML of a rendered page)
png
(raw bytes that can be saved as PNG)
curl -k -v -x https://unblock.oxylabs.io:60000 \
-U 'USERNAME:PASSWORD' \
'https://ip.oxylabs.io/location' \
-H 'X-Oxylabs-Render: html'
import requests
# Use your Web Unblocker credentials here.
USERNAME, PASSWORD = 'YOUR_USERNAME', 'YOUR_PASSWORD'
# Define proxy dict.
proxies = {
'http': f'http://{USERNAME}:{PASSWORD}@unblock.oxylabs.io:60000',
'https': f'https://{USERNAME}:{PASSWORD}@unblock.oxylabs.io:60000',
}
headers = {
'X-Oxylabs-Render': 'html'
}
response = requests.get(
'https://ip.oxylabs.io/location',
verify=False, # It is required to ignore certificate
proxies=proxies,
headers=headers,
)
# Print result page to stdout
print(response.text)
# Save returned HTML to result.html file
with open('result.html', 'w') as f:
f.write(response.text)
import fetch from 'node-fetch';
import { HttpsProxyAgent } from 'https-proxy-agent';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const agent = new HttpsProxyAgent(
`https://${username}:${password}@unblock.oxylabs.io:60000`
);
// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = 0;
const headers = {
'X-Oxylabs-Render': 'html',
}
const response = await fetch('https://ip.oxylabs.io/location', {
method: 'get',
headers: headers,
agent: agent,
});
console.log(await response.text());
<?php
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://ip.oxylabs.io/location');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, 'https://unblock.oxylabs.io:60000');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'YOUR_USERNAME' . ':' . 'YOUR_PASSWORD');
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt_array($ch, array(
CURLOPT_HTTPHEADER => array(
'X-Oxylabs-Render: html'
)
));
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close($ch);
package main
import (
"crypto/tls"
"fmt"
"io/ioutil"
"net/http"
"net/url"
)
func main() {
const Username = "YOUR_USERNAME"
const Password = "YOUR_PASSWORD"
proxyUrl, _ := url.Parse(
fmt.Sprintf(
"https://%s:%s@unblock.oxylabs.io:60000",
Username,
Password,
),
)
customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}
// We recommend accepting our certificate instead of allowing insecure (http) traffic
customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
client := &http.Client{Transport: customTransport}
request, _ := http.NewRequest("GET",
"https://ip.oxylabs.io/location",
nil,
)
// Add custom cookies
request.Header.Add("X-Oxylabs-Render", "html")
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseText, _ := ioutil.ReadAll(response.Body)
fmt.Println(string(responseText))
}
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main(string[] args)
{
var webProxy = new WebProxy
{
Address = new Uri("https://unblock.oxylabs.io:60000"),
BypassProxyOnLocal = false,
UseDefaultCredentials = false,
Credentials = new NetworkCredential(
userName: "YOUR_USERNAME",
password: "YOUR_PASSWORD"
)
};
var httpClientHandler = new HttpClientHandler
{
Proxy = webProxy,
};
// We recommend accepting our certificate instead of allowing insecure (http) traffic
httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
httpClientHandler.ServerCertificateCustomValidationCallback =
(httpRequestMessage, cert, cetChain, policyErrors) =>
{
return true;
};
var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);
// Add custom cookies
client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "html");
Uri baseUri = new Uri("https://ip.oxylabs.io/location");
client.BaseAddress = baseUri;
var requestMessage = new HttpRequestMessage(HttpMethod.Get, "");
var response = await client.SendAsync(requestMessage);
var contents = await response.Content.ReadAsStringAsync();
Console.WriteLine(contents);
}
}
}
package org.example;
import org.apache.hc.client5.http.auth.AuthScope;
import org.apache.hc.client5.http.auth.CredentialsProvider;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.impl.auth.CredentialsProviderBuilder;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.client5.http.ssl.NoopHostnameVerifier;
import org.apache.hc.client5.http.ssl.SSLConnectionSocketFactoryBuilder;
import org.apache.hc.client5.http.ssl.TrustAllStrategy;
import org.apache.hc.core5.http.HttpHost;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.message.StatusLine;
import org.apache.hc.core5.ssl.SSLContextBuilder;
import java.util.Arrays;
import java.util.Properties;
public class Main {
public static void main(final String[] args)throws Exception {
final CredentialsProvider credsProvider = CredentialsProviderBuilder.create()
.add(new AuthScope("unblock.oxylabs.io", 60000), "USERNAME", "PASSWORD".toCharArray())
.build();
final HttpHost target = new HttpHost("https", "ip.oxylabs.io", 443);
final HttpHost proxy = new HttpHost("https", "unblock.oxylabs.io", 60000);
try (final CloseableHttpClient httpclient = HttpClients.custom()
.setDefaultCredentialsProvider(credsProvider)
.setProxy(proxy)
// We recommend accepting our certificate instead of allowing insecure (http) traffic
.setConnectionManager(PoolingHttpClientConnectionManagerBuilder.create()
.setSSLSocketFactory(SSLConnectionSocketFactoryBuilder.create()
.setSslContext(SSLContextBuilder.create()
.loadTrustMaterial(TrustAllStrategy.INSTANCE)
.build())
.setHostnameVerifier(NoopHostnameVerifier.INSTANCE)
.build())
.build())
.build()) {
final RequestConfig config = RequestConfig.custom()
.build();
final HttpGet request = new HttpGet("/location");
request.addHeader("X-Oxylabs-Render","html");
request.setConfig(config);
System.out.println("Executing request " + request.getMethod() + " " + request.getUri() +
" via " + proxy + " headers: " + Arrays.toString(request.getHeaders()));
httpclient.execute(target, request, response -> {
System.out.println("----------------------------------------");
System.out.println(request + "->" + new StatusLine(response));
EntityUtils.consume(response.getEntity());
return null;
});
}
}
}
Scraping a website HTML
In this example, we will render the YouTube home page and scrape the page content. Normally Youtube homepage would look like this if Web Unblocker is used without Javascript rendering:
Adding the "X-Oxylabs-Render: html"
header, as provided in the examples below, will enable Javascript rendering and return an HTML of a rendered page:
curl -k -v -x https://unblock.oxylabs.io:60000 \
-U 'USERNAME:PASSWORD' \
'https://youtube.com' \
-H 'X-Oxylabs-Render: html'
import requests
# Use your Web Unblocker credentials here.
USERNAME, PASSWORD = 'YOUR_USERNAME', 'YOUR_PASSWORD'
# Define proxy dict.
proxies = {
'http': f'http://{USERNAME}:{PASSWORD}@unblock.oxylabs.io:60000',
'https': f'https://{USERNAME}:{PASSWORD}@unblock.oxylabs.io:60000',
}
headers = {
'X-Oxylabs-Render': 'html'
}
response = requests.get(
'https://youtube.com',
verify=False, # It is required to ignore certificate
proxies=proxies,
headers=headers,
)
# Print result page to stdout
print(response.text)
# Save returned HTML to result.html file
with open('result.html', 'w') as f:
f.write(response.text)
import fetch from 'node-fetch';
import { HttpsProxyAgent } from 'https-proxy-agent';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const agent = new HttpsProxyAgent(
`https://${username}:${password}@unblock.oxylabs.io:60000`
);
// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = 0;
const headers = {
'X-Oxylabs-Render': 'html',
}
const response = await fetch('https://youtube.com', {
method: 'get',
headers: headers,
agent: agent,
});
console.log(await response.text());
<?php
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://youtube.com');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, 'https://unblock.oxylabs.io:60000');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'YOUR_USERNAME' . ':' . 'YOUR_PASSWORD');
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt_array($ch, [
CURLOPT_HTTPHEADER => [
'X-Oxylabs-Render: html'
]
]);
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close($ch);
package main
import (
"crypto/tls"
"fmt"
"io/ioutil"
"net/http"
"net/url"
)
func main() {
const Username = "YOUR_USERNAME"
const Password = "YOUR_PASSWORD"
proxyUrl, _ := url.Parse(
fmt.Sprintf(
"https://%s:%s@unblock.oxylabs.io:60000",
Username,
Password,
),
)
customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}
// We recommend accepting our certificate instead of allowing insecure (http) traffic
customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
client := &http.Client{Transport: customTransport}
request, _ := http.NewRequest("GET",
"https://youtube.com",
nil,
)
// Add custom cookies
request.Header.Add("X-Oxylabs-Render", "html")
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseText, _ := ioutil.ReadAll(response.Body)
fmt.Println(string(responseText))
}
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main(string[] args)
{
var webProxy = new WebProxy
{
Address = new Uri("https://unblock.oxylabs.io:60000"),
BypassProxyOnLocal = false,
UseDefaultCredentials = false,
Credentials = new NetworkCredential(
userName: "YOUR_USERNAME",
password: "YOUR_PASSWORD"
)
};
var httpClientHandler = new HttpClientHandler
{
Proxy = webProxy,
};
// We recommend accepting our certificate instead of allowing insecure (http) traffic
httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
httpClientHandler.ServerCertificateCustomValidationCallback =
(httpRequestMessage, cert, cetChain, policyErrors) =>
{
return true;
};
var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);
// Add custom header
client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "html");
Uri baseUri = new Uri("https://youtube.com");
client.BaseAddress = baseUri;
var requestMessage = new HttpRequestMessage(HttpMethod.Get, "");
var response = await client.SendAsync(requestMessage);
var contents = await response.Content.ReadAsStringAsync();
Console.WriteLine(contents);
}
}
}
package org.example;
import org.apache.hc.client5.http.auth.AuthScope;
import org.apache.hc.client5.http.auth.CredentialsProvider;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.impl.auth.CredentialsProviderBuilder;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.client5.http.ssl.NoopHostnameVerifier;
import org.apache.hc.client5.http.ssl.SSLConnectionSocketFactoryBuilder;
import org.apache.hc.client5.http.ssl.TrustAllStrategy;
import org.apache.hc.core5.http.HttpHost;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.message.StatusLine;
import org.apache.hc.core5.ssl.SSLContextBuilder;
import java.util.Arrays;
import java.util.Properties;
public class Main {
public static void main(final String[] args)throws Exception {
final CredentialsProvider credsProvider = CredentialsProviderBuilder.create()
.add(new AuthScope("unblock.oxylabs.io", 60000), "USERNAME", "PASSWORD".toCharArray())
.build();
final HttpHost target = new HttpHost("https", "youtube.com", 443);
final HttpHost proxy = new HttpHost("https", "unblock.oxylabs.io", 60000);
try (final CloseableHttpClient httpclient = HttpClients.custom()
.setDefaultCredentialsProvider(credsProvider)
.setProxy(proxy)
// We recommend accepting our certificate instead of allowing insecure (http) traffic
.setConnectionManager(PoolingHttpClientConnectionManagerBuilder.create()
.setSSLSocketFactory(SSLConnectionSocketFactoryBuilder.create()
.setSslContext(SSLContextBuilder.create()
.loadTrustMaterial(TrustAllStrategy.INSTANCE)
.build())
.setHostnameVerifier(NoopHostnameVerifier.INSTANCE)
.build())
.build())
.build()) {
final RequestConfig config = RequestConfig.custom()
.build();
final HttpGet request = new HttpGet("/");
request.addHeader("X-Oxylabs-Render","html");
request.setConfig(config);
System.out.println("Executing request " + request.getMethod() + " " + request.getUri() +
" via " + proxy + " headers: " + Arrays.toString(request.getHeaders()));
httpclient.execute(target, request, response -> {
System.out.println("----------------------------------------");
System.out.println(request + "->" + new StatusLine(response));
EntityUtils.consume(response.getEntity());
return null;
});
}
}
}
The HTML file opened in a browser should look like this:
Getting a screenshot of a fully rendered page
To get a screenshot in PNG format instead of page HTML, it is required to provide the "X-Oxylabs-Render: png"
header.
curl -k -v -x https://unblock.oxylabs.io:60000 \
-U 'USERNAME:PASSWORD' \
'https://youtube.com' \
-H 'X-Oxylabs-Render: png' > rendered_page.png
import requests
# Use your Web Unblocker credentials here.
USERNAME, PASSWORD = 'YOUR_USERNAME', 'YOUR_PASSWORD'
# Define proxy dict.
proxies = {
'http': f'http://{USERNAME}:{PASSWORD}@unblock.oxylabs.io:60000',
'https': f'https://{USERNAME}:{PASSWORD}@unblock.oxylabs.io:60000',
}
headers = {
'X-Oxylabs-Render': 'png'
}
response = requests.get(
'https://youtube.com',
verify=False, # It is required to ignore certificate
proxies=proxies,
headers=headers,
)
# Save screeenshot as PNG file
with open('rendered_page.png', 'wb') as f:
f.write(response.content)
import fs from 'fs';
import fetch from 'node-fetch';
import { HttpsProxyAgent } from 'https-proxy-agent';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const agent = new HttpsProxyAgent(
`https://${username}:${password}@unblock.oxylabs.io:60000`
);
// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = 0;
const headers = {
'X-Oxylabs-Render': 'png',
}
await fetch('https://youtube.com', {
method: 'get',
headers: headers,
agent: agent,
}).then(res =>
res.body.pipe(fs.createWriteStream('./rendered_page.png'))
);
console.log('Image was saved');
<?php
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://youtube.com');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, 'https://unblock.oxylabs.io:60000');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'YOUR_USERNAME' . ':' . 'YOUR_PASSWORD');
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt_array($ch, [
CURLOPT_HTTPHEADER => [
'X-Oxylabs-Render: png'
]
]);
$result = curl_exec($ch);
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close ($ch);
if(file_exists('rendered_page.png')){
unlink('rendered_page.png');
}
$fp = fopen('rendered_page.png','wb');
fwrite($fp, $result);
fclose($fp);
package main
import (
"crypto/tls"
"fmt"
"io/ioutil"
"net/http"
"net/url"
)
func main() {
const Username = "YOUR_USERNAME"
const Password = "YOUR_PASSWORD"
proxyUrl, _ := url.Parse(
fmt.Sprintf(
"https://%s:%s@unblock.oxylabs.io:60000",
Username,
Password,
),
)
customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}
// We recommend accepting our certificate instead of allowing insecure (http) traffic
customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
client := &http.Client{Transport: customTransport}
request, _ := http.NewRequest("GET",
"https://youtube.com",
nil,
)
// Add custom cookies
request.Header.Add("X-Oxylabs-Render", "png")
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseData, _ := ioutil.ReadAll(response.Body)
response.Body.Close()
ioutil.WriteFile("rendered_page.png", responseData, 0666)
log.Println("Image was saved")
}
using System;
using System.Net;
using System.Net.Http;
using System.Drawing;
using System.IO;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main(string[] args)
{
var webProxy = new WebProxy
{
Address = new Uri("https://unblock.oxylabs.io:60000"),
BypassProxyOnLocal = false,
UseDefaultCredentials = false,
Credentials = new NetworkCredential(
userName: "YOUR_USERNAME",
password: "YOUR_PASSWORD"
)
};
var httpClientHandler = new HttpClientHandler
{
Proxy = webProxy,
};
// We recommend accepting our certificate instead of allowing insecure (http) traffic
httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
httpClientHandler.ServerCertificateCustomValidationCallback =
(httpRequestMessage, cert, cetChain, policyErrors) =>
{
return true;
};
var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);
// Add custom header
client.DefaultRequestHeaders.Add("X-Oxylabs-Render", "png");
Uri baseUri = new Uri("https://youtube.com");
client.BaseAddress = baseUri;
var requestMessage = new HttpRequestMessage(HttpMethod.Get, "");
var response = await client.SendAsync(requestMessage);
byte[] bytes = await response.Content.ReadAsByteArrayAsync();
using (Image image = Image.FromStream(new MemoryStream(bytes)))
{
image.Save("rendered_page.png");
}
}
}
}
package org.example;
import okhttp3.Authenticator;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import javax.net.ssl.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.util.concurrent.TimeUnit;
import java.io.File;
import org.apache.commons.io.FileUtils;
public class App implements Runnable
{
private static final String AUTHORIZATION_HEADER = "Proxy-Authorization";
public static final String USERNAME = "YOUR_USERNAME";
public static final String PASSWORD = "YOUR_PASSWORD";
public void run() {
Authenticator authenticator = (route, response) -> {
String credential = Credentials.basic(USERNAME, PASSWORD);
return response
.request()
.newBuilder()
.header(AUTHORIZATION_HEADER, credential)
.build();
};
OkHttpClient.Builder builder = new OkHttpClient.Builder()
.readTimeout(180, TimeUnit.SECONDS)
.writeTimeout(180, TimeUnit.SECONDS);
// We recommend accepting our certificate instead of allowing insecure (http) traffic
this.disableSSLCertificateChecking(builder);
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("unblock.oxylabs.io", 60000));
OkHttpClient client = builder
.proxy(proxy)
.proxyAuthenticator(authenticator)
.build();
var request = new Request.Builder()
.url("https://youtube.com")
.addHeader("X-Oxylabs-Render", "png")
.get()
.build();
try (var response = client.newCall(request).execute()) {
assert response.body() != null;
okhttp3.ResponseBody body = response.body();
if (body != null) {
byte[] bytes = body.bytes();
FileUtils.writeByteArrayToFile(new File("rendered_page.png"), bytes);
}
} catch (Exception exception) {
exception.printStackTrace();
System.exit(1);
}
System.exit(0);
}
private void disableSSLCertificateChecking(OkHttpClient.Builder builder) {
TrustManager[] trustManagers = new TrustManager[]{
new X509TrustManager() {
@Override
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return new java.security.cert.X509Certificate[]{};
}
@Override
public void checkServerTrusted(java.security.cert.X509Certificate[] x509Certificates, String authType) {
}
@Override
public void checkClientTrusted(java.security.cert.X509Certificate[] x509Certificates, String authType) {
}
}
};
try {
HttpsURLConnection.setDefaultHostnameVerifier((s, sslSession) -> true);
SSLContext sslContext = SSLContext.getInstance("TLS");
sslContext.init(null, trustManagers, new java.security.SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
builder.sslSocketFactory(sslContext.getSocketFactory(), (X509TrustManager) trustManagers[0]);
} catch (Exception exception) {
exception.printStackTrace();
System.exit(1);
}
builder.hostnameVerifier((hostname, session) -> true);
}
public static void main(String[] args) {
new Thread(new App()).start();
}
}
The response will contain raw bytes of an image that can be saved in PNG format and opened as in the example below:
For successful scraping, some page types of specific domains require rendering due to their dynamic content. Our system automatically enforces rendering for these pages, even if not explicitly set by the user.
Please note that rendered jobs consume more traffic compared to non-rendered jobs.
We want our users to be fully aware of this when scraping the following pages:
This approach provides the best possible scraping experience, ensuring data accuracy and reliability from these challenging pages.
If you wish to disable rendering, send the render header without a value
X-oxylabs-render: