Push-Pull: Single Job
Push-Pull is our recommended integration method for reliably handling large amounts of data, including batch queries.
Visit the Oxylabs GitHub repository for a complete working example of Push-Pull integration in Python.
Push-Pull is an asynchronous integration method. Upon job submission, you will promptly receive a JSON
response containing all job details, including job parameters, ID, and URLs for result download and status checking. Once your job is processed, we will update you via a JSON
payload sent to your server, if you provided a callback URL. Results remain available for retrieval for at least 24 hours after completion.
With Push-Pull, you can upload your results directly to your cloud storage (AWS S3 or Google Cloud Storage).
If you prefer not to set up a service for incoming callback notifications, you can simply retrieve your results periodically (polling).
You can also explore how Push-Pull works using Postman.
Single Job
Endpoint
This endpoint accepts only a single query
or URL
value.
POST https://data.oxylabs.io/v1/queries
Input
Provide the job parameters in a JSON payload as shown in the examples below. Python and PHP examples include comments for clarity.
# Submit a single job; the JSON payload carries the job parameters.
curl 'https://data.oxylabs.io/v1/queries' \
  --user "user:pass1" \
  --header "Content-Type: application/json" \
  --data '{"source": "ENTER_SOURCE_HERE", "url": "https://www.example.com", "geo_location": "United States", "callback_url": "https://your.callback.url", "storage_type": "s3", "storage_url": "s3://your.storage.bucket.url"}'
import requests
from pprint import pprint
# Build the job payload. Optional parameters are left commented out;
# uncomment the ones your chosen source supports.
payload = {
    "source": "ENTER_SOURCE_HERE",  # The source you choose, e.g. "google"
    "url": "https://www.example.com",  # Check the specific source to see whether it takes "url" or "query"
    "geo_location": "United States",  # Some sources accept a ZIP code or coordinates
    # "render": "html",  # Uncomment if you want to render JavaScript within the page
    # "render": "png",  # Uncomment if you want to take a screenshot of the scraped web page
    # "parse": True,  # Check which sources support parsed data
    "callback_url": "https://your.callback.url",  # Only needed if you use a callback listener
    "storage_type": "s3",
    "storage_url": "s3://your.storage.bucket.url",
}

# Submit the job.
response = requests.request(
    'POST',
    'https://data.oxylabs.io/v1/queries',
    auth=('YOUR_USERNAME', 'YOUR_PASSWORD'),  # Your credentials go here
    json=payload,
)

# Print the prettified response to stdout.
pprint(response.json())
<?php
// Submits a single job and prints the raw JSON response.

// Job parameters. Optional parameters are left commented out; uncomment the
// ones your chosen source supports.
$params = array(
    'source' => 'ENTER_SOURCE_HERE', // The source you choose, e.g. "google"
    'url' => 'https://www.example.com', // Check the specific source to see whether it takes "url" or "query"
    'geo_location' => 'United States', // Some sources accept a ZIP code or coordinates
    //'render' => 'html', // Uncomment if you want to render JavaScript within the page
    //'render' => 'png', // Uncomment if you want to take a screenshot of the scraped web page
    //'parse' => true, // Check which sources support parsed data
    'callback_url' => 'https://your.callback.url', // Only needed if you use a callback listener
    'storage_type' => 's3',
    'storage_url' => 's3://your.storage.bucket.url'
);

$ch = curl_init();

curl_setopt($ch, CURLOPT_URL, "https://data.oxylabs.io/v1/queries");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($params));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERPWD, "YOUR_USERNAME" . ":" . "YOUR_PASSWORD"); // Your credentials go here

$headers = array();
$headers[] = "Content-Type: application/json";
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

$result = curl_exec($ch);

// curl_exec() yields false when the transfer fails, so report the cURL
// error in that case and print the body only on success.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
} else {
    echo $result;
}
curl_close($ch);
?>
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Submits a single job to the /v1/queries endpoint and prints the raw response body.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            const string Username = "YOUR_USERNAME";
            const string Password = "YOUR_PASSWORD";

            // Job parameters, serialized as the JSON request body.
            var jobPayload = new Dictionary<string, string>
            {
                ["source"] = "ENTER_SOURCE_HERE",
                ["url"] = "https://example.com",
                ["geo_location"] = "United States",
                ["callback_url"] = "https://your.callback.url",
            };

            var http = new HttpClient { BaseAddress = new Uri("https://data.oxylabs.io") };

            // HTTP Basic credentials: base64("username:password").
            var basicToken = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}"));

            var submitRequest = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(jobPayload),
            };
            submitRequest.Headers.Add("Authorization", "Basic " + basicToken);

            var reply = await http.SendAsync(submitRequest);
            Console.WriteLine(await reply.Content.ReadAsStringAsync());
        }
    }
}
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
// main submits a single job to the /v1/queries endpoint and prints the raw
// response body to stdout. Any failure is printed and ends the program.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	payload := map[string]string{
		"source":       "ENTER_SOURCE_HERE",
		"url":          "https://example.com",
		"geo_location": "United States",
		"callback_url": "https://your.callback.url",
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	client := &http.Client{}
	request, err := http.NewRequest("POST",
		"https://data.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	request.Header.Add("Content-type", "application/json")
	request.SetBasicAuth(Username, Password)

	// On failure the response is nil, so it must not be touched before the
	// error has been checked.
	response, err := client.Do(request)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// Close the body so the underlying connection is released.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println(string(responseText))
}
package org.example;
import okhttp3.*;
import org.json.JSONObject;
/**
 * Submits a single job to the /v1/queries endpoint and prints the raw
 * response body to stdout.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    public void run() {
        // Job parameters, sent as the JSON request body.
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("source", "ENTER_SOURCE_HERE");
        jsonObject.put("url", "https://example.com");
        jsonObject.put("geo_location", "United States");
        jsonObject.put("callback_url", "https://your.callback.url");

        var client = new OkHttpClient();

        var mediaType = MediaType.parse("application/json; charset=utf-8");
        var body = RequestBody.create(jsonObject.toString(), mediaType);
        var request = new Request.Builder()
                .url("https://data.oxylabs.io/v1/queries")
                // Send the Basic credentials with the first request. An OkHttp
                // Authenticator is only consulted after a 401 and, without a
                // give-up check, keeps retrying when the credentials are wrong.
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .post(body)
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
import fetch from 'node-fetch';
// Account credentials used for HTTP Basic authentication.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const basicToken = Buffer.from(`${username}:${password}`).toString('base64');

// Job parameters, sent as the JSON request body.
const jobParameters = {
  source: 'ENTER_SOURCE_HERE',
  url: 'https://www.example.com',
  geo_location: 'United States',
  callback_url: 'https://your.callback.url',
};

const requestOptions = {
  method: 'post',
  body: JSON.stringify(jobParameters),
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Basic ' + basicToken,
  },
};

// Submit the job and print the decoded JSON reply.
const submission = await fetch('https://data.oxylabs.io/v1/queries', requestOptions);
const jobInfo = await submission.json();
console.log(jobInfo);
Output
The API will respond with a JSON containing the job information, similar to this:
{
"callback_url": "https://your.callback.url",
"client_id": 5,
"context": [
{
"key": "results_language",
"value": null
},
{
"key": "safe_search",
"value": null
},
{
"key": "tbm",
"value": null
},
{
"key": "cr",
"value": null
},
{
"key": "filter",
"value": null
}
],
"created_at": "2019-10-01 00:00:01",
"domain": "com",
"geo_location": "United States",
"id": "12345678900987654321",
"limit": 10,
"locale": null,
"pages": 1,
"parse": false,
"render": null,
"url": "https://www.example.com",
"source": "universal",
"start_page": 1,
"status": "pending",
"storage_type": "s3",
"storage_url": "YOUR_BUCKET_NAME/12345678900987654321.json",
"subdomain": "www",
"updated_at": "2019-10-01 00:00:01",
"user_agent_type": "desktop",
"_links": [
{
"rel": "self",
"href": "http://data.oxylabs.io/v1/queries/12345678900987654321",
"method": "GET"
},
{
"rel": "results",
"href": "http://data.oxylabs.io/v1/queries/12345678900987654321/results",
"method": "GET"
}
]
}
Data dictionary
For detailed descriptions of the job input parameters, please consult the table below or refer to the specific documentation pages for the scrapers you are interested in.
Callback
The callback is a POST
request we send to your machine, informing you that the data extraction task is completed and providing a URL to download the scraped content. This means that you no longer need to check job status manually. Once the data is here, we will let you know, and all you need to do now is retrieve it.
Input
# This is a simple Sanic web server with a route listening for callbacks on localhost:8080.
# It will print job results to stdout.
import requests
from pprint import pprint
from sanic import Sanic, response
# Credentials used when downloading the results of a finished job.
AUTH_TUPLE = ('user', 'pass1')

# Newer Sanic releases refuse to create an application without a name.
app = Sanic('job_listener')


# Define /job_listener endpoint that accepts POST requests.
@app.route('/job_listener', methods=['POST'])
async def job_listener(request):
    """Handle a callback: download the job's results and print them.

    Looks for the link whose ``rel`` is ``results`` in the callback's
    ``_links`` list and fetches it with the configured credentials. Any
    exception is printed, and the handler always answers
    ``{"status": "ok"}`` with HTTP 200.
    """
    try:
        res = request.json
        links = res.get('_links', [])
        for link in links:
            if link['rel'] == 'results':
                # Sanic is async, but requests are synchronous, to fully take
                # advantage of Sanic, use aiohttp.
                res_response = requests.request(
                    method='GET',
                    url=link['href'],
                    auth=AUTH_TUPLE,
                )
                pprint(res_response.json())
                break
    except Exception as e:
        print("Listener exception: {}".format(e))
    return response.json({'status': 'ok'}, status=200)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
<?php
// Callback listener: when a job-completion callback is POSTed here, download
// the job's results and write them to stdout.
$stdout = fopen('php://stdout', 'w');

// isset($_POST) is always true, so check the request method explicitly.
$method = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : '';

if ($method === 'POST') {
    // Decode the JSON body into an associative array and merge it with any
    // form fields.
    $payload = json_decode(file_get_contents('php://input'), true);
    $callback = array_merge($_POST, is_array($payload) ? $payload : array());
    $jobId = isset($callback['id']) ? (string) $callback['id'] : '';

    // The id comes from the request body, so accept it only when it consists
    // of digits (as in the documented samples) before placing it in the URL.
    if (ctype_digit($jobId)) {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, "https://data.oxylabs.io/v1/queries/" . $jobId . '/results');
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
        curl_setopt($ch, CURLOPT_USERPWD, "user" . ":" . "pass1");

        $result = curl_exec($ch);

        if (curl_errno($ch)) {
            echo 'Error:' . curl_error($ch);
        } else {
            fwrite($stdout, $result);
        }
        curl_close($ch);
    }
}
?>
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using System;
using System.Collections.Generic;
using System.Net.Http;
namespace OxyApiWeb
{
    /// <summary>The part of the callback payload this listener reads: its "_links" array.</summary>
    public class Callback
    {
        public Link[] _links { get; set; }
    }

    /// <summary>One entry of "_links": the relation name and the URL it points to.</summary>
    public class Link
    {
        public string rel { get; set; }
        public string href { get; set; }
    }

    /// <summary>
    /// Web application exposing POST /job_listener, which downloads and prints
    /// the results of the job referenced by an incoming callback.
    /// </summary>
    public class Startup
    {
        private const string USERNAME = "YOUR_USERNAME";
        private const string PASSWORD = "YOUR_PASSWORD";

        public Startup(IConfiguration configuration)
        {
            Configuration = configuration;
            client = new HttpClient();
        }

        public IConfiguration Configuration { get; }

        private HttpClient client;

        public void ConfigureServices(IServiceCollection services)
        {
            services.AddControllers();
        }

        public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
        {
            if (env.IsDevelopment())
            {
                app.UseDeveloperExceptionPage();
            }

            app.UseRouting();
            app.UseAuthorization();
            app.UseEndpoints(endpoints =>
            {
                endpoints.MapPost("/job_listener", async context =>
                {
                    var callback = await System.Text.Json.JsonSerializer.DeserializeAsync<Callback>(context.Request.Body);

                    // A payload without "_links" (or a JSON null body) leaves these
                    // null; treat that as "no links" instead of throwing.
                    var links = callback?._links ?? Array.Empty<Link>();

                    foreach (var link in links)
                    {
                        if (link.rel != "results")
                        {
                            continue;
                        }

                        var requestMessage = new HttpRequestMessage(HttpMethod.Get, new Uri(link.href));

                        // HTTP Basic credentials: base64("username:password").
                        var authenticationString = $"{USERNAME}:{PASSWORD}";
                        var base64EncodedAuthenticationString = Convert.ToBase64String(System.Text.ASCIIEncoding.UTF8.GetBytes(authenticationString));
                        requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

                        var response = await client.SendAsync(requestMessage);
                        var contents = await response.Content.ReadAsStringAsync();

                        Console.WriteLine(contents);
                    }

                    var okMessage = new Dictionary<string, string>()
                    {
                        { "message", "ok" }
                    };

                    await System.Text.Json.JsonSerializer.SerializeAsync(context.Response.Body, okMessage);
                });
            });
        }
    }
}
package main
import (
"fmt"
"github.com/labstack/echo/v4"
"io/ioutil"
"net/http"
)
// Credentials used for HTTP Basic authentication when downloading results.
const (
	Username = "YOUR_USERNAME"
	Password = "YOUR_PASSWORD"
)

// Link is one entry of the "_links" array in a callback payload.
type Link struct {
	Href   string `json:"href"`
	Method string `json:"method"`
	Rel    string `json:"rel"`
}

// Callback is the part of the callback payload the listener reads.
type Callback struct {
	Links []Link `json:"_links"`
}
// main starts an Echo server on :8080 whose POST /job_listener route
// downloads and prints the results referenced by an incoming callback.
func main() {
	echoServer := echo.New()
	client := &http.Client{}

	echoServer.POST("/job_listener", func(context echo.Context) error {
		callback := new(Callback)
		if err := context.Bind(callback); err != nil {
			return err
		}

		for _, link := range callback.Links {
			if link.Rel != "results" {
				continue
			}

			request, err := http.NewRequest("GET",
				link.Href,
				nil,
			)
			if err != nil {
				fmt.Println("Error:", err)
				continue
			}

			request.Header.Add("Content-type", "application/json")
			request.SetBasicAuth(Username, Password)

			// On failure the response is nil, so check the error first.
			response, err := client.Do(request)
			if err != nil {
				fmt.Println("Error:", err)
				continue
			}

			responseText, err := ioutil.ReadAll(response.Body)
			// Close right away rather than defer: this runs inside a loop.
			response.Body.Close()
			if err != nil {
				fmt.Println("Error:", err)
				continue
			}

			fmt.Println(string(responseText))
		}

		return context.JSON(http.StatusOK, map[string]string{"status": "ok"})
	})

	echoServer.Logger.Fatal(echoServer.Start(":8080"))
}
package org.example;
import okhttp3.*;
import com.sun.net.httpserver.HttpServer;
import org.apache.commons.io.IOUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Objects;
/**
 * Callback listener: serves /job_listener on port 8080 and, for each
 * incoming callback, downloads and prints the job's results.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    public void run() {
        HttpServer server = null;
        try {
            server = HttpServer.create(new InetSocketAddress("0.0.0.0", 8080), 0);
        } catch (IOException exception) {
            exception.printStackTrace();
            System.exit(1);
        }

        var client = new OkHttpClient();

        server.createContext("/job_listener", exchange -> {
            var requestBody = IOUtils.toString(exchange.getRequestBody(), StandardCharsets.UTF_8);
            JSONObject requestJson = new JSONObject(requestBody);

            // optJSONArray returns null instead of throwing when "_links" is absent.
            JSONArray links = requestJson.optJSONArray("_links");
            if (links != null) {
                for (var link : links.toList()) {
                    var linkMap = (Map<?, ?>) link;
                    if (!Objects.equals(linkMap.get("rel"), "results")) {
                        continue;
                    }

                    var request = new Request.Builder()
                            .url((String) linkMap.get("href"))
                            // Send the Basic credentials with the first request. An
                            // OkHttp Authenticator is only consulted after a 401 and,
                            // without a give-up check, keeps retrying on bad credentials.
                            .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                            .get()
                            .build();

                    try (var response = client.newCall(request).execute()) {
                        assert response.body() != null;
                        System.out.println(response.body().string());
                    } catch (Exception exception) {
                        System.out.println("Error: " + exception.getMessage());
                    }
                }
            }

            var responseJson = new JSONObject();
            responseJson.put("status", "ok");

            // Content-Length must be the number of bytes written, not the
            // number of characters in the string.
            byte[] responseBytes = responseJson.toString().getBytes(StandardCharsets.UTF_8);
            exchange.sendResponseHeaders(200, responseBytes.length);

            try (OutputStream responseBody = exchange.getResponseBody()) {
                responseBody.write(responseBytes);
                responseBody.flush();
            }
            exchange.close();
        });

        server.setExecutor(null);
        server.start();
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
import express from 'express'
import fetch from 'node-fetch';
// Account credentials used for HTTP Basic authentication.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

const app = express();
app.use(express.json());

// POST /job_listener: download and print the results referenced by the
// callback's "_links" array, then acknowledge the callback.
app.post('/job_listener', async (request, response) => {
  try {
    for (const link of request.body._links ?? []) {
      if (link.rel !== 'results') {
        continue;
      }

      const jobResultResponse = await fetch(link.href, {
        method: 'get',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': 'Basic ' + Buffer.from(`${username}:${password}`).toString('base64'),
        }
      });

      console.log(await jobResultResponse.json());
    }
  } catch (error) {
    // A failed download must not leave the callback request unanswered or
    // surface as an unhandled rejection.
    console.error('Listener exception:', error);
  }

  response.send({status: 'ok'});
});

app.listen(8080);
Output
{
"created_at":"2019-10-01 00:00:01",
"updated_at":"2019-10-01 00:00:15",
"locale":null,
"client_id":163,
"user_agent_type":"desktop",
"source":"google_shopping_search",
"pages":1,
"subdomain":"www",
"status":"done",
"start_page":1,
"parse":0,
"render":null,
"priority":0,
"ttl":0,
"origin":"api",
"persist":true,
"id":"12345678900987654321",
"callback_url":"http://your.callback.url/",
"query":"adidas",
"domain":"com",
"limit":10,
"geo_location":null,
{...}
"_links":[
{
"href":"https://data.oxylabs.io/v1/queries/12345678900987654321",
"method":"GET",
"rel":"self"
},
{
"href":"https://data.oxylabs.io/v1/queries/12345678900987654321/results",
"method":"GET",
"rel":"results"
}
]
}
Check Job Status
If you provided a valid callback URL when submitting your job, we will notify you upon completion by sending a JSON
payload to the specified callback URL. This payload will indicate that the job has been completed and its status set to done
.
However, if you submitted a job without using the callback service, you can check the job status manually. Retrieve the URL from the href
field in the rel:self
section of the response message received after job submission. The URL for checking the job status will resemble the following: http://data.oxylabs.io/v1/queries/12345678900987654321
. Querying this URL will return the job information, including its current status
.
Endpoint
GET https://data.oxylabs.io/v1/queries/{id}
Input
# Check a job's status. HTTPS keeps the Basic-auth credentials off the wire in clear text.
curl --user "user:pass1" \
'https://data.oxylabs.io/v1/queries/12345678900987654321'
import requests
from pprint import pprint
# Get the job information from the job status endpoint.
# HTTPS is used so the Basic-auth credentials are not sent in clear text.
response = requests.request(
    method='GET',
    url='https://data.oxylabs.io/v1/queries/12345678900987654321',
    auth=('user', 'pass1'),
)

# Print prettified JSON response to stdout.
pprint(response.json())
<?php
// Fetches the job information for one job ID and prints the raw response.
$ch = curl_init();

// HTTPS is used so the Basic-auth credentials are not sent in clear text.
curl_setopt($ch, CURLOPT_URL, "https://data.oxylabs.io/v1/queries/12345678900987654321");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($ch, CURLOPT_USERPWD, "user" . ":" . "pass1");

$result = curl_exec($ch);
echo $result;

if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
?>
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Requests the job information for one job ID and prints the raw response body.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            const string JobId = "12345678900987654321";
            const string Username = "YOUR_USERNAME";
            const string Password = "YOUR_PASSWORD";

            var http = new HttpClient { BaseAddress = new Uri("https://data.oxylabs.io") };

            // HTTP Basic credentials: base64("username:password").
            var basicToken = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}"));

            var statusRequest = new HttpRequestMessage(HttpMethod.Get, $"/v1/queries/{JobId}");
            statusRequest.Headers.Add("Authorization", "Basic " + basicToken);

            var reply = await http.SendAsync(statusRequest);
            Console.WriteLine(await reply.Content.ReadAsStringAsync());
        }
    }
}
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
// main requests the job information for one job ID and prints the raw
// response body to stdout. Any failure is printed and ends the program.
func main() {
	const JobId = "12345678900987654321"
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	client := &http.Client{}
	request, err := http.NewRequest("GET",
		fmt.Sprintf("https://data.oxylabs.io/v1/queries/%s", JobId),
		nil,
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	request.Header.Add("Content-type", "application/json")
	request.SetBasicAuth(Username, Password)

	// On failure the response is nil, so check the error first.
	response, err := client.Do(request)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// Close the body so the underlying connection is released.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println(string(responseText))
}
package org.example;
import okhttp3.*;
/**
 * Requests the job information for one job ID and prints the raw response
 * body to stdout.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    private static final String JOB_ID = "12345678900987654321";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    public void run() {
        var client = new OkHttpClient();

        var request = new Request.Builder()
                .url(String.format("https://data.oxylabs.io/v1/queries/%s", JOB_ID))
                // Send the Basic credentials with the first request. An OkHttp
                // Authenticator is only consulted after a 401 and, without a
                // give-up check, keeps retrying when the credentials are wrong.
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
import fetch from 'node-fetch';
// Job to look up, and the account credentials for HTTP Basic authentication.
const jobId = '12345678900987654321';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

const statusUrl = `https://data.oxylabs.io/v1/queries/${jobId}`;
const authorization = 'Basic ' + Buffer.from(`${username}:${password}`).toString('base64');

// Request the job information and print the decoded JSON reply.
const statusReply = await fetch(statusUrl, {
  method: 'get',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': authorization,
  },
});
const jobInfo = await statusReply.json();
console.log(jobInfo);
Output
Upon completion of the job, the API will respond with query information in JSON format. The job status will be changed to done
, indicating that the job is finished. You can retrieve the content by querying one of the provided links. Additionally, the response will include the timestamp of when the job was last updated, allowing you to track its processing time.
{
"client_id": 5,
"context": [
{
"key": "results_language",
"value": null
},
{
"key": "safe_search",
"value": null
},
{
"key": "tbm",
"value": null
},
{
"key": "cr",
"value": null
},
{
"key": "filter",
"value": null
}
],
"created_at": "2019-10-01 00:00:01",
"domain": "com",
"geo_location": null,
"id": "7173957294344910849",
"limit": 10,
"locale": null,
"pages": 1,
"parse": false,
"render": null,
"query": "adidas",
"source": "google_shopping_search",
"start_page": 1,
"status": "done",
"subdomain": "www",
"updated_at": "2019-10-01 00:00:15",
"user_agent_type": "desktop",
"_links": [
{
"rel": "self",
"href": "http://data.oxylabs.io/v1/queries/7173957294344910849",
"method": "GET"
},
{
"rel": "results",
"href": "http://data.oxylabs.io/v1/queries/7173957294344910849/results",
"method": "GET"
},
{
"rel": "results-html",
"href": "http://data.oxylabs.io/v1/queries/7173957294344910849/results?type=raw",
"method": "GET"
},
{
"rel": "results-parsed",
"href": "http://data.oxylabs.io/v1/queries/7173957294344910849/results?type=parsed",
"method": "GET"
},
{
"rel": "results-png",
"href": "http://data.oxylabs.io/v1/queries/7173957294344910849/results?type=png",
"method": "GET"
}
]
}
Status values
Retrieve Job Content
Once the job is ready to be retrieved, you can use the URL provided in the response under the rel:results
section. The URL will look like this: http://data.oxylabs.io/v1/queries/7173957294344910849/results
.
Endpoints
You can retrieve different results types by using the following endpoints:
GET https://data.oxylabs.io/v1/queries/{job_id}/results
GET https://data.oxylabs.io/v1/queries/{job_id}/results?type=raw
GET https://data.oxylabs.io/v1/queries/{job_id}/results?type=parsed
GET https://data.oxylabs.io/v1/queries/{job_id}/results?type=png
Input
Below are code examples demonstrating how to use the /results
endpoint:
# Download a job's results. HTTPS keeps the Basic-auth credentials off the wire in clear text.
curl --user "user:pass1" \
'https://data.oxylabs.io/v1/queries/12345678900987654321/results'
import requests
from pprint import pprint
# Get the job's content from the results endpoint.
# HTTPS is used so the Basic-auth credentials are not sent in clear text.
response = requests.request(
    method='GET',
    url='https://data.oxylabs.io/v1/queries/12345678900987654321/results',
    auth=('user', 'pass1'),
)

# Print prettified JSON response to stdout.
pprint(response.json())
<?php
// Downloads the results of one job and prints the raw response.
$ch = curl_init();

// HTTPS is used so the Basic-auth credentials are not sent in clear text.
curl_setopt($ch, CURLOPT_URL, "https://data.oxylabs.io/v1/queries/12345678900987654321/results");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
curl_setopt($ch, CURLOPT_USERPWD, "user" . ":" . "pass1");

$result = curl_exec($ch);
echo $result;

if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
?>
using System;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Downloads the results of one job and prints the raw response body.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            const string JobId = "12345678900987654321";
            const string Username = "YOUR_USERNAME";
            const string Password = "YOUR_PASSWORD";

            var http = new HttpClient { BaseAddress = new Uri("https://data.oxylabs.io") };

            // HTTP Basic credentials: base64("username:password").
            var credentialBytes = System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}");
            var authorizationValue = "Basic " + Convert.ToBase64String(credentialBytes);

            var resultsRequest = new HttpRequestMessage(HttpMethod.Get, $"/v1/queries/{JobId}/results");
            resultsRequest.Headers.Add("Authorization", authorizationValue);

            var reply = await http.SendAsync(resultsRequest);
            var replyText = await reply.Content.ReadAsStringAsync();
            Console.WriteLine(replyText);
        }
    }
}
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
// main downloads the results of one job and prints the raw response body
// to stdout. Any failure is printed and ends the program.
func main() {
	const JobId = "12345678900987654321"
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	client := &http.Client{}
	request, err := http.NewRequest("GET",
		fmt.Sprintf("https://data.oxylabs.io/v1/queries/%s/results", JobId),
		nil,
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	request.Header.Add("Content-type", "application/json")
	request.SetBasicAuth(Username, Password)

	// On failure the response is nil, so check the error first.
	response, err := client.Do(request)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// Close the body so the underlying connection is released.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println(string(responseText))
}
package org.example;
import okhttp3.*;
/**
 * Downloads the results of one job and prints the raw response body to
 * stdout.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    private static final String JOB_ID = "12345678900987654321";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    public void run() {
        var client = new OkHttpClient();

        var request = new Request.Builder()
                .url(String.format("https://data.oxylabs.io/v1/queries/%s/results", JOB_ID))
                // Send the Basic credentials with the first request. An OkHttp
                // Authenticator is only consulted after a 401 and, without a
                // give-up check, keeps retrying when the credentials are wrong.
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
import fetch from 'node-fetch';
// Job whose results are downloaded, and the account credentials.
const jobId = '12345678900987654321';
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

const resultsUrl = `https://data.oxylabs.io/v1/queries/${jobId}/results`;
const requestHeaders = {
  'Content-Type': 'application/json',
  'Authorization': 'Basic ' + Buffer.from(`${username}:${password}`).toString('base64'),
};

// Download the results and print the decoded JSON reply.
const resultsReply = await fetch(resultsUrl, { method: 'get', headers: requestHeaders });
const results = await resultsReply.json();
console.log(results);
Output
This table explains the default and other available result types based on the headers included in the payload of the API request.
Below is a sample response of the /results
endpoint:
{
"results": [
{
"content": "<!doctype html><html>
CONTENT
</html>",
"created_at": "2019-10-01 00:00:01",
"updated_at": "2019-10-01 00:00:15",
"page": 1,
"url": "https://www.google.com/search?q=adidas&hl=en&gl=US",
"job_id": "12345678900987654321",
"status_code": 200
}
]
}
The results can be automatically retrieved without periodically checking job status by setting up a Callback service. To do that, specify the URL of a server that is able to accept incoming HTTP(S) requests while submitting a job. When our system completes the job, it will POST
a JSON payload to the provided URL, and the Callback service will download the results as described in the Callback implementation example.
Get Notifier IP Address List
You may want to whitelist the IPs sending you callback messages or get the list of these IPs for other purposes. You can do this by GET
ting this endpoint:
Endpoint
GET https://data.oxylabs.io/v1/info/callbacker_ips
Input
The code examples below show how you can access the /callbacker_ips
endpoint:
# List the IP addresses that send callback notifications.
curl 'https://data.oxylabs.io/v1/info/callbacker_ips' \
  -u "user:pass1"
import requests
from pprint import pprint
# Ask the callback IPs endpoint for its address list.
ips_reply = requests.get(
    'https://data.oxylabs.io/v1/info/callbacker_ips',
    auth=('user', 'pass1'),
)

# Pretty-print the decoded JSON body to stdout.
ips_listing = ips_reply.json()
pprint(ips_listing)
<?php
// Fetches the list of IP addresses that send callback notifications and
// prints the raw response.
$curl = curl_init();

curl_setopt_array($curl, array(
    CURLOPT_URL => "https://data.oxylabs.io/v1/info/callbacker_ips",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CUSTOMREQUEST => "GET",
    CURLOPT_USERPWD => "user" . ":" . "pass1",
));

$body = curl_exec($curl);
echo $body;

if (curl_errno($curl)) {
    echo 'Error:' . curl_error($curl);
}
curl_close($curl);
?>
using System;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Requests the list of callback notifier IP addresses and prints the raw response body.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            const string Username = "YOUR_USERNAME";
            const string Password = "YOUR_PASSWORD";

            var http = new HttpClient { BaseAddress = new Uri("https://data.oxylabs.io") };

            // HTTP Basic credentials: base64("username:password").
            var basicToken = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}"));

            var ipsRequest = new HttpRequestMessage(HttpMethod.Get, "/v1/info/callbacker_ips");
            ipsRequest.Headers.Add("Authorization", "Basic " + basicToken);

            var reply = await http.SendAsync(ipsRequest);
            Console.WriteLine(await reply.Content.ReadAsStringAsync());
        }
    }
}
package main
import (
"fmt"
"io/ioutil"
"net/http"
)
// main requests the list of callback notifier IP addresses and prints the
// raw response body to stdout. Any failure is printed and ends the program.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	client := &http.Client{}
	request, err := http.NewRequest("GET",
		"https://data.oxylabs.io/v1/info/callbacker_ips",
		nil,
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	request.Header.Add("Content-type", "application/json")
	request.SetBasicAuth(Username, Password)

	// On failure the response is nil, so check the error first.
	response, err := client.Do(request)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// Close the body so the underlying connection is released.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println(string(responseText))
}
package org.example;
import okhttp3.*;
/**
 * Requests the list of callback notifier IP addresses and prints the raw
 * response body to stdout.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    public void run() {
        var client = new OkHttpClient();

        var request = new Request.Builder()
                .url("https://data.oxylabs.io/v1/info/callbacker_ips")
                // Send the Basic credentials with the first request. An OkHttp
                // Authenticator is only consulted after a 401 and, without a
                // give-up check, keeps retrying when the credentials are wrong.
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .get()
                .build();

        try (var response = client.newCall(request).execute()) {
            assert response.body() != null;
            System.out.println(response.body().string());
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
import fetch from 'node-fetch';
// Account credentials used for HTTP Basic authentication.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const encodedCredentials = Buffer.from(`${username}:${password}`).toString('base64');

// Request the notifier IP list and print the decoded JSON reply.
const ipsReply = await fetch('https://data.oxylabs.io/v1/info/callbacker_ips', {
  method: 'get',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': 'Basic ' + encodedCredentials,
  },
});
const ipListing = await ipsReply.json();
console.log(ipListing);
Output
The API will return the list of IPs making callback requests to your system:
{
"ips": [
"x.x.x.x",
"y.y.y.y"
]
}
Scheduler
Scheduler is a service that you can use to schedule recurring scraping jobs.
It extends the functionality of Push-Pull integration and is best used together with the Cloud integration functionality. Read more about Scheduler feature here.
Last updated