推拉
推拉式并非最简单的集成方法,但它是最可靠的方法,因此我们推荐您采用这种方法,尤其是在您需要处理大量数据时。
推拉式是一种异步集成方法。这意味着,提交作业后,我们将迅速返回一个包含作业信息(所有提交的作业参数和作业 ID,以及用于下载结果和检查作业状态的 URL)的 JSON。通过这种集成方法,作业提交过程完全独立于下载结果。
在我们处理完您的作业之后,如果您在提交作业时提供了一个 回调 URL,我们将
POST
一个包含更新作业信息的 JSON 有效载荷(包括作业的 status
设置为 done
)到您的服务器。此时,您可以继续从我们的系统中下载结果。作业完成后,我们会将结果保留至少 24 小时,以供检索。注意:如果您不想费心搭建一个接收回调通知的服务,也可以每隔几秒钟尝试获取一次结果(这种做法称为轮询)。
下面这个端点只接受一个
query
或 url
值。
POST https://data.oxylabs.io/v1/queries
您必须以 JSON 有效载荷发送您的作业参数,如以下代码示例所示:
cURL
Python
PHP
C#
Golang
Java
Node.js
# Submit a job: POST the job parameters as a JSON payload, authenticating with user:password.
curl 'https://data.oxylabs.io/v1/queries' \
  --user user:pass1 \
  --header "Content-Type: application/json" \
  --data '{"source": "ENTER_SOURCE_HERE", "url": "https://www.example.com", "geo_location": "United States", "callback_url": "https://your.callback.url", "storage_type": "s3", "storage_url": "s3://your.storage.bucket.url"}'
import requests
from pprint import pprint
# Job parameters, sent to the API as a JSON payload.
payload = {
    "source": "ENTER_SOURCE_HERE",  # Source you choose, e.g. "universal"
    "url": "https://www.example.com",  # Check the specific source to see whether to use "url" or "query"
    "geo_location": "United States",  # Some sources accept a zip code or coordinates
    # "render": "html",  # Uncomment if you want to render JavaScript within the page
    # "parse": True,  # Check which sources support parsed data
    "callback_url": "https://your.callback.url",  # Required if using a callback listener
    "storage_type": "s3",
    "storage_url": "s3://your.storage.bucket.url",
}

# Submit the job. Passing `json=` makes requests serialize the payload as the
# JSON request body.
response = requests.post(
    "https://data.oxylabs.io/v1/queries",
    auth=("YOUR_USERNAME", "YOUR_PASSWORD"),  # Your credentials go here
    json=payload,
)

# Print prettified response to stdout.
pprint(response.json())
<?php
// Job parameters; encoded with json_encode() below and sent as the request body.
$params = array(
    'source' => 'ENTER_SOURCE_HERE', // Source you choose, e.g. "universal"
    'url' => 'https://www.example.com', // Check the specific source to see whether to use "url" or "query"
    'geo_location' => 'United States', // Some sources accept a zip code or coordinates
    //'render' => 'html', // Uncomment if you want to render JavaScript within the page
    //'parse' => TRUE, // Check which sources support parsed data
    'callback_url' => 'https://your.callback.url', // Required if using a callback listener
    'storage_type' => 's3',
    'storage_url' => 's3://your.storage.bucket.url'
);

$ch = curl_init();

curl_setopt($ch, CURLOPT_URL, "https://data.oxylabs.io/v1/queries");
// Return the response from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($params));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERPWD, "YOUR_USERNAME" . ":" . "YOUR_PASSWORD"); // Your credentials go here

$headers = array();
$headers[] = "Content-Type: application/json";
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

$result = curl_exec($ch);
echo $result;

// Report a transport-level failure, if any.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
?>
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Sends a POST request with the job parameters to /v1/queries using
    /// HTTP Basic authentication and writes the response body to the console.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            const string Username = "YOUR_USERNAME";
            const string Password = "YOUR_PASSWORD";

            // Job parameters; serialized to JSON by JsonContent.Create below.
            var parameters = new Dictionary<string, string>()
            {
                { "source", "ENTER_SOURCE_HERE" },
                { "url", "https://example.com" },
                { "geo_location", "United States" },
                { "callback_url", "https://your.callback.url" },
            };

            // The client, request and response are IDisposable; `using` releases
            // them when Main returns.
            using var client = new HttpClient();
            client.BaseAddress = new Uri("https://data.oxylabs.io");

            using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries");
            requestMessage.Content = JsonContent.Create(parameters);

            // Build the Basic authentication header: base64("username:password").
            var authenticationString = $"{Username}:{Password}";
            var base64EncodedAuthenticationString = Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(authenticationString));
            requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

            using var response = await client.SendAsync(requestMessage);
            var contents = await response.Content.ReadAsStringAsync();

            Console.WriteLine(contents);
        }
    }
}
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
// main submits a job to the /v1/queries endpoint with HTTP Basic
// authentication and prints the response body. Any failure is printed and
// the program returns instead of continuing with a nil value.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	// Job parameters, marshalled to JSON for the request body.
	payload := map[string]string{
		"source":       "ENTER_SOURCE_HERE",
		"url":          "https://example.com",
		"geo_location": "United States",
		"callback_url": "https://your.callback.url",
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	request, err := http.NewRequest("POST",
		"https://data.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	request.Header.Add("Content-type", "application/json")
	request.SetBasicAuth(Username, Password)

	client := &http.Client{}
	response, err := client.Do(request)
	if err != nil {
		// response is nil when Do fails, so it must not be dereferenced.
		fmt.Println("Error:", err)
		return
	}
	// The caller is responsible for closing the response body.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println(string(responseText))
}
import okhttp3.*;
import org.json.JSONObject;
/**
 * Submits a job to the /v1/queries endpoint with OkHttp and prints the
 * response body. Credentials are supplied through an OkHttp Authenticator.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";

    /** Builds the JSON job payload, sends it, and prints the response or the error. */
    public void run() {
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("source", "ENTER_SOURCE_HERE");
        jsonObject.put("url", "https://example.com");
        jsonObject.put("geo_location", "United States");
        jsonObject.put("callback_url", "https://your.callback.url");

        Authenticator authenticator = (route, response) -> {
            // If the failed request already carried credentials, they were
            // rejected; returning null stops OkHttp from retrying them.
            if (response.request().header(AUTHORIZATION_HEADER) != null) {
                return null;
            }

            String credential = Credentials.basic(USERNAME, PASSWORD);

            return response
                    .request()
                    .newBuilder()
                    .header(AUTHORIZATION_HEADER, credential)
                    .build();
        };

        var client = new OkHttpClient.Builder()
                .authenticator(authenticator)
                .build();

        var mediaType = MediaType.parse("application/json; charset=utf-8");
        var body = RequestBody.create(jsonObject.toString(), mediaType);
        var request = new Request.Builder()
                .url("https://data.oxylabs.io/v1/queries")
                .post(body)
                .build();

        try (var response = client.newCall(request).execute()) {
            // Explicit check: `assert` is skipped unless the JVM runs with -ea.
            var responseBody = response.body();
            if (responseBody == null) {
                System.out.println("Error: empty response body");
            } else {
                System.out.println(responseBody.string());
            }
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        // NOTE(review): presumably here so the JVM exits promptly instead of
        // waiting on OkHttp background threads - confirm before removing.
        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}
import fetch from 'node-fetch';
// Credentials used for HTTP Basic authentication.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';

// Basic auth token: base64 of "username:password".
const basicToken = Buffer.from(`${username}:${password}`).toString('base64');

// Job parameters, sent as the JSON request body.
const payload = {
  source: 'ENTER_SOURCE_HERE',
  url: 'https://www.example.com',
  geo_location: 'United States',
  callback_url: 'https://your.callback.url',
};

const requestOptions = {
  method: 'post',
  headers: {
    'Content-Type': 'application/json',
    Authorization: `Basic ${basicToken}`,
  },
  body: JSON.stringify(payload),
};

// Submit the job and print the parsed JSON response.
const reply = await fetch('https://data.oxylabs.io/v1/queries', requestOptions);
const jobInfo = await reply.json();

console.log(jobInfo);
API 将响应一个包含作业信息的 JSON,具体如下:
{
"callback_url": "https://your.callback.url",
"client_id": 5,
"context": [
{
"key": "results_language",
"value": null
},
{
"key": "safe_search",
"value": null
},
{
"key": "tbm",
"value": null
},
{
"key": "cr",
"value": null
},
{
"key": "filter",
"value": null
}
],
"created_at": "2019-10-01 00:00:01",
"domain": "com",
"geo_location": "United States",
"id": "12345678900987654321",
"limit": 10,
"locale": null,
"pages": 1,
"parse": false,
"render": null,
"url": "https://www.example.com",
"source": "universal",
"start_page": 1,
"status": "pending",
"storage_type": "s3",
"storage_url": "YOUR_BUCKET_NAME/12345678900987654321.json",
"subdomain": "www",
"updated_at": "2019-10-01 00:00:01",
"user_agent_type": "desktop",
"_links": [
{
"rel": "self",
"href": "http://data.oxylabs.io/v1/queries/12345678900987654321",
"method": "GET"
},
{
"rel": "results",
"href": "http://data.oxylabs.io/v1/queries/12345678900987654321/results",
"method": "GET"
}
]
}