Other Search Engines
Scrape other search engines (Yahoo, Yandex, DuckDuckGo, Baidu, etc.) with our universal
source. It accepts URLs along with additional parameters.
Request samples
In the example below, we make a request to retrieve a result for the provided Baidu URL.
curl 'https://realtime.oxylabs.io/v1/queries' \
--user 'USERNAME:PASSWORD' \
-H 'Content-Type: application/json' \
-d '{
"source": "universal",
"url": "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas"
}'
import requests
from pprint import pprint
# Structure payload.
payload = {
'source': 'universal',
'url': 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas'
}
# Get response.
response = requests.request(
'POST',
'https://realtime.oxylabs.io/v1/queries',
auth=('USERNAME', 'PASSWORD'),
json=payload,
)
# Instead of response with job status and results url, this will return the
# JSON response with results.
pprint(response.json())
const https = require("https");
const username = "USERNAME";
const password = "PASSWORD";
const body = {
source: "universal",
query: "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas",
};
const options = {
hostname: "realtime.oxylabs.io",
path: "/v1/queries",
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization:
"Basic " + Buffer.from(`${username}:${password}`).toString("base64"),
},
};
const request = https.request(options, (response) => {
let data = "";
response.on("data", (chunk) => {
data += chunk;
});
response.on("end", () => {
const responseData = JSON.parse(data);
console.log(JSON.stringify(responseData, null, 2));
});
});
request.on("error", (error) => {
console.error("Error:", error);
});
request.write(JSON.stringify(body));
request.end();
# URL has to be encoded to escape `&` and `=` characters:
# URL: http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas
# Encoded URL: http%3A%2F%2Fwww.baidu.com%2Fs%3Fie%3Dutf-8%26f%3D8%26rsv_bp%3D1%26rsv_idx%3D1%26ch%3D%26tn%3Dbaidu%26bar%3D%26wd%3Dadidas
https://realtime.oxylabs.io/v1/queries?source=universal&url=https%3A%2F%2Fwww.baidu.com%2Fs%3Fie%3Dutf-8%26f%3D8%26rsv_bp%3D1%26rsv_idx%3D1%26ch%3D%26tn%3Dbaidu&bar=&wd=adidas&access_token=12345abcde
<?php
$params = array(
'source' => 'universal',
'url' => 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas'
);
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://realtime.oxylabs.io/v1/queries");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($params));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERPWD, "USERNAME" . ":" . "PASSWORD");
$headers = array();
$headers[] = "Content-Type: application/json";
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close($ch);
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
func main() {
const Username = "USERNAME"
const Password = "PASSWORD"
payload := map[string]interface{}{
"source": "universal",
"url": "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas",
}
jsonValue, _ := json.Marshal(payload)
client := &http.Client{}
request, _ := http.NewRequest("POST",
"https://realtime.oxylabs.io/v1/queries",
bytes.NewBuffer(jsonValue),
)
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseText, _ := ioutil.ReadAll(response.Body)
fmt.Println(string(responseText))
}
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main()
{
const string Username = "USERNAME";
const string Password = "PASSWORD";
var parameters = new {
source = "universal",
url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas"
};
var client = new HttpClient();
Uri baseUri = new Uri("https://realtime.oxylabs.io");
client.BaseAddress = baseUri;
var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries");
requestMessage.Content = JsonContent.Create(parameters);
var authenticationString = $"{Username}:{Password}";
var base64EncodedAuthenticationString = Convert.ToBase64String(System.Text.ASCIIEncoding.UTF8.GetBytes(authenticationString));
requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);
var response = await client.SendAsync(requestMessage);
var contents = await response.Content.ReadAsStringAsync();
Console.WriteLine(contents);
}
}
}
package org.example;
import okhttp3.*;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Authorization";
public static final String USERNAME = "USERNAME";
public static final String PASSWORD = "PASSWORD";
public void run() {
JSONObject jsonObject = new JSONObject();
jsonObject.put("source", "universal");
jsonObject.put("url", "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas");
Authenticator authenticator = (route, response) -> {
String credential = Credentials.basic(USERNAME, PASSWORD);
return response
.request()
.newBuilder()
.header(AUTHORIZATION_HEADER, credential)
.build();
};
var client = new OkHttpClient.Builder()
.authenticator(authenticator)
.readTimeout(180, TimeUnit.SECONDS)
.build();
var mediaType = MediaType.parse("application/json; charset=utf-8");
var body = RequestBody.create(jsonObject.toString(), mediaType);
var request = new Request.Builder()
.url("https://realtime.oxylabs.io/v1/queries")
.post(body)
.build();
try (var response = client.newCall(request).execute()) {
if (response.body() != null) {
try (var responseBody = response.body()) {
System.out.println(responseBody.string());
}
}
} catch (Exception exception) {
System.out.println("Error: " + exception.getMessage());
}
System.exit(0);
}
public static void main(String[] args) {
new Thread(new Main()).start();
}
}
{
"source": "universal",
"url": "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas"
}
Output example
{
"results": [
{
"content": "<!DOCTYPE html>
CONTENT
</html>",
"created_at": "2024-07-01 08:48:59",
"updated_at": "2024-07-01 08:49:02",
"page": 1,
"url": "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=adidas",
"job_id": "7213463587904304129",
"status_code": 200,
"session_info": {
"id": null,
"expires_at": null,
"remaining": null
}
}
]
}
We use synchronous Realtime integration method in our examples. If you would like to use Proxy Endpoint or asynchronous Push-Pull integration, refer to the integration methods section.
Request parameter values
Generic
Basic setup and customization options for scraping other search engines.
- mandatory parameter
Additional
These are the parameters of our features.
Payload sample
In this sample, we include some parameters from the table above. Note that while these parameters are not always necessary or compatible within the same request, they illustrate how to format your requests.
{
"source": "universal",
"url": "https://example.com",
"user_agent_type": "desktop",
"geo_location": "United States",
"context": [
{
"key": "headers",
"value": {
"Content-Type": "application/octet-stream",
"Custom-Header-Name": "custom header content"
}
},
{
"key": "cookies",
"value": [
{
"key": "NID",
"value": "1234567890"
},
{
"key": "1P JAR",
"value": "0987654321"
}
]
},
{
"key": "follow_redirects",
"value": true
},
{
"key": "http_method",
"value": "post"
},
{
"key": "content",
"value": "base64EncodedPOSTBody"
},
{
"key": "successful_status_codes",
"value": [303, 808, 909]
}
],
"parsing_instructions": {
"title": {
"_fns": [
{
"_fn": "xpath_one",
"_args": ["//h1/text()"]
}
]
}
}
}
Last updated