Amazon
There are various page types we can scrape and parse on Amazon. You can either provide us with a full URL or a few input parameters via specifically built data sources (Search, Product, Offer listing, Reviews, Questions & Answers, Best Sellers, or Sellers) so we can form the URL on our end.
IMPORTANT: On most page types, Amazon tailors the returned results based on the delivery location of its customers. Therefore, we advise that you use the
geo_location
parameter to set your preferred delivery location. You can read more about using geo_location
with Amazon here. Below is a quick overview of all the available data
source
values we support with Amazon.
Source | Description | Structured data |
---|---|---|
amazon | Submit any Amazon URL you like. | Depends on the URL. |
amazon_bestsellers | List of best seller items in a taxonomy node of your choice. | Yes. |
amazon_pricing | List of offers available for an ASIN of your choice. | Yes. |
amazon_product | Product page of an ASIN of your choice. | Yes. |
amazon_questions | Q&A page of an ASIN of your choice. | Yes. |
amazon_reviews | Reviews page of an ASIN of your choice. | Yes. |
amazon_search | Search results for a search term of your choice. | Yes. |
amazon_sellers | Seller information of a seller of your choice. | Yes. |
You can jump to your preferred Amazon page type by selecting its name in the right-hand side menu. Each page contains the parameter table as well as code examples to help you get started with your query.
The
amazon
source is designed to retrieve the content from various Amazon URLs. Instead of sending multiple parameters, you can provide us with a direct URL to the required Amazon page. We do not strip any parameters or alter your URLs in any way. This data source also supports parsed data (structured data in JSON format), as long as the URL submitted is for one of the page types we can parse. If we cannot confirm that we can parse the page you requested, you will receive a failure message.
Parameter | Description | Default Value |
---|---|---|
source | N/A | |
url | Direct URL (link) to Amazon page | - |
user_agent_type | desktop | |
render | - | |
callback_url | - | |
parse | true will return structured data, as long as the URL submitted is for one of the page types we can parse. | false |
- required parameter
In the code examples below, we make a request to retrieve the Amazon product page for ASIN
B08Y6Z944Q
.
JSON
cURL
Python
PHP
C#
Golang
Java
Node.js
{
    "source": "amazon",
    "url": "https://www.amazon.co.uk/dp/B08Y6Z944Q/",
    "parse": true
}
# POST the Amazon product-page query to the Realtime endpoint as a JSON body,
# authenticating with HTTP Basic credentials.
curl 'https://realtime.oxylabs.io/v1/queries' \
  --user USERNAME:PASSWORD \
  --header "Content-Type: application/json" \
  --data '{"source": "amazon", "url": "https://www.amazon.co.uk/dp/B08Y6Z944Q/", "parse": true}'
from pprint import pprint

import requests

# Job description: fetch and parse one Amazon product page given by its URL.
job = dict(
    source='amazon',
    url='https://www.amazon.co.uk/dp/B08Y6Z944Q/',
    parse=True,
)

credentials = ('YOUR_USERNAME', 'YOUR_PASSWORD')  # Your credentials go here

# Submit the job to the Realtime endpoint.
reply = requests.request(
    method='POST',
    url='https://realtime.oxylabs.io/v1/queries',
    auth=credentials,
    json=job,
)

# The reply carries the results themselves as JSON, rather than a job status
# with a results URL, so it can be printed directly.
pprint(reply.json())
<?php
// Query description: fetch and parse one Amazon product page given by its URL.
$query = [
    'source' => 'amazon',
    'url' => 'https://www.amazon.co.uk/dp/B08Y6Z944Q/',
    'parse' => true,
];

// Your credentials go here
$username = "YOUR_USERNAME";
$password = "YOUR_PASSWORD";

// Configure the whole POST request in a single call.
$handle = curl_init();
curl_setopt_array($handle, [
    CURLOPT_URL => "https://realtime.oxylabs.io/v1/queries",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($query),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => $username . ":" . $password,
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
]);

// Print the response body, followed by the cURL error if one occurred.
$body = curl_exec($handle);
echo $body;
if (curl_errno($handle)) {
    echo 'Error:' . curl_error($handle);
}
curl_close($handle);
?>
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
    /// <summary>
    /// Sends one query for an Amazon product page to the Realtime endpoint
    /// and writes the raw response body to standard output.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the JSON payload, posts it with HTTP Basic authentication
        /// and prints whatever the server returns.
        /// </summary>
        static async Task Main()
        {
            const string Username = "YOUR_USERNAME";
            const string Password = "YOUR_PASSWORD";

            // The values are typed as object because the payload mixes strings
            // with a boolean; a Dictionary<string, string> cannot hold `true`
            // and would not compile.
            var parameters = new Dictionary<string, object>()
            {
                { "source", "amazon" },
                { "url", "https://www.amazon.co.uk/dp/B08Y6Z944Q/" },
                { "parse", true },
            };

            using var client = new HttpClient
            {
                BaseAddress = new Uri("https://realtime.oxylabs.io"),
            };

            using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(parameters),
            };

            // HTTP Basic authentication: base64("username:password").
            var authenticationString = $"{Username}:{Password}";
            var base64EncodedAuthenticationString = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes(authenticationString));
            requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

            using var response = await client.SendAsync(requestMessage);
            var contents = await response.Content.ReadAsStringAsync();

            Console.WriteLine(contents);
        }
    }
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
)

// queryRealtime encodes payload as JSON, posts it to the Realtime endpoint
// using HTTP Basic authentication, and returns the raw response body.
func queryRealtime(username, password string, payload map[string]interface{}) ([]byte, error) {
	jsonValue, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("encode payload: %w", err)
	}

	request, err := http.NewRequest("POST",
		"https://realtime.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		return nil, fmt.Errorf("build request: %w", err)
	}
	// Declare the body format explicitly, as the other language examples do.
	request.Header.Set("Content-Type", "application/json")
	request.SetBasicAuth(username, password)

	client := &http.Client{}
	response, err := client.Do(request)
	if err != nil {
		return nil, fmt.Errorf("send request: %w", err)
	}
	// The body must be closed so the underlying connection is released.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}
	return responseText, nil
}

// main requests the Amazon product page for one ASIN and prints the response.
func main() {
	const Username = "YOUR_USERNAME"
	const Password = "YOUR_PASSWORD"

	// The values are interface{} because the payload mixes strings with a
	// boolean; a map[string]string cannot hold `true` and would not compile.
	payload := map[string]interface{}{
		"source": "amazon",
		"url":    "https://www.amazon.co.uk/dp/B08Y6Z944Q/",
		"parse":  true,
	}

	responseText, err := queryRealtime(Username, Password, payload)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		os.Exit(1)
	}
	fmt.Println(string(responseText))
}
import okhttp3.*;
import org.json.JSONObject;
/**
 * Sends one query for an Amazon product page to the Realtime endpoint and
 * prints the raw response body (or the error message) to standard output.
 */
public class Main implements Runnable {

    public static final String USERNAME = "YOUR_USERNAME";
    public static final String PASSWORD = "YOUR_PASSWORD";
    private static final String AUTHORIZATION_HEADER = "Authorization";

    /** Starts the query on a separate thread. */
    public static void main(String[] args) {
        Thread worker = new Thread(new Main());
        worker.start();
    }

    /** Builds the payload, executes the POST request and prints the outcome. */
    @Override
    public void run() {
        JSONObject payload = new JSONObject();
        payload.put("source", "amazon");
        payload.put("url", "https://www.amazon.co.uk/dp/B08Y6Z944Q/");
        payload.put("parse", true);

        // Rebuilds the request that triggered the authenticator with Basic
        // credentials attached.
        Authenticator basicAuth = (route, challenge) -> challenge
                .request()
                .newBuilder()
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .build();

        OkHttpClient httpClient = new OkHttpClient.Builder()
                .authenticator(basicAuth)
                .build();

        MediaType jsonType = MediaType.parse("application/json; charset=utf-8");
        RequestBody requestBody = RequestBody.create(payload.toString(), jsonType);
        Request query = new Request.Builder()
                .url("https://realtime.oxylabs.io/v1/queries")
                .post(requestBody)
                .build();

        try (Response reply = httpClient.newCall(query).execute()) {
            assert reply.body() != null;
            System.out.println(reply.body().string());
        } catch (Exception failure) {
            System.out.println("Error: " + failure.getMessage());
        }

        System.exit(0);
    }
}
import fetch from 'node-fetch';

// Credentials for HTTP Basic authentication.
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const basicToken = Buffer.from(`${username}:${password}`).toString('base64');

// Query description: fetch and parse one Amazon product page given by its URL.
const payload = {
    source: 'amazon',
    url: 'https://www.amazon.co.uk/dp/B08Y6Z944Q/',
    parse: true,
};

const requestOptions = {
    method: 'post',
    body: JSON.stringify(payload),
    headers: {
        'Content-Type': 'application/json',
        Authorization: `Basic ${basicToken}`,
    },
};

// Send the query and print the decoded JSON reply.
const reply = await fetch('https://realtime.oxylabs.io/v1/queries', requestOptions);
console.log(await reply.json());
The example above uses the Realtime integration method. If you would like to use some other integration method in your query (e.g. Push-Pull or Proxy Endpoint), refer to the integration methods section.
The
amazon_search
source is designed to retrieve Amazon search result pages. To see the response example with retrieved data, download this sample output file in JSON format.
Parameter | Description | Default Value |
---|---|---|
source | amazon_search | |
domain | com | |
query | UTF-encoded keyword | - |
start_page | Starting page number | 1 |
pages | Number of pages to retrieve | 1 |
geo_location | - | |
user_agent_type | desktop | |
render | - | |
callback_url | - | |
parse | - | |
context :
category_id | Search for items in a particular browse node (product category). | - |
context :
merchant_id | Search for items sold by a particular seller. | - |
- required parameter
In the code examples below, we make a request to retrieve a result from
amazon.nl
, which includes 2
search results pages, starting from page #2
, for the search term nirvana tshirt
. Additionally, the search is limited to category ID: 16391693031
.
JSON
cURL
Python
PHP
C#
Golang
Node.js
{
    "source": "amazon_search",
    "domain": "nl",
    "query": "nirvana tshirt",
    "start_page": 2,
    "pages": 2,
    "parse": true,
    "context": [
        {
            "key": "category_id",
            "value": "16391693031"
        }
    ]
}
# POST the amazon_search query (amazon.nl, 2 pages starting from page 2,
# limited to category 16391693031) to the Realtime endpoint.
# The trailing backslash keeps -d on the same command as curl.
curl --user user:pass1 'https://realtime.oxylabs.io/v1/queries' -H "Content-Type: application/json" \
  -d '{"source": "amazon_search", "domain": "nl", "query": "nirvana tshirt", "start_page": 2, "pages": 2, "parse": true, "context": [{"key": "category_id", "value": "16391693031"}]}'
from pprint import pprint

import requests

# Context entry restricting the search to a single category ID.
category_filter = {'key': 'category_id', 'value': 16391693031}

# Job description: two pages of amazon.nl search results for the search term,
# starting from page 2, returned as parsed data.
search_job = dict(
    source='amazon_search',
    domain='nl',
    query='nirvana tshirt',
    start_page=2,
    pages=2,
    parse=True,
    context=[category_filter],
)

# Submit the job to the Realtime endpoint.
reply = requests.request(
    method='POST',
    url='https://realtime.oxylabs.io/v1/queries',
    auth=('user', 'pass1'),
    json=search_job,
)

# Pretty-print the decoded JSON reply to stdout.
pprint(reply.json())
<?php
// Context entry restricting the search to a single category ID.
$categoryFilter = [
    'key' => 'category_id',
    'value' => 16391693031,
];

// Query description: two pages of amazon.nl search results for the search
// term, starting from page 2, returned as parsed data.
$query = [
    'source' => 'amazon_search',
    'domain' => 'nl',
    'query' => 'nirvana tshirt',
    'start_page' => 2,
    'pages' => 2,
    'parse' => true,
    'context' => [$categoryFilter],
];

// Configure the whole POST request in a single call.
$handle = curl_init();
curl_setopt_array($handle, [
    CURLOPT_URL => "https://realtime.oxylabs.io/v1/queries",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($query),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => "user" . ":" . "pass1",
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
]);

// Print the response body, followed by the cURL error if one occurred.
$body = curl_exec($handle);
echo $body;
if (curl_errno($handle)) {
    echo 'Error:' . curl_error($handle);
}
curl_close($handle);
?>
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main()
{
const string Username = "YOUR_USERNAME";
const string Password = "YOUR_PASSWORD";
var parameters = new {
source = "amazon_search",
domain = "nl",
query = "nirvana tshirt",
start_page = 2,
pages = 2,
parse = true,
context = new dynamic [] {
new { key = "category_id", value = 16391693031 }
}
};
var client = new HttpClient();