Custom Parser
See the steps for a quick start with Oxylabs Custom Parser.
Custom Parser is a free Web Scraper API feature that lets you create parsing and data processing logic that’s executed on a raw HTML result. You can automatically generate parsers using AI or write them manually for advanced scenarios.
For in-depth instructions and examples, refer to these pages:
Quick start
1. Generate a parser
We recommend starting out with our AI-powered OxyCopilot tool that lets you generate scrapers and parsers without writing any code.
To access OxyCopilot, log in to the Oxylabs dashboard and select Scraper APIs Playground on the left-side menu.
Follow the steps shown in the video to generate a parser:
Here are the same steps shown in the video:
Enter the URL(s) you want to scrape and parse
Specify any parameters such as JavaScript rendering
Write a prompt that describes what you want to parse
Run OxyCopilot
Once you’re happy with the generated parser, load the instructions.
2. Save the parser as a preset
You can easily save your generated parsers through OxyCopilot for later use. See the steps below:
Assign the preset to a specific API user
Click Save
Enter the preset name and the description (optional)
After saving the preset, you can use it with API requests.
3. Use the parser with API requests
To use your preset with Web Scraper API, send a payload with the parser_preset parameter set to your preset's name. In the code samples below, we're reusing the example_parser preset created in previous steps.
curl 'https://realtime.oxylabs.io/v1/queries' \
--user 'USERNAME:PASSWORD' \
-H 'Content-Type: application/json' \
-d '{
"source": "universal",
"url": "https://example.com/",
"parse": true,
"parser_preset": "example_parser"
}'

import requests
from pprint import pprint
# Describe the job: scrape the page and parse it with the saved preset.
payload = dict(
    source='universal',
    url='https://example.com/',
    parse=True,
    parser_preset='example_parser',
)

# Submit the job with HTTP Basic credentials and keep the response.
response = requests.post(
    'https://realtime.oxylabs.io/v1/queries',
    json=payload,
    auth=('USERNAME', 'PASSWORD'),
)
# Print prettified response to stdout.
pprint(response.json())

const https = require("https");
const username = "USERNAME";
const password = "PASSWORD";

// Job payload: scrape the URL and parse it with the saved preset.
const body = {
  source: "universal",
  url: "https://example.com/",
  parse: true,
  parser_preset: "example_parser"
};

const options = {
  hostname: "realtime.oxylabs.io",
  path: "/v1/queries",
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    // HTTP Basic credentials: base64 of "username:password".
    Authorization:
      "Basic " + Buffer.from(`${username}:${password}`).toString("base64"),
  },
};

const request = https.request(options, (response) => {
  // Decode at the stream level so a multi-byte UTF-8 character that is split
  // across two chunks is not corrupted by per-chunk string conversion.
  response.setEncoding("utf8");

  let data = "";

  response.on("data", (chunk) => {
    data += chunk;
  });

  response.on("end", () => {
    try {
      const responseData = JSON.parse(data);
      console.log(JSON.stringify(responseData, null, 2));
    } catch (error) {
      // The body was not valid JSON; report it and show the body as received
      // instead of crashing with an uncaught exception.
      console.error("Error: response is not valid JSON:", error.message);
      console.log(data);
    }
  });
});

request.on("error", (error) => {
  console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();

# The whole string you submit has to be URL-encoded.
https://realtime.oxylabs.io/v1/queries?source=universal&url=https%3A%2F%2Fexample.com%2F&parse=true&parser_preset=example_parser&access_token=12345abcde

<?php
// Job payload: scrape the URL and parse it with the saved preset.
$params = [
    'source' => 'universal',
    'url' => 'https://example.com/',
    'parse' => true,
    'parser_preset' => 'example_parser',
];

$ch = curl_init();

// Configure the POST request in one call; options are applied in array order.
curl_setopt_array($ch, [
    CURLOPT_URL => "https://realtime.oxylabs.io/v1/queries",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($params),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => "USERNAME" . ":" . "PASSWORD",
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
]);

// Run the request and print whatever came back.
$result = curl_exec($ch);
echo $result;

// Report a transport-level failure, if any.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);

package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
// main submits one Web Scraper API job that uses a saved parser preset and
// prints the raw response body to stdout. Each step's error is checked; on
// failure the error is printed and the program returns instead of continuing
// with a nil value.
func main() {
	const Username = "USERNAME"
	const Password = "PASSWORD"

	// Job payload: scrape the URL and parse it with the saved preset.
	payload := map[string]interface{}{
		"source":        "universal",
		"url":           "https://example.com/",
		"parse":         true,
		"parser_preset": "example_parser",
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	client := &http.Client{}
	request, err := http.NewRequest("POST",
		"https://realtime.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	// http.NewRequest does not set a Content-Type; declare the JSON body
	// explicitly, as the other samples on this page do.
	request.Header.Set("Content-Type", "application/json")
	request.SetBasicAuth(Username, Password)

	response, err := client.Do(request)
	if err != nil {
		// response is nil here, so it must not be dereferenced.
		fmt.Println("Error:", err)
		return
	}
	// Release the connection once the body has been read.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println(string(responseText))
}
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace OxyApi
{
/// <summary>
/// Sends one Web Scraper API request that applies a saved parser preset and
/// prints the response body to standard output.
/// </summary>
class Program
{
    /// <summary>
    /// Posts the job payload to the realtime endpoint with HTTP Basic
    /// authentication and writes the response body to the console.
    /// </summary>
    static async Task Main()
    {
        const string Username = "USERNAME";
        const string Password = "PASSWORD";

        // Job payload: scrape the URL and parse it with the saved preset.
        var parameters = new
        {
            source = "universal",
            url = "https://example.com/",
            parse = true,
            parser_preset = "example_parser"
        };

        // The client, request and response are all IDisposable; using
        // declarations release them when Main returns.
        using var client = new HttpClient
        {
            BaseAddress = new Uri("https://realtime.oxylabs.io")
        };

        using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
        {
            Content = JsonContent.Create(parameters)
        };

        // HTTP Basic credentials: base64 of "username:password".
        var authenticationString = $"{Username}:{Password}";
        var base64EncodedAuthenticationString = Convert.ToBase64String(
            System.Text.Encoding.UTF8.GetBytes(authenticationString));
        requestMessage.Headers.Authorization =
            new System.Net.Http.Headers.AuthenticationHeaderValue("Basic", base64EncodedAuthenticationString);

        using var response = await client.SendAsync(requestMessage);
        var contents = await response.Content.ReadAsStringAsync();

        Console.WriteLine(contents);
    }
}
}

package org.example;
import okhttp3.*;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Authorization";
public static final String USERNAME = "USERNAME";
public static final String PASSWORD = "PASSWORD";
/**
 * Submits one Web Scraper API job that uses a saved parser preset, prints the
 * response body (or the error message on failure), then exits the JVM.
 */
public void run() {
    // Job payload: scrape the URL and parse it with the saved preset.
    JSONObject jsonObject = new JSONObject();
    jsonObject.put("source", "universal");
    jsonObject.put("url", "https://example.com/");
    jsonObject.put("parse", true);
    jsonObject.put("parser_preset", "example_parser");

    Authenticator authenticator = (route, response) -> {
        // If the challenged request already carried credentials they were
        // rejected; returning null makes OkHttp give up instead of resending
        // the same credentials on every 401.
        if (response.request().header(AUTHORIZATION_HEADER) != null) {
            return null;
        }

        String credential = Credentials.basic(USERNAME, PASSWORD);

        return response
                .request()
                .newBuilder()
                .header(AUTHORIZATION_HEADER, credential)
                .build();
    };

    var client = new OkHttpClient.Builder()
            .authenticator(authenticator)
            .readTimeout(180, TimeUnit.SECONDS)
            .build();

    var mediaType = MediaType.parse("application/json; charset=utf-8");
    var body = RequestBody.create(jsonObject.toString(), mediaType);
    var request = new Request.Builder()
            .url("https://realtime.oxylabs.io/v1/queries")
            .post(body)
            .build();

    try (var response = client.newCall(request).execute()) {
        if (response.body() != null) {
            try (var responseBody = response.body()) {
                System.out.println(responseBody.string());
            }
        }
    } catch (Exception exception) {
        System.out.println("Error: " + exception.getMessage());
    }

    // NOTE(review): explicit exit, presumably so the process does not wait on
    // the HTTP client's background threads — confirm before removing.
    System.exit(0);
}
/** Entry point: runs a {@code Main} instance on a new thread. */
public static void main(String[] args) {
    Thread worker = new Thread(new Main());
    worker.start();
}
}

{
"source": "universal",
"url": "https://example.com/",
"parse": true,
"parser_preset": "example_parser"
}

Getting the HTML content of a parsed job
You may also retrieve the raw HTML result by adding ?type=raw to the end of the result retrieval URL. Read more here.
Last updated
Was this helpful?

