Custom Parser

Follow the steps below for a quick start with Oxylabs Custom Parser.

Custom Parser is a free Web Scraper API feature that lets you create parsing and data processing logic that’s executed on a raw HTML result. You can automatically generate parsers using AI or write them manually for advanced scenarios.

For in-depth instructions and examples, refer to these pages:

- Getting started
- Generating parsers via API
- Parser Presets
- Writing instructions manually
- List of parsing functions

Quick start

1. Generate a parser

We recommend starting out with our AI-powered OxyCopilot tool that lets you generate scrapers and parsers without writing any code.

To access OxyCopilot, log in to the Oxylabs dashboard and select Scraper APIs Playground on the left-side menu.

Follow the steps shown in the video to generate a parser:

Here are the same steps shown in the video:

Enter the URL(s) you want to scrape and parse
Specify any parameters such as JavaScript rendering
Write a prompt that describes what you want to parse
Run OxyCopilot

Once you’re happy with the generated parser, load the instructions.

2. Save the parser as a preset

You can easily save your generated parsers through OxyCopilot for later use. See the steps below:

Assign the preset to a specific API user
Click Save
Enter the preset name and the description (optional)

After saving the preset, you can use it with API requests.

3. Use the parser with API requests

To use your preset with Web Scraper API, send a payload with the parser_preset parameter set to your preset's name. In the code samples below, we're reusing the example_parser preset created in previous steps.

# POST the job to the Realtime endpoint; --data makes curl issue a POST request.
curl --user 'USERNAME:PASSWORD' \
     --header 'Content-Type: application/json' \
     --data '{
        "source": "universal",
        "url": "https://example.com/",
        "parse": true,
        "parser_preset": "example_parser"
    }' \
     'https://realtime.oxylabs.io/v1/queries'

import requests
from pprint import pprint


# Describe the job: scrape the URL and parse it with the saved preset.
payload = {
    'source': 'universal',
    'url': 'https://example.com/',
    'parse': True,
    'parser_preset': 'example_parser',
}

# Send the job and wait for the response.
# requests applies no timeout by default, so without an explicit value a
# stalled connection would block this script forever. 180 seconds mirrors
# the read timeout used by the Java sample on this page.
response = requests.post(
    'https://realtime.oxylabs.io/v1/queries',
    auth=('USERNAME', 'PASSWORD'),
    json=payload,
    timeout=180,
)

# Print prettified response to stdout.
pprint(response.json())

const https = require("https");

// API user credentials, sent as HTTP Basic authentication.
const username = "USERNAME";
const password = "PASSWORD";

// Job description: scrape the URL and parse it with the saved preset.
const body = {
    source: "universal",
    url: "https://example.com/",
    parse: true,
    parser_preset: "example_parser"
};

const options = {
    hostname: "realtime.oxylabs.io",
    path: "/v1/queries",
    method: "POST",
    headers: {
        "Content-Type": "application/json",
        Authorization:
            "Basic " + Buffer.from(`${username}:${password}`).toString("base64"),
    },
};

const request = https.request(options, (response) => {
    // Decode at the stream level so that a multi-byte UTF-8 character split
    // across two chunks is reassembled instead of being garbled by
    // per-chunk Buffer-to-string conversion.
    response.setEncoding("utf8");

    let data = "";

    response.on("data", (chunk) => {
        data += chunk;
    });

    response.on("end", () => {
        try {
            const responseData = JSON.parse(data);
            console.log(JSON.stringify(responseData, null, 2));
        } catch (error) {
            // The body was not JSON; show it verbatim instead of crashing
            // with an uncaught SyntaxError.
            console.error("Error: response is not valid JSON:", error.message);
            console.error(data);
        }
    });
});

request.on("error", (error) => {
    console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();

# The whole string you submit has to be URL-encoded.

https://realtime.oxylabs.io/v1/queries?source=universal&url=https%3A%2F%2Fexample.com%2F&parse=true&parser_preset=example_parser&access_token=12345abcde

<?php

// Job description: scrape the URL and parse it with the saved preset.
$params = [
    'source' => 'universal',
    'url' => 'https://example.com/',
    'parse' => true,
    'parser_preset' => 'example_parser',
];

$curl = curl_init();

// Configure the whole request in one call: JSON POST with Basic auth,
// returning the response body as a string instead of printing it directly.
curl_setopt_array($curl, [
    CURLOPT_URL => "https://realtime.oxylabs.io/v1/queries",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($params),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => "USERNAME" . ":" . "PASSWORD",
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
]);

$response = curl_exec($curl);
echo $response;

// Report transport-level failures (DNS, TLS, connection errors, ...).
if (curl_errno($curl)) {
    echo 'Error:' . curl_error($curl);
}
curl_close($curl);

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
)

// main submits a scraping job that uses the saved parser preset and prints
// the raw response body to stdout. Any failure is reported on stdout and
// ends the program early instead of being silently ignored.
func main() {
	const Username = "USERNAME"
	const Password = "PASSWORD"

	// Job description: scrape the URL and parse it with the saved preset.
	payload := map[string]interface{}{
		"source":        "universal",
		"url":           "https://example.com/",
		"parse":         true,
		"parser_preset": "example_parser",
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	client := &http.Client{}
	request, err := http.NewRequest("POST",
		"https://realtime.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	// Declare the body as JSON, as the other samples on this page do.
	request.Header.Set("Content-Type", "application/json")
	request.SetBasicAuth(Username, Password)

	response, err := client.Do(request)
	if err != nil {
		// response is nil here; reading its Body would panic.
		fmt.Println("Error:", err)
		return
	}
	// Release the underlying connection once the body has been read.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	fmt.Println(string(responseText))
}

using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;

namespace OxyApi
{
    /// <summary>
    /// Sample console program that submits a scraping job using a saved
    /// parser preset and prints the response body.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Sends a JSON POST request with HTTP Basic authentication to the
        /// Realtime endpoint and writes the response body to the console.
        /// </summary>
        static async Task Main()
        {
            const string Username = "USERNAME";
            const string Password = "PASSWORD";

            // Job description: scrape the URL and parse it with the saved preset.
            var parameters = new
            {
                source = "universal",
                url = "https://example.com/",
                parse = true,
                parser_preset = "example_parser"
            };

            // The client, request and response are all IDisposable; dispose
            // them deterministically when Main exits.
            using var client = new HttpClient
            {
                BaseAddress = new Uri("https://realtime.oxylabs.io")
            };

            using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(parameters)
            };

            // Basic auth: base64("username:password"), encoded as UTF-8.
            var credentials = Convert.ToBase64String(
                System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}"));
            requestMessage.Headers.Authorization =
                new System.Net.Http.Headers.AuthenticationHeaderValue("Basic", credentials);

            using var response = await client.SendAsync(requestMessage);
            var contents = await response.Content.ReadAsStringAsync();

            Console.WriteLine(contents);
        }
    }
}

package org.example;

import okhttp3.*;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;

/**
 * Sample program that submits a scraping job using a saved parser preset
 * and prints the response body to stdout.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "USERNAME";
    public static final String PASSWORD = "PASSWORD";

    /**
     * Builds the JSON payload, sends it with HTTP Basic authentication and
     * prints the response body, or an error message if the call fails.
     * Terminates the JVM when done.
     */
    @Override
    public void run() {
        // Job description: scrape the URL and parse it with the saved preset.
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("source", "universal");
        jsonObject.put("url", "https://example.com/");
        jsonObject.put("parse", true);
        jsonObject.put("parser_preset", "example_parser");

        var client = new OkHttpClient.Builder()
                .readTimeout(180, TimeUnit.SECONDS)
                .build();

        var mediaType = MediaType.parse("application/json; charset=utf-8");
        var body = RequestBody.create(jsonObject.toString(), mediaType);

        // Send the credentials with the first request. An OkHttp Authenticator
        // only runs after a 401 challenge, which costs an extra round trip and,
        // without a give-up check, retries repeatedly when credentials are wrong.
        var request = new Request.Builder()
                .url("https://realtime.oxylabs.io/v1/queries")
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .post(body)
                .build();

        // Closing the response also closes its body.
        try (var response = client.newCall(request).execute()) {
            var responseBody = response.body();
            if (responseBody != null) {
                System.out.println(responseBody.string());
            }
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        System.exit(0);
    }

    /** Runs the sample on a separate thread. */
    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}

Output sample

{
  "results": [
    {
      "content": {
        "title": "Example Domain",
        "parse_status_code": 12000
      },
      "created_at": "2025-10-24 10:04:59",
      "updated_at": "2025-10-24 10:05:00",
      "page": 1,
      "url": "https://example.com/",
      "job_id": "7387428891226308609",
      "is_render_forced": false,
      "status_code": 200,
      "type": "parsed",
      "parser_type": "preset",
      "parser_preset": "example_parser"
    }
  ]
}

Getting the HTML content of a parsed job

You may also retrieve the raw HTML result by adding ?type=raw to the end of the result retrieval URL. Read more here.

Previous: List of instructions | Next: Getting started

Last updated 16 days ago

Was this helpful?

Good night

Quick start

1. Generate a parser

2. Save the parser as a preset

3. Use the parser with API requests

Getting the HTML content of a parsed job