YouTube Transcript

You can get YouTube video transcripts by providing a YouTube video ID and a language_code to the youtube_transcript source. (Note: Transcripts are not closed captions (CC).)

Request samples

The following examples demonstrate how to retrieve the English transcript of a YouTube video, specifically when the transcript was provided by the uploader.

# Request the uploader-provided English transcript of a video via the Realtime endpoint.
curl --user 'USERNAME:PASSWORD' \
--header 'Content-Type: application/json' \
--data '{
        "source": "youtube_transcript",
        "query": "SLoqvcnwwN4",
        "context": [
            {
                "key": "language_code",
                "value": "en"
            },
            {
                "key": "transcript_origin",
                "value": "uploader_provided"
            }
        ]
    }' \
'https://realtime.oxylabs.io/v1/queries'

import requests
from pprint import pprint

# Structure payload: the video ID goes in `query`; the transcript language and
# origin are passed as key/value pairs in `context`.
payload = {
    'source': 'youtube_transcript',
    'query': 'SLoqvcnwwN4',
    'context': [
        {'key': 'language_code', 'value': 'en'},
        {'key': 'transcript_origin', 'value': 'uploader_provided'},
    ],
}

# Get response. Replace the USERNAME and PASSWORD placeholders with your own
# API credentials.
response = requests.request(
    'POST',
    'https://realtime.oxylabs.io/v1/queries',
    auth=('USERNAME', 'PASSWORD'),
    json=payload,
)

# Print the JSON response with the result.
pprint(response.json())

const https = require("https");

// API credentials; replace the placeholders with your own.
const username = "USERNAME";
const password = "PASSWORD";

// Job parameters: the video ID goes in `query`; the transcript language and
// origin are passed as key/value pairs in `context`.
const body = {
    source: "youtube_transcript",
    query: "SLoqvcnwwN4",
    context: [
        {
            key: "language_code",
            value: "en",
        },
        {
            key: "transcript_origin",
            value: "uploader_provided",
        },
    ],
};

// HTTP Basic auth token: base64 of "username:password".
const credentials = Buffer.from(`${username}:${password}`).toString("base64");

const options = {
    hostname: "realtime.oxylabs.io",
    path: "/v1/queries",
    method: "POST",
    headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${credentials}`,
    },
};

const request = https.request(options, (response) => {
    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not corrupted by per-chunk conversion.
    response.setEncoding("utf8");

    let data = "";

    response.on("data", (chunk) => {
        data += chunk;
    });

    response.on("end", () => {
        try {
            const responseData = JSON.parse(data);
            console.log(JSON.stringify(responseData, null, 2));
        } catch (error) {
            // The body was not valid JSON; show it as received instead of
            // letting the parse error crash the process.
            console.error(
                `Unexpected non-JSON response (HTTP ${response.statusCode}):`,
            );
            console.error(data);
        }
    });
});

// Transport-level failures (DNS, connection reset, TLS) are reported here.
request.on("error", (error) => {
    console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();

# The whole string you submit has to be URL-encoded.

https://realtime.oxylabs.io/v1/queries?source=youtube_transcript&query=SLoqvcnwwN4&context=%5B%7B%22key%22%3A%22language_code%22%2C%22value%22%3A%22en%22%7D%2C%7B%22key%22%3A%22transcript_origin%22%2C%22value%22%3A%22uploader_provided%22%7D%5D&access_token=12345abcde

<?php

// API credentials; replace the placeholders with your own.
$username = 'USERNAME';
$password = 'PASSWORD';

// Job parameters: the video ID goes in 'query'; the transcript language and
// origin are passed as key/value pairs in 'context'.
$payload = [
    'source' => 'youtube_transcript',
    'query' => 'SLoqvcnwwN4',
    'context' => [
        ['key' => 'language_code', 'value' => 'en'],
        ['key' => 'transcript_origin', 'value' => 'uploader_provided'],
    ],
];

$curl = curl_init();

// Options are applied in the same order as a plain POST setup: URL, return
// the body as a string, JSON body, POST method, Basic auth, headers.
curl_setopt($curl, CURLOPT_URL, 'https://realtime.oxylabs.io/v1/queries');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_USERPWD, $username . ':' . $password);
curl_setopt($curl, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);

$responseBody = curl_exec($curl);
echo $responseBody;

// Report transport errors recorded by cURL for this handle.
if (curl_errno($curl) !== 0) {
    echo 'Error:' . curl_error($curl);
}

curl_close($curl);

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
)

// main posts a youtube_transcript job to the Realtime endpoint using HTTP
// Basic auth and prints the raw response body.
func main() {
	const (
		Username = "USERNAME"
		Password = "PASSWORD"
	)

	// The video ID goes in "query"; the transcript language and origin are
	// passed as key/value pairs in "context".
	jobContext := []map[string]string{
		{"key": "language_code", "value": "en"},
		{"key": "transcript_origin", "value": "uploader_provided"},
	}
	payload := map[string]interface{}{
		"source":  "youtube_transcript",
		"query":   "SLoqvcnwwN4",
		"context": jobContext,
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error marshalling JSON:", err)
		return
	}

	req, err := http.NewRequest(http.MethodPost, "https://realtime.oxylabs.io/v1/queries", bytes.NewReader(encoded))
	if err != nil {
		fmt.Println("Error creating request:", err)
		return
	}
	req.SetBasicAuth(Username, Password)
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		fmt.Println("Error making request:", err)
		return
	}
	defer resp.Body.Close()

	rawBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error reading response:", err)
		return
	}

	fmt.Println(string(rawBody))
}

using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Text;
using System.Threading.Tasks;

namespace OxyApi
{
    /// <summary>
    /// Posts a youtube_transcript job to the Realtime endpoint with HTTP Basic
    /// authentication and writes the response body to the console.
    /// </summary>
    class Program
    {
        static async Task Main()
        {
            // API credentials; replace the placeholders with your own.
            const string Username = "USERNAME";
            const string Password = "PASSWORD";

            // The video ID goes in "query"; the transcript language and origin
            // are passed as key/value pairs in "context".
            var parameters = new
            {
                source = "youtube_transcript",
                query = "SLoqvcnwwN4",
                context = new[]
                {
                    new { key = "language_code", value = "en" },
                    new { key = "transcript_origin", value = "uploader_provided" }
                }
            };

            // Client, request and response are IDisposable; `using` releases
            // their connections and buffers when Main exits.
            using var client = new HttpClient
            {
                BaseAddress = new Uri("https://realtime.oxylabs.io")
            };

            using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(parameters)
            };

            var authenticationString = $"{Username}:{Password}";
            var base64EncodedAuthenticationString = Convert.ToBase64String(Encoding.UTF8.GetBytes(authenticationString));
            requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

            try
            {
                using var response = await client.SendAsync(requestMessage);
                response.EnsureSuccessStatusCode();

                var contents = await response.Content.ReadAsStringAsync();
                Console.WriteLine(contents);
            }
            catch (HttpRequestException e)
            {
                Console.WriteLine($"Request error: {e.Message}");
            }
            catch (TaskCanceledException e)
            {
                // HttpClient surfaces its own timeout as TaskCanceledException,
                // which the HttpRequestException handler above does not catch.
                Console.WriteLine($"Request timed out: {e.Message}");
            }
        }
    }
}

package org.example;

import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;

/**
 * Posts a youtube_transcript job to the Realtime endpoint with OkHttp and
 * prints the response body.
 *
 * <p>Credentials are supplied through an OkHttp {@link Authenticator}, i.e.
 * they are attached when the server answers with an authentication challenge.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "USERNAME";
    public static final String PASSWORD = "PASSWORD";

    @Override
    public void run() {
        // Construct JSON payload: the video ID goes in "query"; the transcript
        // language and origin are passed as key/value pairs in "context".
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("source", "youtube_transcript");
        jsonObject.put("query", "SLoqvcnwwN4");

        JSONArray contextArray = new JSONArray();
        contextArray.put(new JSONObject().put("key", "language_code").put("value", "en"));
        contextArray.put(new JSONObject().put("key", "transcript_origin").put("value", "uploader_provided"));

        jsonObject.put("context", contextArray);

        Authenticator authenticator = (route, response) -> {
            // The challenged request already carried credentials, so they were
            // rejected. Returning null gives up instead of re-sending the same
            // header until OkHttp's follow-up limit is reached.
            if (response.request().header(AUTHORIZATION_HEADER) != null) {
                return null;
            }

            String credential = Credentials.basic(USERNAME, PASSWORD);
            return response
                    .request()
                    .newBuilder()
                    .header(AUTHORIZATION_HEADER, credential)
                    .build();
        };

        var client = new OkHttpClient.Builder()
                .authenticator(authenticator)
                .readTimeout(180, TimeUnit.SECONDS)
                .build();

        var mediaType = MediaType.parse("application/json; charset=utf-8");
        var body = RequestBody.create(jsonObject.toString(), mediaType);
        var request = new Request.Builder()
                .url("https://realtime.oxylabs.io/v1/queries")
                .post(body)
                .build();

        // Closing the Response also closes its body, so a single
        // try-with-resources is sufficient.
        try (var response = client.newCall(request).execute()) {
            var responseBody = response.body();
            if (responseBody != null) {
                System.out.println(responseBody.string());
            }
        } catch (Exception exception) {
            System.out.println("Error: " + exception.getMessage());
        }

        // NOTE(review): presumably exits explicitly so the process does not
        // wait on the client's background threads; confirm before removing.
        System.exit(0);
    }

    public static void main(String[] args) {
        new Thread(new Main()).start();
    }
}

{
    "source": "youtube_transcript",
    "query": "SLoqvcnwwN4",
    "context": [
        {
            "key": "language_code",
            "value": "en"
        },
        {
            "key": "transcript_origin",
            "value": "uploader_provided"
        }
    ]
}

We use the synchronous Realtime integration method in our examples. If you would like to use Proxy Endpoint or asynchronous Push-Pull integration, refer to the Integration Methods section.

47KB

YT_transcript_output.json

Open

Request parameter values

Generic

Parameter

Description

Default Value

source

Sets the scraper.

youtube_transcript

query

YouTube video ID.

context: language_code

Specifies the language of the transcript. Find available values here. NOTE: If the provided language_code has no matching transcript in the YouTube video, the result returns a 404 status.

context: transcript_origin (optional)

Specifies whether to retrieve transcripts that are auto_generated or uploader_provided.

auto_generated

callback_url

URL to your callback endpoint. More info.

- mandatory parameter

Previous: YouTube Downloader | Next: YouTube Metadata

Last updated 11 months ago

Was this helpful?

Good night

Request samples

Request parameter values

Generic

Request samples

Request parameter values

Generic