YouTube Subtitles
Learn how to scrape subtitles (closed captions) from YouTube videos using Web Scraper API. Find out more about its parameters and practical examples.
You can get YouTube subtitles and closed captions (CC) by providing the YouTube video ID and a language_code to the youtube_subtitles source.
Subtitles (closed captions) are separate from transcripts. To extract standard transcripts, use the YouTube Transcript source.
Request samples
The following example demonstrates how to retrieve English closed captions from a YouTube video where captions were provided by the uploader.
# Send a JSON job to the Realtime endpoint using HTTP basic auth (--user).
# The payload selects the youtube_subtitles source, passes the video ID as
# "query", and uses "context" to pick the caption language and origin.
curl 'https://realtime.oxylabs.io/v1/queries' \
--user 'USERNAME:PASSWORD' \
-H 'Content-Type: application/json' \
-d '{
"source": "youtube_subtitles",
"query": "c4P_YuNnZ7U",
"context": [
{
"key": "language_code",
"value": "en"
},
{
"key": "subtitle_origin",
"value": "uploader_provided"
}
]
}'
import requests
from pprint import pprint

# Job description sent to the API: the scraper to use ("source"), the YouTube
# video ID ("query"), and the "context" options selecting the caption language
# and whether captions come from the uploader or are auto-generated.
payload = {
    'source': 'youtube_subtitles',
    'query': 'c4P_YuNnZ7U',
    'context': [
        {'key': 'language_code', 'value': 'en'},
        {'key': 'subtitle_origin', 'value': 'uploader_provided'},
    ],
}

# Submit the job with HTTP basic auth. Replace the USERNAME / PASSWORD
# placeholders with your API user's credentials.
response = requests.request(
    'POST',
    'https://realtime.oxylabs.io/v1/queries',
    auth=('USERNAME', 'PASSWORD'),
    json=payload,
)

# Print the JSON response with the result.
pprint(response.json())
const https = require("https");
// Placeholder credentials; replace with your API user's values.
const username = "USERNAME";
const password = "PASSWORD";

// Job description: the scraper to use, the YouTube video ID, and the context
// options selecting the caption language and origin.
const body = {
  source: "youtube_subtitles",
  query: "c4P_YuNnZ7U",
  context: [
    { key: "language_code", value: "en" },
    { key: "subtitle_origin", value: "uploader_provided" },
  ],
};

// HTTP basic auth: base64 of "username:password".
const credentials = Buffer.from(`${username}:${password}`).toString("base64");

const options = {
  hostname: "realtime.oxylabs.io",
  path: "/v1/queries",
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Basic ${credentials}`,
  },
};

const request = https.request(options, (response) => {
  // Decode at the stream level so a multi-byte UTF-8 character that is split
  // across two chunks is not corrupted when the chunks are concatenated.
  response.setEncoding("utf8");

  let data = "";
  response.on("data", (chunk) => {
    data += chunk;
  });
  response.on("end", () => {
    // The body is not guaranteed to be JSON (e.g. a gateway error page), so
    // report a parse failure instead of crashing with an uncaught exception.
    try {
      const responseData = JSON.parse(data);
      console.log(JSON.stringify(responseData, null, 2));
    } catch (error) {
      console.error("Error:", error);
    }
  });
});

request.on("error", (error) => {
  console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();
# The whole string you submit has to be URL-encoded.
https://realtime.oxylabs.io/v1/queries?source=youtube_subtitles&query=c4P_YuNnZ7U&context=%5B%7B%22key%22%3A%22language_code%22%2C%22value%22%3A%22en%22%7D%2C%7B%22key%22%3A%22subtitle_origin%22%2C%22value%22%3A%22uploader_provided%22%7D%5D&access_token=12345abcde
<?php
// Job description: the scraper to use, the YouTube video ID, and the context
// options selecting the caption language and origin.
$payload = [
    'source' => 'youtube_subtitles',
    'query' => 'c4P_YuNnZ7U',
    'context' => [
        ['key' => 'language_code', 'value' => 'en'],
        ['key' => 'subtitle_origin', 'value' => 'uploader_provided'],
    ],
];

// Configure a POST of the JSON-encoded payload, authenticated with basic auth.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://realtime.oxylabs.io/v1/queries");
// Return the response body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_USERPWD, "USERNAME" . ":" . "PASSWORD");
curl_setopt($ch, CURLOPT_HTTPHEADER, ["Content-Type: application/json"]);

// Run the request and print whatever came back.
$responseBody = curl_exec($ch);
echo $responseBody;

// Report a transport-level failure, if any.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
package main
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

// main posts a youtube_subtitles job to the Realtime endpoint using HTTP
// basic auth and prints the raw response body. Each failure is printed and
// ends the program early.
func main() {
	const Username = "USERNAME"
	const Password = "PASSWORD"

	// Define the payload with query and context parameters.
	payload := map[string]interface{}{
		"source": "youtube_subtitles",
		"query":  "c4P_YuNnZ7U",
		"context": []map[string]string{
			{
				"key":   "language_code",
				"value": "en",
			},
			{
				"key":   "subtitle_origin",
				"value": "uploader_provided",
			},
		},
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error marshalling JSON:", err)
		return
	}

	client := &http.Client{}
	request, err := http.NewRequest("POST", "https://realtime.oxylabs.io/v1/queries", bytes.NewBuffer(jsonValue))
	if err != nil {
		fmt.Println("Error creating request:", err)
		return
	}
	request.SetBasicAuth(Username, Password)
	request.Header.Set("Content-Type", "application/json")

	response, err := client.Do(request)
	if err != nil {
		fmt.Println("Error making request:", err)
		return
	}
	defer response.Body.Close()

	// io.ReadAll replaces the deprecated ioutil.ReadAll.
	responseText, err := io.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error reading response:", err)
		return
	}
	fmt.Println(string(responseText))
}
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Text;
using System.Threading.Tasks;
namespace OxyApi
{
// Console sample: posts a youtube_subtitles job to the Realtime endpoint with
// HTTP basic auth and prints the response body.
class Program
{
    static async Task Main()
    {
        const string Username = "USERNAME";
        const string Password = "PASSWORD";

        // Job description: the scraper to use, the YouTube video ID, and the
        // context options selecting the caption language and origin.
        var parameters = new
        {
            source = "youtube_subtitles",
            query = "c4P_YuNnZ7U",
            context = new[]
            {
                new { key = "language_code", value = "en" },
                new { key = "subtitle_origin", value = "uploader_provided" }
            }
        };

        // HttpClient, the request and the response are all IDisposable;
        // `using` releases their sockets and buffers when Main exits.
        using var client = new HttpClient();
        Uri baseUri = new Uri("https://realtime.oxylabs.io");
        client.BaseAddress = baseUri;

        using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries");
        requestMessage.Content = JsonContent.Create(parameters);

        // HTTP basic auth: base64 of "username:password".
        var authenticationString = $"{Username}:{Password}";
        var base64EncodedAuthenticationString = Convert.ToBase64String(Encoding.UTF8.GetBytes(authenticationString));
        requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

        try
        {
            using var response = await client.SendAsync(requestMessage);
            // Throws HttpRequestException on a non-success status code.
            response.EnsureSuccessStatusCode();
            var contents = await response.Content.ReadAsStringAsync();
            Console.WriteLine(contents);
        }
        catch (HttpRequestException e)
        {
            Console.WriteLine($"Request error: {e.Message}");
        }
    }
}
}
package org.example;
import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Authorization";
public static final String USERNAME = "USERNAME";
public static final String PASSWORD = "PASSWORD";
/**
 * Posts a youtube_subtitles job to the Realtime endpoint and prints the
 * response body, then terminates the JVM.
 */
public void run() {
    // Construct JSON payload with context parameters
    JSONObject jsonObject = new JSONObject();
    jsonObject.put("source", "youtube_subtitles");
    jsonObject.put("query", "c4P_YuNnZ7U");
    JSONArray contextArray = new JSONArray();
    contextArray.put(new JSONObject().put("key", "language_code").put("value", "en"));
    contextArray.put(new JSONObject().put("key", "subtitle_origin").put("value", "uploader_provided"));
    jsonObject.put("context", contextArray);

    Authenticator authenticator = (route, response) -> {
        // If the rejected request already carried credentials, they are wrong:
        // give up instead of letting OkHttp retry the same username/password.
        if (response.request().header(AUTHORIZATION_HEADER) != null) {
            return null;
        }
        String credential = Credentials.basic(USERNAME, PASSWORD);
        return response
                .request()
                .newBuilder()
                .header(AUTHORIZATION_HEADER, credential)
                .build();
    };

    var client = new OkHttpClient.Builder()
            .authenticator(authenticator)
            .readTimeout(180, TimeUnit.SECONDS)
            .build();

    var mediaType = MediaType.parse("application/json; charset=utf-8");
    var body = RequestBody.create(jsonObject.toString(), mediaType);
    var request = new Request.Builder()
            .url("https://realtime.oxylabs.io/v1/queries")
            .post(body)
            .build();

    // Closing the response also closes its body, so one try-with-resources
    // is sufficient.
    try (var response = client.newCall(request).execute()) {
        var responseBody = response.body();
        if (responseBody != null) {
            System.out.println(responseBody.string());
        }
    } catch (Exception exception) {
        System.out.println("Error: " + exception.getMessage());
    }

    System.exit(0);
}
/** Entry point: starts a new thread that executes {@link #run()}. */
public static void main(String[] args) {
    Thread worker = new Thread(new Main());
    worker.start();
}
}
{
"source": "youtube_subtitles",
"query": "c4P_YuNnZ7U",
"context": [
{
"key": "language_code",
"value": "en"
},
{
"key": "subtitle_origin",
"value": "uploader_provided"
}
]
}
Our examples use the Realtime integration method. If you would like to use Proxy Endpoint or the asynchronous Push-Pull integration, refer to the Integration Methods section.
Request parameter values
Generic
source
Sets the scraper.
youtube_subtitles
query
YouTube video ID.
–
context:
language_code
Specifies the language of the subtitles. Find available values here.
NOTE: If the provided language_code has no matching subtitles / closed captions in the YouTube video, the result returns a 404 status.
–
context:
subtitle_origin
Specifies whether to retrieve subtitles that are auto_generated or uploader_provided.
–
– mandatory parameter
Last updated
Was this helpful?

