YouTube 字幕
了解如何使用 Web Scraper API 抓取 YouTube 视频的字幕(隐藏式字幕)。了解参数和实用示例。
将 YouTube 视频 ID 和 language_code 提供给 youtube_subtitles 源,即可抓取该视频的字幕。
字幕(隐藏式字幕)与转录文本是两种不同的数据。要提取标准转录文本,请使用“YouTube 文本转录”源进行抓取。
请求示例
下面的示例演示如何从上传者提供了字幕的 YouTube 视频中检索英文隐藏式字幕。
curl 'https://realtime.oxylabs.io/v1/queries' \
--user 'USERNAME:PASSWORD' \
-H 'Content-Type: application/json' \
-d '{
"source": "youtube_subtitles",
"query": "c4P_YuNnZ7U",
"context": [
{
"key": "language_code",
"value": "en"
},
{
"key": "subtitle_origin",
"value": "uploader_provided"
}
]
}'
import requests
from pprint import pprint
# Job description: English, uploader-provided subtitles for a single video ID.
payload = {
    'source': 'youtube_subtitles',
    'query': 'c4P_YuNnZ7U',
    'context': [
        {'key': 'language_code', 'value': 'en'},
        {'key': 'subtitle_origin', 'value': 'uploader_provided'},
    ],
}

# POST the payload as JSON to the queries endpoint, authenticating with
# HTTP basic credentials, and keep the response for the caller to inspect.
response = requests.post(
    'https://realtime.oxylabs.io/v1/queries',
    json=payload,
    auth=('username', 'password'),
)
# 打印包含结果的 JSON 响应。
pprint(response.json())
const https = require("https");
// Credentials used to build the HTTP basic Authorization header.
const username = "USERNAME";
const password = "PASSWORD";
const basicToken = Buffer.from(`${username}:${password}`).toString("base64");

// Job description: English, uploader-provided subtitles for a single video ID.
const body = {
  source: "youtube_subtitles",
  query: "c4P_YuNnZ7U",
  context: [
    { key: "language_code", value: "en" },
    { key: "subtitle_origin", value: "uploader_provided" },
  ],
};

// Connection settings for the POST to /v1/queries.
const options = {
  hostname: "realtime.oxylabs.io",
  path: "/v1/queries",
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Basic " + basicToken,
  },
};

// Accumulates the response chunks into one string and, once the stream ends,
// parses it as JSON and prints it with two-space indentation.
function handleResponse(response) {
  let data = "";

  response.on("data", (chunk) => {
    data += chunk;
  });

  response.on("end", () => {
    const parsed = JSON.parse(data);
    console.log(JSON.stringify(parsed, null, 2));
  });
}

// Logs any error emitted by the request object.
function handleError(error) {
  console.error("Error:", error);
}

const request = https.request(options, handleResponse);
request.on("error", handleError);

// Send the serialized job as the request body.
request.write(JSON.stringify(body));
request.end();
# 您提交的整个字符串必须进行 URL 编码。
https://realtime.oxylabs.io/v1/queries?source=youtube_subtitles&query=c4P_YuNnZ7U&context=%5B%7B%22key%22%3A%22language_code%22%2C%22value%22%3A%22en%22%7D%2C%7B%22key%22%3A%22subtitle_origin%22%2C%22value%22%3A%22uploader_provided%22%7D%5D&access_token=12345abcde
<?php
// Job description: English, uploader-provided subtitles for a single video ID.
$params = [
    'source' => 'youtube_subtitles',
    'query' => 'c4P_YuNnZ7U',
    'context' => [
        ['key' => 'language_code', 'value' => 'en'],
        ['key' => 'subtitle_origin', 'value' => 'uploader_provided'],
    ],
];

// Configure a cURL handle that POSTs the JSON-encoded job with basic-auth
// credentials and returns the response as a string instead of printing it.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => "https://realtime.oxylabs.io/v1/queries",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($params),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => "USERNAME" . ":" . "PASSWORD",
    CURLOPT_HTTPHEADER => ["Content-Type: application/json"],
]);

// Execute the transfer and print whatever came back.
$result = curl_exec($ch);
echo $result;

// Report a transport-level error, if cURL recorded one.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
func main() {
const Username = "USERNAME"
const Password = "PASSWORD"
// 使用 query 和 context 参数定义负载。
payload := map[string]interface{}{
"source": "youtube_subtitles",
"query": "c4P_YuNnZ7U",
"context": []map[string]string{
{
"key": "language_code",
"value": "en",
},
{
"key": "subtitle_origin",
"value": "uploader_provided",
},
},
}
jsonValue, err := json.Marshal(payload)
if err != nil {
fmt.Println("Error marshalling JSON:", err)
return
}
client := &http.Client{}
request, err := http.NewRequest("POST", "https://realtime.oxylabs.io/v1/queries", bytes.NewBuffer(jsonValue))
if err != nil {
fmt.Println("Error creating request:", err)
return
}
request.SetBasicAuth(Username, Password)
request.Header.Set("Content-Type", "application/json")
response, err := client.Do(request)
if err != nil {
fmt.Println("Error making request:", err)
return
}
defer response.Body.Close()
responseText, err := ioutil.ReadAll(response.Body)
if err != nil {
fmt.Println("Error reading response:", err)
return
}
fmt.Println(string(responseText))
}using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Text;
using System.Threading.Tasks;
namespace OxyApi
{
/// <summary>
/// Console sample that submits a <c>youtube_subtitles</c> job and prints the
/// response body.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the job, POSTs it as JSON to <c>/v1/queries</c> with an HTTP basic
    /// Authorization header, and writes the response body to the console.
    /// An <see cref="HttpRequestException"/> (including the one thrown by
    /// <c>EnsureSuccessStatusCode</c> for a non-success status) is reported on
    /// the console instead of propagating.
    /// </summary>
    static async Task Main()
    {
        const string Username = "USERNAME";
        const string Password = "PASSWORD";

        // Job description: English, uploader-provided subtitles for one video ID.
        var parameters = new
        {
            source = "youtube_subtitles",
            query = "c4P_YuNnZ7U",
            context = new[]
            {
                new { key = "language_code", value = "en" },
                new { key = "subtitle_origin", value = "uploader_provided" }
            }
        };

        // The client, the request and the response are all IDisposable;
        // using declarations release them when Main exits, on every path.
        using var client = new HttpClient
        {
            BaseAddress = new Uri("https://realtime.oxylabs.io")
        };

        using var requestMessage = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
        {
            Content = JsonContent.Create(parameters)
        };

        // Basic auth: base64 of "username:password".
        var authenticationString = $"{Username}:{Password}";
        var base64EncodedAuthenticationString = Convert.ToBase64String(Encoding.UTF8.GetBytes(authenticationString));
        requestMessage.Headers.Add("Authorization", "Basic " + base64EncodedAuthenticationString);

        try
        {
            using var response = await client.SendAsync(requestMessage);
            response.EnsureSuccessStatusCode();

            var contents = await response.Content.ReadAsStringAsync();
            Console.WriteLine(contents);
        }
        catch (HttpRequestException e)
        {
            Console.WriteLine($"Request error: {e.Message}");
        }
    }
}
}
package org.example;
import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Authorization";
public static final String USERNAME = "USERNAME";
public static final String PASSWORD = "PASSWORD";
/**
 * Builds a youtube_subtitles job, POSTs it as JSON to the queries endpoint,
 * prints the response body (or the error message if the call throws), and
 * then terminates the JVM with exit code 0.
 */
public void run() {
    // Job context: English subtitles as provided by the uploader.
    JSONArray contextArray = new JSONArray();
    JSONObject languageEntry = new JSONObject().put("key", "language_code").put("value", "en");
    JSONObject originEntry = new JSONObject().put("key", "subtitle_origin").put("value", "uploader_provided");
    contextArray.put(languageEntry);
    contextArray.put(originEntry);

    JSONObject jsonObject = new JSONObject();
    jsonObject.put("source", "youtube_subtitles");
    jsonObject.put("query", "c4P_YuNnZ7U");
    jsonObject.put("context", contextArray);

    // Authenticator that rebuilds the request it is handed with a Basic
    // Authorization header derived from USERNAME and PASSWORD.
    Authenticator authenticator = (route, response) -> {
        String credential = Credentials.basic(USERNAME, PASSWORD);
        Request.Builder retry = response.request().newBuilder();
        retry.header(AUTHORIZATION_HEADER, credential);
        return retry.build();
    };

    OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
    clientBuilder.authenticator(authenticator);
    clientBuilder.readTimeout(180, TimeUnit.SECONDS);
    var client = clientBuilder.build();

    var mediaType = MediaType.parse("application/json; charset=utf-8");
    var body = RequestBody.create(jsonObject.toString(), mediaType);

    Request.Builder requestBuilder = new Request.Builder();
    requestBuilder.url("https://realtime.oxylabs.io/v1/queries");
    requestBuilder.post(body);
    var request = requestBuilder.build();

    // Both the response and its body are closed by try-with-resources.
    try (var response = client.newCall(request).execute()) {
        if (response.body() != null) {
            try (var responseBody = response.body()) {
                System.out.println(responseBody.string());
            }
        }
    } catch (Exception exception) {
        System.out.println("Error: " + exception.getMessage());
    }

    System.exit(0);
}
/**
 * Entry point: runs a new {@code Main} task on its own thread.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    Thread worker = new Thread(new Main());
    worker.start();
}
}
{
"source": "youtube_subtitles",
"query": "c4P_YuNnZ7U",
"context": [
{
"key": "language_code",
"value": "en"
},
{
"key": "subtitle_origin",
"value": "uploader_provided"
}
]
}
我们的示例使用 Realtime 集成方法。如果您想使用 Proxy Endpoint 或异步的 Push-Pull 集成,请参阅“集成方法”部分。
请求参数值
通用
– 必填参数
最后更新于
这有帮助吗?

