News Search

Scrape Google News results on a large scale and get completely parsed data. Extract articles with titles, sources, and publication dates.

The google_search source is designed to retrieve Google Search results (SERPs). This sub-page specifically presents data related to Google News Search. To explore other result types, read here: Web Search, Image Search.

To scrape Google News search results, include either the context:udm parameter with the value set to 12 or the context:tbm parameter with the value set to nws.

Explore the output data dictionary for each News SERP feature. It offers a brief description, a screenshot, a parsed JSON code snippet, and a table defining each parsed field. Navigate through the details using the right-side navigation or by scrolling down the page.

Request samples

In the examples below, we make a request to obtain News search result pages for the search term adidas on the google.nl domain.

udm

# Request parsed Google News results (context udm=12) for "adidas" on google.nl
# from the Realtime endpoint, authenticating with the account credentials.
curl --user 'USERNAME:PASSWORD' \
    --header 'Content-Type: application/json' \
    --data '{
        "source": "google_search",
        "domain": "nl",
        "query": "adidas",
        "parse": true,
        "context": [
            {
                "key": "udm",
                "value": "12"
            }
        ]
    }' \
    'https://realtime.oxylabs.io/v1/queries'

import requests
from pprint import pprint

# Job description: parsed Google News results (context udm=12) for the
# query "adidas" on the google.nl domain.
job = dict(
    source='google_search',
    domain='nl',
    query='adidas',
    parse=True,
    context=[dict(key='udm', value='12')],
)

# Submit the job to the Realtime endpoint with the account credentials.
reply = requests.post(
    'https://realtime.oxylabs.io/v1/queries',
    json=job,
    auth=('USERNAME', 'PASSWORD'),
)

# Decode the JSON body and pretty-print it to stdout.
pprint(reply.json())

const https = require("https");

const username = "USERNAME";
const password = "PASSWORD";

// Job description: parsed Google News results (context udm=12) for the
// query "adidas" on the google.nl domain.
const body = {
    source: "google_search",
    domain: "nl",
    query: "adidas",
    parse: true,
    context: [
        { key: "udm", value: "12" },
    ],
};

// HTTP Basic credentials: base64 of "username:password".
const credentials = Buffer.from(`${username}:${password}`).toString("base64");

const options = {
    hostname: "realtime.oxylabs.io",
    path: "/v1/queries",
    method: "POST",
    headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${credentials}`,
    },
};

const request = https.request(options, (response) => {
    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not corrupted by per-chunk conversion.
    response.setEncoding("utf8");

    let data = "";

    response.on("data", (chunk) => {
        data += chunk;
    });

    response.on("end", () => {
        try {
            const responseData = JSON.parse(data);
            console.log(JSON.stringify(responseData, null, 2));
        } catch (error) {
            // The body was not JSON; report it together with the HTTP status
            // instead of crashing with an uncaught exception.
            console.error(
                `Error: response (HTTP ${response.statusCode}) is not valid JSON:`,
                error.message,
            );
            console.error(data);
        }
    });
});

// Connection-level failures (DNS, TLS, reset, ...) are reported here.
request.on("error", (error) => {
    console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();

<?php

// Job description: parsed Google News results (context udm=12) for the
// query "adidas" on the google.nl domain.
$params = [
    'source' => 'google_search',
    'domain' => 'nl',
    'query' => 'adidas',
    'parse' => true,
    'context' => [
        ['key' => 'udm', 'value' => '12'],
    ],
];

$ch = curl_init();

// Configure a JSON POST to the Realtime endpoint; the response body is
// returned by curl_exec() instead of being written straight to output.
curl_setopt_array($ch, [
    CURLOPT_URL => 'https://realtime.oxylabs.io/v1/queries',
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($params),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => 'USERNAME:PASSWORD',
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
]);

$result = curl_exec($ch);
echo $result;

// Report a transport-level failure, if any.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
)

// run submits a Google News search job (context udm=12) for "adidas" on
// google.nl to the Realtime endpoint and prints the raw response body to
// stdout. It returns the first error encountered while encoding the payload,
// building or sending the request, or reading the response.
func run() error {
	const Username = "USERNAME"
	const Password = "PASSWORD"

	payload := map[string]interface{}{
		"source": "google_search",
		"domain": "nl",
		"query":  "adidas",
		"parse":  true,
		"context": []map[string]interface{}{
			{"key": "udm", "value": "12"},
		},
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encoding payload: %w", err)
	}

	request, err := http.NewRequest(
		http.MethodPost,
		"https://realtime.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	// Declare the body as JSON, matching the other request samples.
	request.Header.Set("Content-Type", "application/json")
	request.SetBasicAuth(Username, Password)

	client := &http.Client{}
	response, err := client.Do(request)
	if err != nil {
		// response is nil on failure; it must not be dereferenced.
		return fmt.Errorf("sending request: %w", err)
	}
	// Release the underlying connection once the body has been consumed.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return fmt.Errorf("reading response: %w", err)
	}

	fmt.Println(string(responseText))
	return nil
}

// main runs the sample and exits with a non-zero status on failure.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;

namespace OxyApi
{
    class Program
    {
        /// <summary>
        /// Posts a Google News search job (context udm=12) for "adidas" on
        /// google.nl to the Realtime endpoint and writes the response body
        /// to the console.
        /// </summary>
        static async Task Main()
        {
            const string Username = "USERNAME";
            const string Password = "PASSWORD";

            // HTTP Basic token: base64 of the UTF-8 bytes of "Username:Password".
            var credentialBytes = System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}");
            var basicToken = Convert.ToBase64String(credentialBytes);

            // Job description, serialized to JSON as the request body.
            var job = new
            {
                source = "google_search",
                domain = "nl",
                query = "adidas",
                parse = true,
                context = new dynamic[]
                {
                    new { key = "udm", value = "12" },
                }
            };

            var http = new HttpClient
            {
                BaseAddress = new Uri("https://realtime.oxylabs.io")
            };

            var post = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(job)
            };
            post.Headers.Add("Authorization", "Basic " + basicToken);

            var reply = await http.SendAsync(post);
            var text = await reply.Content.ReadAsStringAsync();

            Console.WriteLine(text);
        }
    }
}

package org.example;

import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;

/**
 * Sample client: posts a Google News search job (context udm=12) for
 * "adidas" on google.nl to the Realtime endpoint and prints the response body.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "USERNAME";
    public static final String PASSWORD = "PASSWORD";

    /** Builds the job, sends it, prints the reply (or the error), then exits the JVM. */
    public void run() {
        // Job description sent as the JSON request body.
        JSONObject udmContext = new JSONObject()
                .put("key", "udm")
                .put("value", "12");
        JSONObject payload = new JSONObject()
                .put("source", "google_search")
                .put("domain", "nl")
                .put("query", "adidas")
                .put("parse", true)
                .put("context", new JSONArray().put(udmContext));

        // Answers an authentication challenge by re-issuing the request
        // with HTTP Basic credentials attached.
        Authenticator basicAuth = (route, challenged) -> challenged
                .request()
                .newBuilder()
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .build();

        // Allow up to 180 seconds for reading the response.
        OkHttpClient httpClient = new OkHttpClient.Builder()
                .readTimeout(180, TimeUnit.SECONDS)
                .authenticator(basicAuth)
                .build();

        MediaType jsonType = MediaType.parse("application/json; charset=utf-8");
        Request post = new Request.Builder()
                .url("https://realtime.oxylabs.io/v1/queries")
                .post(RequestBody.create(payload.toString(), jsonType))
                .build();

        // Both the response and its body are closed when the block exits;
        // a null body is simply skipped.
        try (Response reply = httpClient.newCall(post).execute();
             ResponseBody replyBody = reply.body()) {
            if (replyBody != null) {
                System.out.println(replyBody.string());
            }
        } catch (Exception failure) {
            System.out.println("Error: " + failure.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        Thread worker = new Thread(new Main());
        worker.start();
    }
}

tbm

# Request parsed Google News results (context tbm=nws) for "adidas" on google.nl
# from the Realtime endpoint, authenticating with the account credentials.
curl --user 'USERNAME:PASSWORD' \
    --header 'Content-Type: application/json' \
    --data '{
        "source": "google_search",
        "domain": "nl",
        "query": "adidas",
        "parse": true,
        "context": [
            {
                "key": "tbm",
                "value": "nws"
            }
        ]
    }' \
    'https://realtime.oxylabs.io/v1/queries'

import requests
from pprint import pprint

# Job description: parsed Google News results (context tbm=nws) for the
# query "adidas" on the google.nl domain.
job = dict(
    source='google_search',
    domain='nl',
    query='adidas',
    parse=True,
    context=[dict(key='tbm', value='nws')],
)

# Submit the job to the Realtime endpoint with the account credentials.
reply = requests.post(
    'https://realtime.oxylabs.io/v1/queries',
    json=job,
    auth=('USERNAME', 'PASSWORD'),
)

# Decode the JSON body and pretty-print it to stdout.
pprint(reply.json())

const https = require("https");

const username = "USERNAME";
const password = "PASSWORD";

// Job description: parsed Google News results (context tbm=nws) for the
// query "adidas" on the google.nl domain.
const body = {
    source: "google_search",
    domain: "nl",
    query: "adidas",
    parse: true,
    context: [
        { key: "tbm", value: "nws" },
    ],
};

// HTTP Basic credentials: base64 of "username:password".
const credentials = Buffer.from(`${username}:${password}`).toString("base64");

const options = {
    hostname: "realtime.oxylabs.io",
    path: "/v1/queries",
    method: "POST",
    headers: {
        "Content-Type": "application/json",
        Authorization: `Basic ${credentials}`,
    },
};

const request = https.request(options, (response) => {
    // Decode as UTF-8 at the stream level so a multi-byte character that is
    // split across two chunks is not corrupted by per-chunk conversion.
    response.setEncoding("utf8");

    let data = "";

    response.on("data", (chunk) => {
        data += chunk;
    });

    response.on("end", () => {
        try {
            const responseData = JSON.parse(data);
            console.log(JSON.stringify(responseData, null, 2));
        } catch (error) {
            // The body was not JSON; report it together with the HTTP status
            // instead of crashing with an uncaught exception.
            console.error(
                `Error: response (HTTP ${response.statusCode}) is not valid JSON:`,
                error.message,
            );
            console.error(data);
        }
    });
});

// Connection-level failures (DNS, TLS, reset, ...) are reported here.
request.on("error", (error) => {
    console.error("Error:", error);
});

request.write(JSON.stringify(body));
request.end();

<?php

// Job description: parsed Google News results (context tbm=nws) for the
// query "adidas" on the google.nl domain.
$params = [
    'source' => 'google_search',
    'domain' => 'nl',
    'query' => 'adidas',
    'parse' => true,
    'context' => [
        ['key' => 'tbm', 'value' => 'nws'],
    ],
];

$ch = curl_init();

// Configure a JSON POST to the Realtime endpoint; the response body is
// returned by curl_exec() instead of being written straight to output.
curl_setopt_array($ch, [
    CURLOPT_URL => 'https://realtime.oxylabs.io/v1/queries',
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POSTFIELDS => json_encode($params),
    CURLOPT_POST => 1,
    CURLOPT_USERPWD => 'USERNAME:PASSWORD',
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
]);

$result = curl_exec($ch);
echo $result;

// Report a transport-level failure, if any.
if (curl_errno($ch)) {
    echo 'Error:' . curl_error($ch);
}
curl_close($ch);

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
)

// run submits a Google News search job (context tbm=nws) for "adidas" on
// google.nl to the Realtime endpoint and prints the raw response body to
// stdout. It returns the first error encountered while encoding the payload,
// building or sending the request, or reading the response.
func run() error {
	const Username = "USERNAME"
	const Password = "PASSWORD"

	payload := map[string]interface{}{
		"source": "google_search",
		"domain": "nl",
		"query":  "adidas",
		"parse":  true,
		"context": []map[string]interface{}{
			{"key": "tbm", "value": "nws"},
		},
	}

	jsonValue, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encoding payload: %w", err)
	}

	request, err := http.NewRequest(
		http.MethodPost,
		"https://realtime.oxylabs.io/v1/queries",
		bytes.NewBuffer(jsonValue),
	)
	if err != nil {
		return fmt.Errorf("building request: %w", err)
	}

	// Declare the body as JSON, matching the other request samples.
	request.Header.Set("Content-Type", "application/json")
	request.SetBasicAuth(Username, Password)

	client := &http.Client{}
	response, err := client.Do(request)
	if err != nil {
		// response is nil on failure; it must not be dereferenced.
		return fmt.Errorf("sending request: %w", err)
	}
	// Release the underlying connection once the body has been consumed.
	defer response.Body.Close()

	responseText, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return fmt.Errorf("reading response: %w", err)
	}

	fmt.Println(string(responseText))
	return nil
}

// main runs the sample and exits with a non-zero status on failure.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Json;
using System.Threading.Tasks;

namespace OxyApi
{
    class Program
    {
        /// <summary>
        /// Posts a Google News search job (context tbm=nws) for "adidas" on
        /// google.nl to the Realtime endpoint and writes the response body
        /// to the console.
        /// </summary>
        static async Task Main()
        {
            const string Username = "USERNAME";
            const string Password = "PASSWORD";

            // HTTP Basic token: base64 of the UTF-8 bytes of "Username:Password".
            var credentialBytes = System.Text.Encoding.UTF8.GetBytes($"{Username}:{Password}");
            var basicToken = Convert.ToBase64String(credentialBytes);

            // Job description, serialized to JSON as the request body.
            var job = new
            {
                source = "google_search",
                domain = "nl",
                query = "adidas",
                parse = true,
                context = new dynamic[]
                {
                    new { key = "tbm", value = "nws" },
                }
            };

            var http = new HttpClient
            {
                BaseAddress = new Uri("https://realtime.oxylabs.io")
            };

            var post = new HttpRequestMessage(HttpMethod.Post, "/v1/queries")
            {
                Content = JsonContent.Create(job)
            };
            post.Headers.Add("Authorization", "Basic " + basicToken);

            var reply = await http.SendAsync(post);
            var text = await reply.Content.ReadAsStringAsync();

            Console.WriteLine(text);
        }
    }
}

package org.example;

import okhttp3.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.concurrent.TimeUnit;

/**
 * Sample client: posts a Google News search job (context tbm=nws) for
 * "adidas" on google.nl to the Realtime endpoint and prints the response body.
 */
public class Main implements Runnable {
    private static final String AUTHORIZATION_HEADER = "Authorization";
    public static final String USERNAME = "USERNAME";
    public static final String PASSWORD = "PASSWORD";

    /** Builds the job, sends it, prints the reply (or the error), then exits the JVM. */
    public void run() {
        // Job description sent as the JSON request body.
        JSONObject tbmContext = new JSONObject()
                .put("key", "tbm")
                .put("value", "nws");
        JSONObject payload = new JSONObject()
                .put("source", "google_search")
                .put("domain", "nl")
                .put("query", "adidas")
                .put("parse", true)
                .put("context", new JSONArray().put(tbmContext));

        // Answers an authentication challenge by re-issuing the request
        // with HTTP Basic credentials attached.
        Authenticator basicAuth = (route, challenged) -> challenged
                .request()
                .newBuilder()
                .header(AUTHORIZATION_HEADER, Credentials.basic(USERNAME, PASSWORD))
                .build();

        // Allow up to 180 seconds for reading the response.
        OkHttpClient httpClient = new OkHttpClient.Builder()
                .readTimeout(180, TimeUnit.SECONDS)
                .authenticator(basicAuth)
                .build();

        MediaType jsonType = MediaType.parse("application/json; charset=utf-8");
        Request post = new Request.Builder()
                .url("https://realtime.oxylabs.io/v1/queries")
                .post(RequestBody.create(payload.toString(), jsonType))
                .build();

        // Both the response and its body are closed when the block exits;
        // a null body is simply skipped.
        try (Response reply = httpClient.newCall(post).execute();
             ResponseBody replyBody = reply.body()) {
            if (replyBody != null) {
                System.out.println(replyBody.string());
            }
        } catch (Exception failure) {
            System.out.println("Error: " + failure.getMessage());
        }

        System.exit(0);
    }

    public static void main(String[] args) {
        Thread worker = new Thread(new Main());
        worker.start();
    }
}

We use the synchronous Realtime integration method in our examples. If you would like to use the Proxy Endpoint or the asynchronous Push-Pull integration, refer to the integration methods section.

Request parameter values

Generic

Basic setup and customization options for scraping Google News search results.

Parameter

Description

Default Value

source

Sets the scraper.

google_search

query

The keyword or phrase to search for.

context: udm

To get News search results, set value to 12. Find other accepted values here.

context: tbm

To get News search results, set value to nws. Other accepted values are: app, blg, bks, dsc, isch, pts, plcs, rcp, lcl

render

Enables JavaScript rendering when set to html. More info.

parse

Returns parsed data when set to true. Explore output data dictionary.

false

callback_url

URL to your callback endpoint. More info.

user_agent_type

Device type and browser. The full list can be found here.

desktop

- source and query are mandatory parameters.

- udm and tbm context parameters cannot be used together in a single scraping request; please select one of them. Using both simultaneously may lead to conflicts or unexpected behavior.

Google Advanced Search Operators

When scraping, you might find it useful to combine Google advanced search operators with your query. It enables you to customize the scope of the search, ensuring that the results are more relevant and focused. Explore these special commands here and here. See an example below.

{
    "source": "google_search",
    "query": "iphone 15 launch inurl:apple"
}

Localization

Adapt search results to specific geographical locations, domains, and languages.

Parameter

Description

Default Value

geo_location

The geographical location that the result should be adapted for. Using this parameter correctly is extremely important to get the right data. For more information, read about our suggested geo_location parameter structures here.

domain

Domain localization for Google. The full list of available domains can be found here.

com

locale

Accept-Language header value which changes your Google search page web interface language. More info.

Pagination

Controls for managing the pagination and retrieval of search results.

Parameter

Description

Default Value

start_page

Starting page number.

1

pages

Number of pages to retrieve.

1

limit

Number of results to retrieve in each page.

10

context:

limit_per_page

If you want to scrape multiple pages with the same IP address, include a JSON array and specify the page numbers using the page key. You must also indicate the number of organic results on each page by adding a limit key. See example.

Limit per page

To use this feature, include a JSON array with JSON objects containing the following data:

Parameter

Description

Example

page

The number of the page you would like to scrape. Any integer value greater than 0 will work

1

limit

The number of results on the page in question. Any integer value between 1 and 100 (inclusive) will work.

90

Request sample

{
    "source": "google_search",
    "query": "adidas",
    "parse": true,
    "context": [
        {
            "key": "limit_per_page",
            "value": [
                {"page": 1, "limit": 10},
                {"page": 2, "limit": 90}
            ]
        }
    ]
}

Filtering

Options to filter and refine search results based on various criteria.

Parameter

Description

Default Value

context:safe_search

Safe search. Set to true to enable it.

false

context: tbs

tbs parameter. This parameter acts as a container for more obscure Google parameters, such as limiting or sorting results by date, as well as other filters, some of which depend on the tbm parameter (e.g. tbs=app_os:1 is only available with the tbm value app). More info here.

Other

Additional advanced settings and controls for specialized requirements.

Parameter

Description

Default Value

context: nfpr

Set to true to turn off spelling auto-correction.

false

Context parameters

All context parameters should be added to the context array as objects with key and value pairs, e.g.:

...
"context": [
    {
        "key": "filter",
        "value": "0"
    }
]
...

Structured data

SERP Scraper API is capable of extracting either an HTML or JSON object that contains Google search results, offering structured data on various elements of the results page.

google_search news structured output

{
    "results": [
        {
            "content": {
                "url": "https://www.google.com/search?q=adidas&tbm=nws&uule=w+CAIQICINdW5pdGVkIHN0YXRlcw&gl=us&hl=en",
                "page": 1,
                "results": {
                    "main": [
                        {
                            "url": "https://www.cnn.com/2022/05/06/business/under-armour-stock-adidas-nike/index.html",
                            "desc": "Tripped-up supply chains and a coronavirus surge in China are causing \nheadaches for top athletic brands.",
                            "title": "Wall Street is fed up with Under Armour, Nike and Adidas",
                            "source": "CNN",
                            "pos_overall": 1,
                            "relative_publish_date": "2 days ago"
                        },
                        ...
                        {
                            "url": "https://www.cnbc.com/2022/05/06/dsw-tests-layout-to-spotlight-brands-like-adidas-crocs-birkenstock.html",
                            "desc": "DSW is trying out a new store look and layout at a location opening this \nweekend in Houston, in an attempt to focus customers' attention on...",
                            "title": "DSW is testing a store layout that puts the spotlight on brands like \nAdidas, Crocs and Birkenstock",
                            "source": "CNBC",
                            "pos_overall": 10,
                            "relative_publish_date": "2 days ago"
                        }
                    ],
                    "total_results_count": 57300000
                },
                "parse_status_code": 12000
            },
            "created_at": "2022-05-09 07:25:03",
            "updated_at": "2022-05-09 07:25:07",
            "page": 1,
            "url": "https://www.google.com/search?q=adidas&tbm=nws&uule=w+CAIQICINdW5pdGVkIHN0YXRlcw&gl=us&hl=en",
            "job_id": "6929330379711060993",
            "status_code": 200,
            "parser_type": "v2"
        }
    ]
}

We only parse news search results for desktop searches.

Output data dictionary

HTML example

JSON structure

The Google News Search structured output includes fields like URL, page, results, and others. The table below presents a detailed list of each SERP feature we parse, along with its description and data type. The table also includes some metadata.

The number of items and fields for a specific result type may vary depending on the search query.

Key

Description

Type

url

The URL of the Google search page.

string

results

A dictionary containing the results of the search.

object

results.main

A list of unpaid news results with their respective details.

array

results.additional

A list of trending articles with their respective details.

array

results.total_results_count

The total number of results found for the search query.

integer

parse_status_code

The status code of the parsing job. You can see the parser status codes described here.

integer

created_at

The timestamp when the scraping job was created.

timestamp

updated_at

The timestamp when the scraping job was finished.

timestamp

page

Page number relative to the Google SERP pagination.

integer

job_id

The ID of the job associated with the scraping job.

string

status_code

The status code of the scraping job. You can see the scraper status codes described here.

integer

In the following sections, parsed JSON code snippets are shortened where more than one item for the result type is available.

Main

Displays a list of unpaid news results, providing relevant details for each article.

...
"main": [
    {
        "url": "https://www.yahoo.com/lifestyle/tiger-woods-nikes-epic-partnership-015311819.html",
        "desc": "That there could ever be a world in which Tiger Woods wasn't sponsored by \nNike seemed...",
        "title": "How Tiger Woods and Nike's Epic Partnership Fell Apart",
        "source": "Yahoo",
        "pos_overall": 1,
        "relative_publish_date": "1 day ago"
    },
    ...
],

...

Key (results.main)

Description

Type

url

The URL to the full article.

string

desc

A short excerpt from the full article.

string

title

The title of the article.

string

source

The name of the website where the article is published.

string

pos_overall

Indicates the overall position of the result within the main results of News SERP.

integer

relative_publish_date

Describes how long ago the article was published.

string

Additional

Presents a list of trending articles, accompanied by relevant details.

...
"additional": [
    {
        "items": [
            {
                "pos": 1,
                "url": "https://www.complex.com/sneakers/a/brendan-dunne/nike-book-1-colorways-haven-hike-rattlesnake",
                "title": "Nike Book 1 Colorways Haven Hike Rattlesnake",
                "source": "Complex",
                "relative_publish_date": "1 day ago"
            },
         ...
        ],
        "pos_overall": 2,
        "section_title": "Devin Booker confirms issues with Nike Book 1 launch"
    }
],
...

Key (results.additional)

Description

Type

items

A list of articles with their respective details.

array

items.pos

A unique indicator denoting the article position in the list.

integer

items.url

The URL to the full article.

string

items.title

The title of the article.

string

items.source

The name of the website where the article is published.

string

items.relative_publish_date

Describes how long ago the article was published.

string

pos_overall

Indicates the overall position of the result within the additional results of News SERP.

integer

section_title

The name of the additional section.

string

Previous: Image Search | Next: Local Search

Last updated 4 months ago

Was this helpful?

Good night

Request samples

udm

tbm

Request parameter values

Generic

Google Advanced Search Operators

Localization

Pagination

Limit per page

Request sample

Filtering

Other

Context parameters

Structured data

Output data dictionary

HTML example

JSON structure

Main

Additional