Function examples

HTML processing


Sample HTML

<!DOCTYPE html>
    <div id="product">
        <div id="product-description">This is a nice product</div>
        <div id="product-price">    12  3


Extract text from an HTML element and strip whitespace

    "price": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//*[@id='product-price']"]
                "_fn": "element_text"
    "price": "12  3"

Given a string value as input, element_text does nothing (the string is returned unchanged)

    "price": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//*[@id='product-price']/text()"]
                "_fn": "element_text"
    "price": "    12  3\n\n\n        "


Sample HTML

    <div class="product" id="socks">
        <div class="title">Socks</div>
        <div class="price">123.12</div>
        <div class="description">
                <li class="description-item">Very</li>
                <li class="description-item">Nice</li>
                <li class="description-item">Socks</li>

Get all description items

    "description_items": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["//li[@class='description-item']/text()"]
    "description_items": ["Very", "Nice", "Socks"]

Get the first description item

    "first_description_item": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["(//li[@class='description-item'])[1]/text()"]
    "first_description_item": [

Check if the description section element exists

    "description_section_exists": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["boolean(//div[@class='description'])"]
    "description_section_exists": true

Get price as a number

    "price": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["number(//div[@class='price'])"]
    "price": 123.12

Multiple expressions to fall back on in case the preceding expression fails

    "price": {
        "_fns": [
                "_fn": "xpath",
                "_args": [
                    "//div[@class='product-price']/text()", <--- this does not find anything
                    "//div[@class='price']/text()" <--- this finds the target price
    "price": [

Use the XPath | operator to match multiple expressions

    "price_and_title": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["//div[@class='price']/text() | //div[@class='title']/text()"]
    "price_and_title": [


Sample HTML

    <div class="product" id="socks">
        <div class="title">Socks</div>
        <div class="price">123.12</div>
        <div class="description">
                <li class="description-item">Very</li>
                <li class="description-item">Nice</li>
                <li class="description-item">Socks</li>

Return the first match

    "first_description_item": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//li/text()"]
    "first_description_item": "Very"

Using XPath functions

    "price": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": ["number(.//div[@class='price'])"]
    "price": 123.12

String manipulation


Sample HTML

    <div class="product" id="socks">
        <div class="title">Socks</div>
        <div class="price">The price is: 123.12 pesos</div>

Extract amount from string

    "price": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//div[@class='price']/text()"]
                "_fn": "amount_from_string"
    "price": 123.12


Sample HTML

    <div class="product">
        <div class="price">
            The price is: 123.12 pesos;
            The price is: 345.12 pesos;
            The price is: 678.12 pesos

Extract all amounts from string

    "prices": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//div[@class='price']/text()"]
                "_fn": "amount_range_from_string"
    "prices": [


Sample HTML

    <div class="product">
        <div class="price">
            The price is: 123.12 pesos;
        <div class="price">
            The price is: 345.12 pesos;
        <div class="price">
            The price is: 678.12 pesos

Join an array of strings into a single string

    "price_variants": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//div[@class='price']"]
            {  // If we call normalize-space() in the first pipeline function,
               // it returns only the first value.
                "_fn": "xpath",
                "_args": ["normalize-space(text())"]
                "_fn": "join",
                "_args": ""
    "price_variants": "The price is: 123.12 pesos;The price is: 345.12 pesos;The price is: 678.12 pesos"


Sample HTML

    <div class="product">
        <div class="description">
            [one description]
            [two description]
            [three description]

Find all matches between two characters

    "descriptions": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//div[@class='description']/text()"]
                "_fn": "regex_find_all",
                "_args": ["\\[(.*)\\]"]
    "descriptions": [
        "one description",
        "two description",
        "three description"

Sample HTML

    <div class="product">
        <div class="description">
            [one description]
            [two description]
            [three description]
            {the one i need}

Return description between two characters

    "description": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//div[@class='description']/text()"]
                "_fn": "regex_search",
                "_args": ["{(.*)}", 1]
    "description": "the one i need"


Sample HTML

    <div class="product">
        <div class="description">
            * one description
            * two description
            * three description
            * {this one i would like to get replaced}

Replace part of the text with a specified value

    "descriptions": {
        "_fns": [
                "_fn": "xpath_one",
                "_args": [".//div[@class='description']/text()"]
                "_fn": "regex_substring",
                "_args": ["{this one i would like to get replaced}", "four description"]
                "_fn": "regex_find_all",
                "_args": ["\\*\\s(.*)\n"]
    "descriptions": [
        "one description",
        "two description",
        "three description",
        "four description"

Common functions


Sample HTML

    <div class="product">
        <div class="price">123</div>
        <div class="price">124</div>
        <div class="price">456</div>
        <div class="price">421</div>
        <div class="price">100</div>

Get the count of price variants

    "price_variants": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//div[@class='price']"]
                "_fn": "length"
    "price_variants": 5

Get the count of options for each property in a multi-dimensional array

Sample HTML:

    <div class="product">
        <property class="colors">
            <option class="color">Red</option>
            <option class="color">Green</option>
            <option class="color">Blue</option>
        <property class="sizes">
            <option class="size">S</option>
            <option class="size">M</option>
            <option class="size">L</option>
            <option class="size">XL</option>
    "number_of_variants": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//property"]
                "_fn": "xpath",
                "_args": [".//option"]
                "_fn": "length"
    "number_of_variants": [


Sample HTML

    <div class="product" id="socks">
        <div class="title">Socks</div>
        <div class="price">123.12</div>
        <div class="description">
                <li class="description-item">Very</li>
                <li class="description-item">Nice</li>
                <li class="description-item">Socks</li>

Select the first description item from the array

    "price_and_title": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["//li[@class='description-item']/text()"]
                "_fn": "select_nth",
                "_args": 0
    "price_and_title": "Very"

Select the last description item from the array

    "price_and_title": {
        "_fns": [
                "_fn": "xpath",
                "_args": ["//li[@class='description-item']/text()"]
                "_fn": "select_nth",
                "_args": -1
    "price_and_title": "Socks"

Math functions


Sample HTML

    <div class="product">
        <div class="price">123</div>
        <div class="price">124</div>
        <div class="price">456</div>
        <div class="price">421</div>
        <div class="price">100</div>

Find the average of all listed prices

    "price_average": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//div[@class='price']"]
                "_fn": "xpath_one",
                "_args": ["number(text())"]
                "_fn": "average"
    "price_average": 244.8


Sample HTML

    <div class="product">
        <div class="price">123</div>
        <div class="price">124</div>
        <div class="price">456</div>
        <div class="price">421</div>
        <div class="price">100</div>

Find the max of all listed prices

    "price_max": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//div[@class='price']"]
                "_fn": "xpath_one",
                "_args": ["number(text())"]
                "_fn": "max"
    "price_max": 456.0


Sample HTML

    <div class="product">
        <div class="price">123</div>
        <div class="price">124</div>
        <div class="price">456</div>
        <div class="price">421</div>
        <div class="price">100</div>

Find the min of all listed prices

    "price_min": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//div[@class='price']"]
                "_fn": "xpath_one",
                "_args": ["number(text())"]
                "_fn": "min"
    "price_min": 100.0


Sample HTML

    <div class="product">
        <property class="colors">
            <option class="color">Red</option>
            <option class="color">Green</option>
            <option class="color">Blue</option>
        <property class="sizes">
            <option class="size">S</option>
            <option class="size">M</option>
            <option class="size">L</option>
            <option class="size">XL</option>

Get the count of different product variants

    "number_of_variants": {
        "_fns": [
                "_fn": "xpath",
                "_args": [".//property"]
                "_fn": "xpath",
                "_args": [".//option"]
                "_fn": "length"
                "_fn": "product"
    "number_of_variants": 12

Last updated

Was this helpful?