Function examples
HTML processing
element_text
element_text
Sample HTML
<!DOCTYPE html>
<html>
<body>
<div id="product">
<div id="product-description">This is a nice product</div>
<div id="product-price"> 12 3
</div>
</div>
</body>
</html>
Extract text from an HTML element and strip surrounding whitespace
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//*[@id='product-price']"]
},
{
"_fn": "element_text"
}
]
}
}
{
"price": "12 3"
}
Given a string value as an input, do nothing
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//*[@id='product-price']/text()"]
},
{
"_fn": "element_text"
}
]
}
}
{
"price": " 12 3\n\n\n "
}
xpath
xpath
Sample HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">123.12</div>
<div class="description">
<ul>
<li class="description-item">Very</li>
<li class="description-item">Nice</li>
<li class="description-item">Socks</li>
</ul>
</div>
</div>
</body>
Get all description items
{
"description_items": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//li[@class='description-item']/text()"]
}
]
}
}
{
"description_items": ["Very", "Nice", "Socks"]
}
Get the first description item
{
"first_description_item": {
"_fns": [
{
"_fn": "xpath",
"_args": ["(//li[@class='description-item'])[1]/text()"]
}
]
}
}
{
"first_description_item": [
"Very"
]
}
Check if the description section element exists
{
"description_section_exists": {
"_fns": [
{
"_fn": "xpath",
"_args": ["boolean(//div[@class='description'])"]
}
]
}
}
{
"description_section_exists": true
}
Get price as a number
{
"price": {
"_fns": [
{
"_fn": "xpath",
"_args": ["number(//div[@class='price'])"]
}
]
}
}
{
"price": 123.12
}
Multiple expressions to fall back to in case the preceding expression finds nothing
{
"price": {
"_fns": [
{
"_fn": "xpath",
"_args": [
"//div[@class='product-price']/text()", <--- this does not find anything
"//div[@class='price']/text()" <--- this finds the target price
]
}
]
}
}
{
"price": [
"123.12"
]
}
XPath | operator to match multiple expressions
{
"price_and_title": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//div[@class='price']/text() | //div[@class='title']/text()"]
}
]
}
}
{
"price_and_title": [
"Socks",
"123.12"
]
}
xpath_one
xpath_one
Sample HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">123.12</div>
<div class="description">
<ul>
<li class="description-item">Very</li>
<li class="description-item">Nice</li>
<li class="description-item">Socks</li>
</ul>
</div>
</div>
</body>
Return the first match
{
"first_description_item": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//li/text()"]
}
]
}
}
{
"first_description_item": "Very"
}
Using XPath functions
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": ["number(.//div[@class='price'])"]
}
]
}
}
{
"price": 123.12
}
String manipulation
amount_from_string
amount_from_string
Sample HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">The price is: 123.12 pesos</div>
</div>
</body>
Extract amount from string
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='price']/text()"]
},
{
"_fn": "amount_from_string"
}
]
}
}
{
"price": 123.12
}
amount_range_from_string
amount_range_from_string
Sample HTML
<body>
<div class="product">
<div class="price">
The price is: 123.12 pesos;
The price is: 345.12 pesos;
The price is: 678.12 pesos
</div>
</div>
</body>
Extract all amounts from string
{
"prices": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='price']/text()"]
},
{
"_fn": "amount_range_from_string"
}
]
}
}
{
"prices": [
123.12,
345.12,
678.12
]
}
join
join
Sample HTML
<body>
<div class="product">
<div class="price">
The price is: 123.12 pesos;
</div>
<div class="price">
The price is: 345.12 pesos;
</div>
<div class="price">
The price is: 678.12 pesos
</div>
</div>
</body>
Join an array of strings into a single string
{
"price_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{ // If we call normalize-space() in first pipeline function,
// it will return only the first value.
"_fn": "xpath",
"_args": ["normalize-space(text())"]
},
{
"_fn": "join",
"_args": ""
}
]
}
}
{
"price_variants": "The price is: 123.12 pesos;The price is: 345.12 pesos;The price is: 678.12 pesos"
}
regex_find_all
regex_find_all
Sample HTML
<body>
<div class="product">
<div class="description">
[one description]
[two description]
[three description]
</div>
</div>
</body>
Find all matches between two characters
{
"descriptions": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='description']/text()"]
},
{
"_fn": "regex_find_all",
"_args": ["\\[(.*)\\]"]
}
]
}
}
{
"descriptions": [
"one description",
"two description",
"three description"
]
}
regex_search
regex_search
Sample HTML
<body>
<div class="product">
<div class="description">
[one description]
[two description]
[three description]
{the one i need}
</div>
</div>
</body>
Return description between two characters
{
"description": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='description']/text()"]
},
{
"_fn": "regex_search",
"_args": ["{(.*)}", 1]
}
]
}
}
{
"description": "the one i need"
}
regex_substring
regex_substring
Sample HTML
<body>
<div class="product">
<div class="description">
* one description
* two description
* three description
* {this one i would like to get replaced}
</div>
</div>
</body>
Replace a part of text with specified value
{
"descriptions": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='description']/text()"]
},
{
"_fn": "regex_substring",
"_args": ["{this one i would like to get replaced}", "four description"]
},
{
"_fn": "regex_find_all",
"_args": ["\\*\\s(.*)\n"]
}
]
}
}
{
"descriptions": [
"one description",
"two description",
"three description",
"four description"
]
}
Common functions
convert_to_*
convert_to_*
Sample HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
Get the count of price variants
{
"price_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "length"
}
]
}
}
{
"price_variants": 5
}
Get the count of options for each property in a multi-dimensional array
Sample HTML:
<body>
<div class="product">
<property class="colors">
<option class="color">Red</option>
<option class="color">Green</option>
<option class="color">Blue</option>
</property>
<property class="sizes">
<option class="size">S</option>
<option class="size">M</option>
<option class="size">L</option>
<option class="size">XL</option>
</property>
</div>
</body>
{
"number_of_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//property"]
},
{
"_fn": "xpath",
"_args": [".//option"]
},
{
"_fn": "length"
}
]
}
}
{
"number_of_variants": [
3,
4
]
}
select_nth
select_nth
Sample HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">123.12</div>
<div class="description">
<ul>
<li class="description-item">Very</li>
<li class="description-item">Nice</li>
<li class="description-item">Socks</li>
</ul>
</div>
</div>
</body>
Select the first description item from the array
{
"price_and_title": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//li[@class='description-item']/text()"]
},
{
"_fn": "select_nth",
"_args": 0
}
]
}
}
{
"price_and_title": "Very"
}
Select the last description item from the array
{
"price_and_title": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//li[@class='description-item']/text()"]
},
{
"_fn": "select_nth",
"_args": -1
}
]
}
}
{
"price_and_title": "Socks"
}
Math functions
average
average
Sample HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
Find the average of all listed prices
{
"price_average": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "xpath_one",
"_args": ["number(text())"]
},
{
"_fn": "average"
}
]
}
}
{
"price_average": 244.8
}
max
max
Sample HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
Find the max of all listed prices
{
"price_max": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "xpath_one",
"_args": ["number(text())"]
},
{
"_fn": "max"
}
]
}
}
{
"price_max": 456.0
}
min
min
Sample HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
Find the min of all listed prices
{
"price_min": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "xpath_one",
"_args": ["number(text())"]
},
{
"_fn": "min"
}
]
}
}
{
"price_min": 100.0
}
product
product
Sample HTML
<body>
<div class="product">
<property class="colors">
<option class="color">Red</option>
<option class="color">Green</option>
<option class="color">Blue</option>
</property>
<property class="sizes">
<option class="size">S</option>
<option class="size">M</option>
<option class="size">L</option>
<option class="size">XL</option>
</property>
</div>
</body>
Get the count of different product variants
{
"number_of_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//property"]
},
{
"_fn": "xpath",
"_args": [".//option"]
},
{
"_fn": "length"
},
{
"_fn": "product"
}
]
}
}
{
"number_of_variants": 12
}
Last updated