函数示例
HTML处理
element_text
element_text
示例HTML
<!DOCTYPE html>
<html>
<body>
<div id="product">
<div id="product-description">This is a nice product</div>
<div id="product-price"> 12 3
</div>
</div>
</body>
</html>
从HTML元素中提取文本并去除空白
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//*[@id='product-price']"]
},
{
"_fn": "element_text"
}
]
}
}
{
"price": "12 3"
}
当输入已经是字符串值时,不做任何处理,原样返回
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//*[@id='product-price']/text()"]
},
{
"_fn": "element_text"
}
]
}
}
{
"price": " 12 3\n\n\n "
}
xpath
xpath
示例HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">123.12</div>
<div class="description">
<ul>
<li class="description-item">Very</li>
<li class="description-item">Nice</li>
<li class="description-item">Socks</li>
</ul>
</div>
</div>
</body>
获取所有描述项
{
"description_items": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//li[@class='description-item']/text()"]
}
]
}
}
{
"description_items": ["Very", "Nice", "Socks"]
}
获取第一个描述项
{
"first_description_item": {
"_fns": [
{
"_fn": "xpath",
"_args": ["(//li[@class='description-item'])[1]/text()"]
}
]
}
}
{
"first_description_item": [
"Very"
]
}
检查描述部分的元素是否存在
{
"description_section_exists": {
"_fns": [
{
"_fn": "xpath",
"_args": ["boolean(//div[@class='description'])"]
}
]
}
}
{
"description_section_exists": true
}
以数字形式获取价格
{
"price": {
"_fns": [
{
"_fn": "xpath",
"_args": ["number(//div[@class='price'])"]
}
]
}
}
{
"price": 123.12
}
当前面的表达式没有匹配结果时,可依次回退到后续的备选表达式。
{
"price": {
"_fns": [
{
"_fn": "xpath",
"_args": [
"//div[@class='product-price']/text()", <--- this does not find anything
"//div[@class='price']/text()" <--- this finds the target price
]
}
]
}
}
{
"price": [
"123.12"
]
}
使用 XPath 的 | 操作符,可同时匹配多个表达式
{
"price_and_title": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//div[@class='price']/text() | //div[@class='title']/text()"]
}
]
}
}
{
"price_and_title": [
"Socks",
"123.12"
]
}
xpath_one
xpath_one
示例HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">123.12</div>
<div class="description">
<ul>
<li class="description-item">Very</li>
<li class="description-item">Nice</li>
<li class="description-item">Socks</li>
</ul>
</div>
</div>
</body>
返回第一个匹配结果
{
"first_description_item": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//li/text()"]
}
]
}
}
{
"first_description_item": "Very"
}
使用XPath函数
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": ["number(.//div[@class='price'])"]
}
]
}
}
{
"price": 123.12
}
字符串操作
amount_from_string
amount_from_string
示例HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">The price is: 123.12 pesos</div>
</div>
</body>
从字符串中提取金额
{
"price": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='price']/text()"]
},
{
"_fn": "amount_from_string"
}
]
}
}
{
"price": 123.12
}
amount_range_from_string
amount_range_from_string
示例HTML
<body>
<div class="product">
<div class="price">
The price is: 123.12 pesos;
The price is: 345.12 pesos;
The price is: 678.12 pesos
</div>
</div>
</body>
从字符串中提取所有金额
{
"prices": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='price']/text()"]
},
{
"_fn": "amount_range_from_string"
}
]
}
}
{
"prices": [
123.12,
345.12,
678.12
]
}
join
join
示例HTML
<body>
<div class="product">
<div class="price">
The price is: 123.12 pesos;
</div>
<div class="price">
The price is: 345.12 pesos;
</div>
<div class="price">
The price is: 678.12 pesos
</div>
</div>
</body>
将一个字符串数组连接成一个单一的字符串
{
"price_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{ // If we call normalize-space() in first pipeline function,
// it will return only the first value.
"_fn": "xpath",
"_args": ["normalize-space(text())"]
},
{
"_fn": "join",
"_args": ""
}
]
}
}
{
"price_variants": "The price is: 123.12 pesos;The price is: 345.12 pesos;The price is: 678.12 pesos"
}
regex_find_all
regex_find_all
示例HTML
<body>
<div class="product">
<div class="description">
[one description]
[two description]
[three description]
</div>
</div>
</body>
查找两个字符之间的所有匹配项
{
"descriptions": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='description']/text()"]
},
{
"_fn": "regex_find_all",
"_args": ["\\[(.*)\\]"]
}
]
}
}
{
"descriptions": [
"one description",
"two description",
"three description"
]
}
regex_search
regex_search
示例HTML
<body>
<div class="product">
<div class="description">
[one description]
[two description]
[three description]
{the one i need}
</div>
</div>
</body>
返回两个字符之间的描述
{
"description": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='description']/text()"]
},
{
"_fn": "regex_search",
"_args": ["{(.*)}", 1]
}
]
}
}
{
"description": "the one i need"
}
regex_substring
regex_substring
示例HTML
<body>
<div class="product">
<div class="description">
* one description
* two description
* three description
* {this one i would like to get replaced}
</div>
</div>
</body>
用指定的值替换文本的一部分
{
"descriptions": {
"_fns": [
{
"_fn": "xpath_one",
"_args": [".//div[@class='description']/text()"]
},
{
"_fn": "regex_substring",
"_args": ["{this one i would like to get replaced}", "four description"]
},
{
"_fn": "regex_find_all",
"_args": ["\\*\\s(.*)\n"]
}
]
}
}
{
"descriptions": [
"one description",
"two description",
"three description",
"four description"
]
}
常用函数
convert_to_*
convert_to_*
示例HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
获取价格变体的数量
{
"price_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "length"
}
]
}
}
{
"price_variants": 5
}
在一个多维数组中获取价格变体的数量
示例HTML
<body>
<div class="product">
<property class="colors">
<option class="color">Red</option>
<option class="color">Green</option>
<option class="color">Blue</option>
</property>
<property class="sizes">
<option class="size">S</option>
<option class="size">M</option>
<option class="size">L</option>
<option class="size">XL</option>
</property>
</div>
</body>
{
"number_of_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//property"]
},
{
"_fn": "xpath",
"_args": [".//option"]
},
{
"_fn": "length"
}
]
}
}
{
"number_of_variants": [
3,
4
]
}
select_nth
select_nth
示例HTML
<body>
<div class="product" id="socks">
<div class="title">Socks</div>
<div class="price">123.12</div>
<div class="description">
<ul>
<li class="description-item">Very</li>
<li class="description-item">Nice</li>
<li class="description-item">Socks</li>
</ul>
</div>
</div>
</body>
从数组中选择第一个描述项
{
"price_and_title": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//li[@class='description-item']/text()"]
},
{
"_fn": "select_nth",
"_args": 0
}
]
}
}
{
"price_and_title": "Very"
}
从数组中选择最后一个描述项
{
"price_and_title": {
"_fns": [
{
"_fn": "xpath",
"_args": ["//li[@class='description-item']/text()"]
},
{
"_fn": "select_nth",
"_args": -1
}
]
}
}
{
"price_and_title": "Socks"
}
数学函数
average
average
示例HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
找到所有列表中价格的平均值
{
"price_average": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "xpath_one",
"_args": ["number(text())"]
},
{
"_fn": "average"
}
]
}
}
{
"price_average": 244.8
}
max
max
示例HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
找到所有列表中价格的最大值
{
"price_max": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "xpath_one",
"_args": ["number(text())"]
},
{
"_fn": "max"
}
]
}
}
{
"price_max": 456.0
}
min
min
示例HTML
<body>
<div class="product">
<div class="price">123</div>
<div class="price">124</div>
<div class="price">456</div>
<div class="price">421</div>
<div class="price">100</div>
</div>
</body>
找到列表中所有价格的最小值
{
"price_min": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//div[@class='price']"]
},
{
"_fn": "xpath_one",
"_args": ["number(text())"]
},
{
"_fn": "min"
}
]
}
}
{
"price_min": 100.0
}
product
product
示例HTML
<body>
<div class="product">
<property class="colors">
<option class="color">Red</option>
<option class="color">Green</option>
<option class="color">Blue</option>
</property>
<property class="sizes">
<option class="size">S</option>
<option class="size">M</option>
<option class="size">L</option>
<option class="size">XL</option>
</property>
</div>
</body>
获得不同产品变体的数量
{
"number_of_variants": {
"_fns": [
{
"_fn": "xpath",
"_args": [".//property"]
},
{
"_fn": "xpath",
"_args": [".//option"]
},
{
"_fn": "length"
},
{
"_fn": "product"
}
]
}
}
{
"number_of_variants": 12
}
Last updated