代理端口
总览
如果您曾经使用过普通代理执行数据抓取,集成代理端点交付方式将是一件轻而易举的事。您需要做的就是使用我们的输入节点作为代理,用爬虫API 凭据进行验证并忽略证书。在 cURL
中,它是 -k
或 --insecure
。数据将在同一个开放的连接上到达您的手中。
代理端点只适用于基于 URL 的数据源,其中提供了完整 URL。因此,它只接受少数额外的作业参数,这些参数应作为标头发送。
端点
GET realtime.oxylabs.io:60000
输入
请查看以下请求示例。
curl -k -x realtime.oxylabs.io:60000 \
-U USERNAME:PASSWORD \
-H "x-oxylabs-user-agent-type: desktop_chrome" \
-H "x-oxylabs-geo-location: Germany" \
'https://www.example.com'
import requests
from pprint import pprint
# Define proxy dict. Don't forget to put your real user and pass here as well.
proxies = {
'http': 'http://YOUR_USERNAME:YOUR_PASSWORD@realtime.oxylabs.io:60000',
}
# To set a specific geo-location, user-agent or to render Javascript
# It is required to send parameters as request headers
headers = {
"x-oxylabs-user-agent-type": "desktop_chrome",
"x-oxylabs-geo-location": "Germany",
#"X-Oxylabs-Render": "html", # Uncomment you want to render JavaScript within the page
}
response = requests.request(
'GET',
'https://www.example.com',
headers = headers, # Passing defined headers
verify=False, # Or accept our certificate.
proxies=proxies,
)
# Print result page to stdout
pprint(response.text)
# Save returned HTML to result.html file
with open('result.html', 'w') as f:
f.write(response.text)
<?php
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://www.example.com/");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_PROXY, 'realtime.oxylabs.io:60000');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "YOUR_USERNAME" . ":" . "YOUR_PASSWORD");
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
// To set a specific geo-location, user-agent or to render Javascript
// It is required to send parameters as request headers
curl_setopt_array($ch, array(
CURLOPT_HTTPHEADER => array(
'x-oxylabs-user-agent-type: desktop_chrome',
'x-oxylabs-geo-location: Germany',
//'X-Oxylabs-Render: html', // Uncomment you want to render JavaScript within the page
)
));
$result = curl_exec($ch);
echo $result;
if (curl_errno($ch)) {
echo 'Error:' . curl_error($ch);
}
curl_close ($ch);
?>
using System;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
namespace OxyApi
{
class Program
{
static async Task Main(string[] args)
{
var webProxy = new WebProxy
{
Address = new Uri($"http://realtime.oxylabs.io:60000"),
BypassProxyOnLocal = false,
UseDefaultCredentials = false,
Credentials = new NetworkCredential(
userName: "YOUR_USERNAME",
password: "YOUR_PASSWORD"
)
};
var httpClientHandler = new HttpClientHandler
{
Proxy = webProxy,
};
// We recommend accepting our certificate instead of allowing insecure (http) traffic
httpClientHandler.ClientCertificateOptions = ClientCertificateOption.Manual;
httpClientHandler.ServerCertificateCustomValidationCallback =
(httpRequestMessage, cert, cetChain, policyErrors) =>
{
return true;
};
var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);
Uri baseUri = new Uri("https://www.example.com");
client.BaseAddress = baseUri;
var requestMessage = new HttpRequestMessage(HttpMethod.Get, "");
var response = await client.SendAsync(requestMessage);
var contents = await response.Content.ReadAsStringAsync();
Console.WriteLine(contents);
}
}
}
package main
import (
"crypto/tls"
"fmt"
"io/ioutil"
"net/http"
"net/url"
)
func main() {
const Username = "YOUR_USERNAME"
const Password = "YOUR_PASSWORD"
proxyUrl, _ := url.Parse(
fmt.Sprintf(
"http://%s:%s@realtime.oxylabs.io:60000",
Username,
Password,
),
)
customTransport := &http.Transport{Proxy: http.ProxyURL(proxyUrl)}
// We recommend accepting our certificate instead of allowing insecure (http) traffic
customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
client := &http.Client{Transport: customTransport}
request, _ := http.NewRequest("GET",
"https://www.example.com",
nil,
)
request.Header.Add("x-oxylabs-user-agent-type", "desktop_chrome")
request.Header.Add("x-oxylabs-geo-location", "Germany")
request.SetBasicAuth(Username, Password)
response, _ := client.Do(request)
responseText, _ := ioutil.ReadAll(response.Body)
fmt.Println(string(responseText))
}
import okhttp3.Authenticator;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import javax.net.ssl.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
public class Main implements Runnable {
private static final String AUTHORIZATION_HEADER = "Proxy-Authorization";
public static final String USERNAME = "YOUR_USERNAME";
public static final String PASSWORD = "YOUR_PASSWORD";
public void run() {
Authenticator authenticator = (route, response) -> {
String credential = Credentials.basic(USERNAME, PASSWORD);
return response
.request()
.newBuilder()
.header(AUTHORIZATION_HEADER, credential)
.build();
};
OkHttpClient.Builder builder = new OkHttpClient.Builder();
// We recommend accepting our certificate instead of allowing insecure (http) traffic
this.disableSSLCertificateChecking(builder);
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress("realtime.oxylabs.io", 60000));
var client = builder
.proxy(proxy)
.proxyAuthenticator(authenticator)
.build();
var request = new Request.Builder()
.url("https://www.example.com")
.addHeader("x-oxylabs-user-agent-type", "desktop_chrome")
.addHeader("x-oxylabs-geo-location", "Germany")
.get()
.build();
try (var response = client.newCall(request).execute()) {
assert response.body() != null;
System.out.println(response.body().string());
} catch (Exception exception) {
exception.printStackTrace();
System.exit(1);
}
System.exit(0);
}
private void disableSSLCertificateChecking(OkHttpClient.Builder builder) {
TrustManager[] trustManagers = new TrustManager[]{
new X509TrustManager() {
@Override
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return new java.security.cert.X509Certificate[]{};
}
@Override
public void checkServerTrusted(java.security.cert.X509Certificate[] x509Certificates, String authType) {
}
@Override
public void checkClientTrusted(java.security.cert.X509Certificate[] x509Certificates, String authType) {
}
}
};
try {
HttpsURLConnection.setDefaultHostnameVerifier((s, sslSession) -> true);
SSLContext sslContext = SSLContext.getInstance("TLS");
sslContext.init(null, trustManagers, new java.security.SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
builder.sslSocketFactory(sslContext.getSocketFactory(), (X509TrustManager) trustManagers[0]);
} catch (Exception exception) {
exception.printStackTrace();
System.exit(1);
}
builder.hostnameVerifier((hostname, session) -> true);
}
public static void main(String[] args) {
new Thread(new Main()).start();
}
}
import fetch from 'node-fetch';
import createHttpsProxyAgent from 'https-proxy-agent'
const username = 'YOUR_USERNAME';
const password = 'YOUR_PASSWORD';
const agent = createHttpsProxyAgent(
`http://${username}:${password}@realtime.oxylabs.io:60000`
);
// We recommend accepting our certificate instead of allowing insecure (http) traffic
process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = 0;
const headers = {
'x-oxylabs-user-agent-type': 'desktop_chrome',
'x-oxylabs-geo-location': 'Germany',
}
const response = await fetch('https://www.example.com', {
method: 'get',
headers: headers,
agent: agent,
});
console.log(await response.text());
输出
下面您将看到一个来自 https://example.com
的回复样本:
<!doctype html>
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8" />
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<style type="text/css">
body {
background-color: #f0f0f2;
margin: 0;
padding: 0;
font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
}
div {
width: 600px;
margin: 5em auto;
padding: 2em;
background-color: #fdfdff;
border-radius: 0.5em;
box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
}
a:link, a:visited {
color: #38488f;
text-decoration: none;
}
@media (max-width: 700px) {
div {
margin: 0 auto;
width: auto;
}
}
</style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples in documents. You may use this
domain in literature without prior coordination or asking for permission.</p>
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>
</html>
接受参数
在提出请求(连同 URL)时,您可以向我们发送一些我们将在执行您的作业时使用的工作参数。工作参数应该在您的请求表头中发送 - 请参阅此处的示例。
下面是您可以用代理端点请求发送的作业参数列表:
Last updated