python http 客户端 libcurl urllib3 httpx

创建日期: 2023-11-07 15:24 | 作者: 风波 | 浏览次数: 14 | 分类: Python

1. httpx

来源:https://www.python-httpx.org/quickstart/ 参考:https://www.python-httpx.org/

GET

import httpx

r = httpx.get('https://httpbin.org/get')

POST

r = httpx.post('https://httpbin.org/post', data={'key': 'value'})

PUT, DELETE, HEAD, and OPTIONS

r = httpx.put('https://httpbin.org/put', data={'key': 'value'})
r = httpx.delete('https://httpbin.org/delete')
r = httpx.head('https://httpbin.org/get')
r = httpx.options('https://httpbin.org/get')

Passing Parameters in URLs

params = {'key1': 'value1', 'key2': 'value2'}
r = httpx.get('https://httpbin.org/get', params=params)

Response Content

r = httpx.get('https://www.example.org/')
r.text

Binary Response Content

r.content

2. libcurl

yum install -y libcurl-devel
import pycurl

def get_curl_agent(self, timeout=120):
    """Create and configure a new ``pycurl.Curl`` handle.

    Args:
        timeout: Total transfer timeout in seconds. Fractional values are
            accepted; the value is converted to whole milliseconds.

    Returns:
        The configured ``pycurl.Curl`` handle.
    """
    curl = pycurl.Curl()
    # Same value as the original literal 1, spelled with the named constant.
    curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
    curl.setopt(pycurl.NOPROGRESS, 1)
    # TIMEOUT_MS is an integer option; coerce so float timeouts (e.g. 0.5) work.
    curl.setopt(pycurl.TIMEOUT_MS, int(timeout * 1000))
    #curl.setopt(pycurl.HTTPHEADER, headrs)
    curl.setopt(pycurl.MAXREDIRS, 50)
    try:
        curl.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_2TLS)
    except Exception as e:
        # Best effort: if the option cannot be set, log it and carry on with
        # the handle's existing HTTP version setting.
        logging.info("set pycurl.CURL_HTTP_VERSION_2TLS: {}".format(str(e)))
    # NOTE(review): certificate and host-name verification are switched off
    # here; confirm this is intended before using against untrusted networks.
    curl.setopt(pycurl.SSL_VERIFYPEER, 0)
    curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    curl.setopt(pycurl.ACCEPT_ENCODING, "")
    curl.setopt(pycurl.TCP_KEEPALIVE, 1)
    #curl.setopt(pycurl.COOKIEFILE, "not-exists.txt")
    #curl.setopt(pycurl.VERBOSE, 1)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)

    return curl


def request(self, url, headers=None, method='GET', json=None):
    """Perform a transfer for *url* on ``self.curl`` and wrap the response.

    Args:
        url: Target URL. It is handed to the curl handle unchanged (no
            stripping or quoting is applied here).
        headers: Optional mapping of header name to value. ``None`` (the
            default) or an empty mapping sends no custom headers.
        method: Accepted for interface compatibility; currently unused by
            this implementation.
        json: Accepted for interface compatibility; currently unused by
            this implementation.

    Returns:
        An ``httpagent.O`` built from the response status code and the body
        returned by ``perform_rb()``.
    """
    # ``headers`` defaults to None; treat that as "no headers" instead of
    # failing on ``None.items()``.
    header_lines = ["{}: {}".format(k, v) for k, v in (headers or {}).items()]

    curl = self.curl
    # The list is set on every call, as in the original, because the handle
    # comes from ``self.curl`` rather than being created per request.
    curl.setopt(pycurl.HTTPHEADER, header_lines)
    curl.setopt(pycurl.URL, url)
    logger.debug("libcurl request: {} ...".format(url))
    content = curl.perform_rb()
    status_code = curl.getinfo(pycurl.RESPONSE_CODE)

    return httpagent.O(status_code=status_code, content=content)

3. urllib3

参考:https://www.webscrapingapi.com/top-3-python-http-clients-for-web-scraping

import urllib3

http = urllib3.PoolManager()
resp = http.request(method, url, headers=headers, body=body)

设置超时 来源:https://stackoverflow.com/questions/74736943/how-can-i-set-a-timeout-for-urllib3-request

# The class is spelled ``PoolManager`` (capital M); ``Poolmanager`` fails to import.
from urllib3 import PoolManager, Timeout

# Separate limits for establishing the connection and for reading the response.
timeout = Timeout(connect=2.0, read=7.0)
http = PoolManager(timeout=timeout)
response = http.request('GET', 'http://example.com/')

或者

response = http.request('GET', 'http://example.com/', timeout=Timeout(10))

4. Requests

Requests 的 DNS 解析比较弱。

5. httplib2

6. Uplink

7. GRequests

14 浏览
12 爬虫
0 评论