1. httpx
来源:https://www.python-httpx.org/quickstart/ 参考:https://www.python-httpx.org/
GET
import httpx
# Send a GET request; the response object is bound to r.
r = httpx.get('https://httpbin.org/get')
POST
# Send a POST request; the dict passed as data= is the request payload.
r = httpx.post('https://httpbin.org/post', data={'key': 'value'})
PUT, DELETE, HEAD, and OPTIONS
# The remaining verbs use the same call shape: one module-level function per
# HTTP method, each rebinding r to the latest response.
r = httpx.put('https://httpbin.org/put', data={'key': 'value'})
r = httpx.delete('https://httpbin.org/delete')
r = httpx.head('https://httpbin.org/get')
r = httpx.options('https://httpbin.org/get')
Passing Parameters in URLs
# Query-string parameters are supplied as a dict through params= instead of
# being spliced into the URL by hand.
params = {'key1': 'value1', 'key2': 'value2'}
r = httpx.get('https://httpbin.org/get', params=params)
Response Content
r = httpx.get('https://www.example.org/')
# Response body as text (bare expression, as typed in an interactive session).
r.text
Binary Response Content
# Response body as bytes; r is the response obtained in the preceding example.
r.content
2. libcurl
yum install -y libcurl-devel
import pycurl
def get_curl_agent(self, timeout=120):
    """Create a pycurl handle preconfigured for this client's requests.

    Args:
        timeout: Overall transfer timeout in seconds. Fractional values are
            accepted and truncated to whole milliseconds.

    Returns:
        A new ``pycurl.Curl`` handle with the options below applied.
    """
    curl = pycurl.Curl()
    # Resolve host names to IPv4 only (named constant for the former literal 1).
    curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
    curl.setopt(pycurl.NOPROGRESS, 1)
    # pycurl integer options reject floats, so coerce after scaling to ms.
    curl.setopt(pycurl.TIMEOUT_MS, int(timeout * 1000))
    curl.setopt(pycurl.MAXREDIRS, 50)
    try:
        # Best effort: the constant may be missing or the option rejected,
        # in which case the handle keeps its default HTTP version.
        curl.setopt(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_2TLS)
    except (AttributeError, pycurl.error) as e:
        logging.info("set pycurl.CURL_HTTP_VERSION_2TLS: {}".format(str(e)))
    # NOTE(review): certificate and host-name verification are switched off,
    # which leaves HTTPS transfers open to interception -- confirm this is
    # intended before using the handle against untrusted networks.
    curl.setopt(pycurl.SSL_VERIFYPEER, 0)
    curl.setopt(pycurl.SSL_VERIFYHOST, 0)
    # An empty string asks libcurl to offer every encoding it supports.
    curl.setopt(pycurl.ACCEPT_ENCODING, "")
    curl.setopt(pycurl.TCP_KEEPALIVE, 1)
    # Optional toggles kept for reference:
    #curl.setopt(pycurl.COOKIEFILE, "not-exists.txt")
    #curl.setopt(pycurl.VERBOSE, 1)
    curl.setopt(pycurl.FOLLOWLOCATION, 1)
    return curl
def request(self, url, headers=None, method='GET', json=None):
    """Fetch *url* through the shared curl handle and wrap the response.

    Args:
        url: Target URL, handed to libcurl as given (no stripping or quoting).
        headers: Optional mapping of header names to values. ``None`` (the
            default) is treated as an empty mapping.
        method: Accepted for interface compatibility.
        json: Accepted for interface compatibility.

    Returns:
        An ``httpagent.O`` carrying the response status code and body.
    """
    # NOTE(review): method and json are never applied to the handle below, so
    # the request goes out however self.curl is currently configured --
    # confirm whether callers rely on them.
    # The default headers=None used to fail on .items(); fall back to {}.
    header_lines = ["{}: {}".format(k, v) for k, v in (headers or {}).items()]
    curl = self.curl
    curl.setopt(pycurl.HTTPHEADER, header_lines)
    curl.setopt(pycurl.URL, url)
    logger.debug("libcurl request: {} ...".format(url))
    content = curl.perform_rb()
    status_code = curl.getinfo(pycurl.RESPONSE_CODE)
    return httpagent.O(status_code=status_code, content=content)
3. urllib3
参考:https://www.webscrapingapi.com/top-3-python-http-clients-for-web-scraping
import urllib3
# One PoolManager is created and used for the request below.
http = urllib3.PoolManager()
# method, url, headers and body are placeholders to be supplied by the caller.
resp = http.request(method, url, headers=headers, body=body)
设置超时 来源:https://stackoverflow.com/questions/74736943/how-can-i-set-a-timeout-for-urllib3-request
# The class is spelled PoolManager; importing "Poolmanager" raises ImportError.
from urllib3 import Timeout, PoolManager
# Separate limits for establishing the connection and for reading the response.
timeout = Timeout(connect=2.0, read=7.0)
# A timeout given to the manager is the default for requests made through it.
http = PoolManager(timeout=timeout)
response = http.request('GET', 'http://example.com/')
或者
# Per-request alternative: pass timeout= directly; Timeout(10) sets the total
# timeout to 10 seconds.
response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
4. Requests
DNS 比较弱啊