1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
| from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory, BrowserLikeContextFactory from twisted.internet import reactor from twisted.web.client import Agent from twisted.web.http_headers import Headers
def success_callback(response):
    """Print a summary of a received response and pass the response along.

    Args:
        response: The object delivered to this callback. Its ``version``,
            ``code``, ``phrase`` and ``headers`` attributes are read;
            ``headers`` must provide ``getAllRawHeaders()``.
            (Presumably a Twisted HTTP response -- confirm against caller.)

    Returns:
        The same ``response`` object, so that any callback chained after this
        one receives the response rather than ``None``.
    """
    print('Response version:', response.version)
    print('Response code:', response.code)
    print('Response phrase:', response.phrase)
    print('Response headers:')
    # The label above used to be printed with nothing after it; list the
    # raw header name/values pairs so the section is not empty.
    for name, values in response.headers.getAllRawHeaders():
        print(' ', name, values)
    print('Response received', response)
    # A callback's return value becomes the input of the next callback in a
    # Deferred chain, so hand the response on instead of implicitly
    # returning None.
    return response
def fail_callback(err):
    """Report a failure on standard output.

    Args:
        err: The value handed to this error handler; it is printed as-is.

    Returns:
        ``None``.
    """
    label = 'Something error'
    print(label, err)
def do_something(response, request):
    """Print the attribute dictionary of ``request``.

    Args:
        response: Accepted but not used by this function.
        request: Any object exposing ``__dict__``; that mapping is printed.

    Returns:
        ``None``.
    """
    attributes = request.__dict__
    print(attributes)
def to_bytes(text, encoding=None, errors='strict'):
    """Convert ``text`` to ``bytes``.

    Args:
        text: A ``bytes`` object (returned unchanged) or a ``str`` to encode.
        encoding: Codec name used for ``str`` input; ``None`` means UTF-8.
        errors: Error-handling scheme forwarded to ``str.encode``.

    Returns:
        The ``bytes`` form of ``text``.

    Raises:
        TypeError: If ``text`` is neither ``str`` nor ``bytes``.
    """
    if isinstance(text, bytes):
        return text
    if isinstance(text, str):
        codec = 'utf-8' if encoding is None else encoding
        return text.encode(codec, errors)
    raise TypeError(
        'to_bytes must receive a str or bytes '
        'object, got %s' % type(text).__name__
    )
def get_request(url, method="GET", timeout=10):
    """Send one HTTP(S) request through a Twisted ``Agent`` and stop the reactor when it settles.

    The response is reported by ``success_callback`` and ``do_something``;
    any failure is reported by ``fail_callback``. Whatever the outcome, the
    reactor is stopped afterwards.

    Args:
        url: Target URL as ``str`` or ``bytes``.
        method: HTTP method name as ``str`` or ``bytes``; ``str`` is encoded
            as ASCII.
        timeout: Seconds after which the pending request Deferred is
            cancelled.

    Returns:
        The Deferred returned by ``agent.request`` with all callbacks
        attached. (Previously nothing was returned; existing callers that
        ignore the result are unaffected.)
    """
    # Only one agent is needed. The original built an agent with
    # ScrapyClientContextFactory and overwrote it on the very next
    # statement, so only the BrowserLikeContextFactory agent was ever used.
    agent = Agent(reactor, contextFactory=BrowserLikeContextFactory())

    request_headers = Headers({
        b'accept': [b'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'],
        b'accept-language': [b'en'],
        b'user-agent': [b'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'],
        b'accept-encoding': [b'gzip, deflate'],
    })

    d = agent.request(
        to_bytes(method, encoding='ascii'),
        to_bytes(url),
        request_headers,
        None,  # no request body
    )
    d.addCallback(success_callback)
    # NOTE(review): the extra argument is the ``Headers`` class itself, not a
    # request object, so ``do_something`` prints the class ``__dict__``.
    # Kept as in the original -- confirm this is intended.
    d.addCallback(do_something, Headers)
    d.addErrback(fail_callback)

    # Cancel the request if it has not settled within ``timeout`` seconds.
    timeout_cl = reactor.callLater(timeout, d.cancel)
    print(timeout_cl.active())

    def _cancel_timeout(result):
        # Drop the pending timeout once the request has settled so that no
        # stale delayed call is left scheduled on the reactor.
        if timeout_cl.active():
            timeout_cl.cancel()
        return result

    d.addBoth(_cancel_timeout)
    d.addBoth(lambda _: reactor.stop())
    return d
if __name__ == '__main__':
    # Script entry point: schedule a single request, then hand control to
    # the reactor, which runs until get_request's callbacks stop it.
    url = 'https://blog.csdn.net'
    get_request(url=url)
    reactor.run()
|