def request(self, url, body=None, headers=None, timeout=45, max_retries=3, method="GET"):
    """Send an HTTP request, retrying when an attempt fails.

    Args:
        url: Request URL; its scheme and netloc form the ``origin`` key.
        body: Optional request body forwarded to the connection.
        headers: Optional header mapping; a fresh dict is used when omitted.
        timeout: Timeout handed to ``self.create_conn``.
        max_retries: Total number of attempts before the last error is raised.
        method: HTTP verb.

    Returns:
        Whatever ``conn.getresponse()`` returns, or the ``partial`` payload of
        an ``IncompleteRead``.  ``None`` when ``max_retries`` is below 1.

    Raises:
        Exception: the error raised by the final attempt, re-raised unchanged.
    """
    if headers is None:
        headers = {}
    parsed = urlparse.urlsplit(url)
    origin = (parsed.scheme, parsed.netloc)
    # range() excludes its upper bound: iterate up to max_retries inclusive so
    # the final-attempt check below can actually fire instead of the loop
    # ending silently with an implicit ``return None``.
    for attempt in range(1, max_retries + 1):
        try:
            conn = self.create_conn(parsed, origin, timeout)
            conn.request(method, url, body=body, headers=headers)
            return conn.getresponse()
        except IncompleteRead as e:
            return e.partial
        except Exception:
            # Forget the connection stored for this origin before retrying.
            if origin in self.tls.conns:
                del self.tls.conns[origin]
            if attempt >= max_retries:
                # Bare raise keeps the original traceback.
                raise
python类IncompleteRead()的实例源码
def fetchsamples(limit):
    """Collect ``limit`` stripped lines from the Twitter sample stream.

    The stream is requested again whenever it ends or is cut short by an
    ``IncompleteRead`` / ``BadStatusLine``, until enough lines are gathered.
    Progress is printed every 100 collected lines.

    Args:
        limit: Number of lines to collect; values below 1 return ``[]``
            without issuing any request.

    Returns:
        A list of at most ``limit`` stripped lines.
    """
    ret = []
    url = "https://stream.twitter.com/1/statuses/sample.json"
    parameters = []
    while len(ret) < limit:
        try:
            response = twitterreq(url, "GET", parameters)
            for line in response:
                ret.append(line.strip())
                if len(ret) % 100 == 0:
                    # Call form prints identically on Python 2 and 3.
                    print(len(ret))
                if len(ret) >= limit:
                    break
        except (IncompleteRead, BadStatusLine):
            # The stream broke mid-read; keep what we have and reconnect.
            continue
    return ret
# filter tweets for images / good captions and output them to file
def __init__(self, e, uri, format, uriparts):
    """Capture the failed request and the raw body of the error response.

    Args:
        e: The HTTP error; its ``fp`` is read and its ``headers`` inspected.
        uri: Stored as ``self.uri``.
        format: Stored as ``self.format``.
        uriparts: Stored as ``self.uriparts``.

    The body (gunzipped when the response says it is gzip-encoded) is kept
    in ``self.response_data``.
    """
    self.e = e
    self.uri = uri
    self.format = format
    self.uriparts = uriparts
    try:
        payload = self.e.fp.read()
    except http_client.IncompleteRead as partial_err:
        # The error text could not be read in full; keep what arrived.
        payload = partial_err.partial
    is_gzipped = self.e.headers.get('Content-Encoding') == 'gzip'
    if is_gzipped:
        payload = gzip.GzipFile(fileobj=StringIO(payload)).read()
    self.response_data = payload
    super(TwitterHTTPError, self).__init__(str(self))
def open(self, filepart=None, data=None, do_soup=True):
    """Fetch a page from the configured host and record the result on self.

    Args:
        filepart: Path below the host; defaults to ``self.nextfile``.
        data: Optional request payload, ASCII-encoded before sending.
        do_soup: When true, read the whole response and parse it into
            ``self.soup``.

    Sets ``self.f``, ``self.purl`` and ``self.info`` from the response.
    """
    target = filepart or self.nextfile
    scheme_prefix = '%s:/' % self.args.protocol
    url = '/'.join((scheme_prefix, self.args.host, target))
    payload = data.encode('ascii') if data else data
    handle = self.opener.open(Request(url, payload), timeout=10)
    self.f = handle
    if do_soup:
        # Reads are sometimes incomplete, so take whatever the server sent.
        # This is why the bytes are read here rather than handing the file
        # object to BeautifulSoup.
        try:
            page = handle.read()
        except IncompleteRead as exc:
            page = exc.partial
        self.soup = BeautifulSoup(page, "lxml")
    self.purl = handle.geturl()
    self.info = handle.info()
# end def open
def _getPage(self, url, addParams=None, post_data=None):
    """Fetch a page via ``self.cm.getPage`` while tolerating truncated reads.

    ``httplib.HTTPResponse.read`` is temporarily wrapped so that an
    ``IncompleteRead`` yields the partial data instead of raising.  The
    original ``read`` is put back even when the fetch itself fails.

    Args:
        url: Page URL.
        addParams: Optional parameter dict forwarded to ``getPage``; a fresh
            dict is created per call when omitted.
        post_data: Optional POST payload forwarded to ``getPage``.

    Returns:
        The ``(sts, data)`` pair produced by ``self.cm.getPage``.
    """
    if addParams is None:
        # Avoid sharing one default dict across calls.
        addParams = {}
    prev_read = None
    try:
        import httplib

        def patch_http_response_read(func):
            def inner(*args, **kwargs):
                try:
                    return func(*args, **kwargs)
                except httplib.IncompleteRead as e:
                    return e.partial
            return inner

        prev_read = httplib.HTTPResponse.read
        httplib.HTTPResponse.read = patch_http_response_read(prev_read)
    except Exception:
        printExc()
    try:
        sts, data = self.cm.getPage(url, addParams, post_data)
        return sts, data
    finally:
        # Only undo the patch if it was actually installed.
        if prev_read is not None:
            try:
                httplib.HTTPResponse.read = prev_read
            except Exception:
                printExc()
def getWeiboContent(self):
    """Fetch ``self.URL`` and return the script block holding the results.

    Returns:
        The text of the first ``<script>`` element containing
        ``"pid":"pl_weibo_direct"``, an empty string when no script matches,
        or ``False`` when the request fails, the status is not 200, the body
        is truncated, or the page has no script elements.
    """
    try:
        req = self.session.get(self.URL, headers=self.myheader)
        if req.status_code != 200:
            print('This session not work with code 200.')
            return False
        print('This session work.')
        print('The current Ip is ' + self.getPublicIp())
    except Exception:
        # Any failure while requesting (or looking up the IP) is treated as
        # an unusable session; KeyboardInterrupt/SystemExit now propagate.
        print('This session not work.')
        return False
    try:
        page = req.content
    except httplib.IncompleteRead:
        print('Incompleted!')
        return False
    soupPage = BeautifulSoup(page, 'lxml')
    numList = soupPage.find_all('script')
    if len(numList) == 0:
        print('you may need to input an access code')
        return False
    for script in numList:
        script_text = str(script)
        if re.search(r"\"pid\":\"pl_weibo_direct\"", script_text) is not None:
            return script_text
    return ""
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def __init__(self, e, uri, format, uriparts):
    """Capture the failed request and a decoded copy of the error body.

    Args:
        e: The HTTP error; its ``fp`` is read and its ``headers`` inspected.
        uri: Stored as ``self.uri``.
        format: Stored as ``self.format``; ``"json"`` triggers JSON parsing.
        uriparts: Stored as ``self.uriparts``.

    ``self.response_data`` ends up as ``{}`` for an empty body, parsed JSON
    when the format is json and parsing succeeds, or the decoded text.
    """
    self.e = e
    self.uri = uri
    self.format = format
    self.uriparts = uriparts
    try:
        raw = self.e.fp.read()
    except http_client.IncompleteRead as partial_err:
        # The error text could not be read in full; keep what arrived.
        raw = partial_err.partial
    if self.e.headers.get('Content-Encoding') == 'gzip':
        raw = gzip.GzipFile(fileobj=StringIO(raw)).read()
    if len(raw) == 0:
        parsed = {}
    else:
        parsed = raw.decode('utf8')
        if "json" == self.format:
            try:
                parsed = json.loads(parsed)
            except ValueError:
                # JSON parsing is only a nicety; keep the text on failure.
                pass
    self.response_data = parsed
    super(TwitterHTTPError, self).__init__(str(self))
def _handle_response(self, req, uri, arg_data, _timeout=None):
    """Open ``req`` and convert the response into a wrapped result.

    Args:
        req: Request object passed to ``urllib_request.urlopen``.
        uri: Forwarded to ``TwitterHTTPError`` on failure.
        arg_data: Forwarded to ``TwitterHTTPError`` on failure.
        _timeout: Optional timeout; only passed along when truthy.

    Returns:
        The raw handle for JPEG/PNG responses, ``[]`` for HTTP 304, otherwise
        ``wrap_response`` of ``{}``, parsed JSON, or decoded text.

    Raises:
        TwitterHTTPError: for any HTTP error other than 304.
    """
    open_kwargs = {'timeout': _timeout} if _timeout else {}
    try:
        handle = urllib_request.urlopen(req, **open_kwargs)
        if handle.headers['Content-Type'] in ['image/jpeg', 'image/png']:
            return handle
        try:
            body = handle.read()
        except http_client.IncompleteRead as partial_err:
            # Even a short read may carry a complete response.
            body = partial_err.partial
        if handle.info().get('Content-Encoding') == 'gzip':
            body = gzip.GzipFile(fileobj=StringIO(body)).read()
        if len(body) == 0:
            return wrap_response({}, handle.headers)
        text = body.decode('utf8')
        if "json" == self.format:
            return wrap_response(json.loads(text), handle.headers)
        return wrap_response(text, handle.headers)
    except urllib_error.HTTPError as e:
        if e.code == 304:
            return []
        raise TwitterHTTPError(e, uri, self.format, arg_data)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def test_chunked(self):
    """Read a complete chunked body, then two bodies with a bad final chunk.

    The truncated/malformed variants must raise ``IncompleteRead`` carrying
    the bytes decoded so far.
    """
    chunked_start = (
        'HTTP/1.1 200 OK\r\n'
        'Transfer-Encoding: chunked\r\n\r\n'
        'a\r\n'
        'hello worl\r\n'
        '1\r\n'
        'd\r\n'
    )
    sock = FakeSocket(chunked_start + '0\r\n')
    resp = httplib.HTTPResponse(sock, method="GET")
    resp.begin()
    self.assertEqual(resp.read(), 'hello world')
    resp.close()

    # Either no terminating chunk at all, or a non-hex chunk-size line.
    for x in ('', 'foo\r\n'):
        sock = FakeSocket(chunked_start + x)
        resp = httplib.HTTPResponse(sock, method="GET")
        resp.begin()
        try:
            resp.read()
        except httplib.IncompleteRead as i:
            self.assertEqual(i.partial, 'hello world')
            self.assertEqual(repr(i), 'IncompleteRead(11 bytes read)')
            self.assertEqual(str(i), 'IncompleteRead(11 bytes read)')
        else:
            self.fail('IncompleteRead expected')
        finally:
            resp.close()
def test_incomplete_read(self):
    """A body shorter than Content-Length must raise ``IncompleteRead``.

    The exception has to expose the received bytes, report the shortfall in
    both ``repr`` and ``str``, and leave the response closed.
    """
    raw = 'HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello\r\n'
    response = httplib.HTTPResponse(FakeSocket(raw), method="GET")
    response.begin()
    caught = None
    try:
        response.read()
    except httplib.IncompleteRead as err:
        caught = err
    if caught is None:
        self.fail('IncompleteRead expected')
    expected_text = "IncompleteRead(7 bytes read, 3 more expected)"
    self.assertEqual(caught.partial, 'Hello\r\n')
    self.assertEqual(repr(caught), expected_text)
    self.assertEqual(str(caught), expected_text)
    self.assertTrue(response.isclosed())
def test_chunked(self):
    """Read a complete chunked body, then two bodies with a bad final chunk.

    The truncated/malformed variants must raise ``IncompleteRead`` carrying
    the bytes decoded so far.
    """
    chunked_start = (
        'HTTP/1.1 200 OK\r\n'
        'Transfer-Encoding: chunked\r\n\r\n'
        'a\r\n'
        'hello worl\r\n'
        '1\r\n'
        'd\r\n'
    )
    sock = FakeSocket(chunked_start + '0\r\n')
    resp = httplib.HTTPResponse(sock, method="GET")
    resp.begin()
    self.assertEqual(resp.read(), 'hello world')
    resp.close()

    # Either no terminating chunk at all, or a non-hex chunk-size line.
    for x in ('', 'foo\r\n'):
        sock = FakeSocket(chunked_start + x)
        resp = httplib.HTTPResponse(sock, method="GET")
        resp.begin()
        try:
            resp.read()
        except httplib.IncompleteRead as i:
            self.assertEqual(i.partial, 'hello world')
            self.assertEqual(repr(i), 'IncompleteRead(11 bytes read)')
            self.assertEqual(str(i), 'IncompleteRead(11 bytes read)')
        else:
            self.fail('IncompleteRead expected')
        finally:
            resp.close()
def test_incomplete_read(self):
    """A body shorter than Content-Length must raise ``IncompleteRead``.

    The exception has to expose the received bytes, report the shortfall in
    both ``repr`` and ``str``, and leave the response closed.
    """
    raw = 'HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello\r\n'
    response = httplib.HTTPResponse(FakeSocket(raw), method="GET")
    response.begin()
    caught = None
    try:
        response.read()
    except httplib.IncompleteRead as err:
        caught = err
    if caught is None:
        self.fail('IncompleteRead expected')
    expected_text = "IncompleteRead(7 bytes read, 3 more expected)"
    self.assertEqual(caught.partial, 'Hello\r\n')
    self.assertEqual(repr(caught), expected_text)
    self.assertEqual(str(caught), expected_text)
    self.assertTrue(response.isclosed())
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def __init__(self, e, uri, format, uriparts):
    """Capture the failed request and a decoded copy of the error body.

    Args:
        e: The HTTP error; its ``fp`` is read and its ``headers`` inspected.
        uri: Stored as ``self.uri``.
        format: Stored as ``self.format``; ``'json'`` triggers JSON parsing.
        uriparts: Stored as ``self.uriparts``.

    ``self.response_data`` ends up as ``{}`` for an empty body, parsed JSON
    when the format is json and parsing succeeds, or the decoded text.
    """
    self.e = e
    self.uri = uri
    self.format = format
    self.uriparts = uriparts
    try:
        raw = self.e.fp.read()
    except http_client.IncompleteRead as partial_err:
        # Keep whatever part of the error body did arrive.
        raw = partial_err.partial
    if self.e.headers.get('Content-Encoding') == 'gzip':
        raw = gzip.GzipFile(fileobj=BytesIO(raw)).read()
    if len(raw) == 0:
        parsed = {}
    else:
        parsed = raw.decode('utf-8')
        if self.format == 'json':
            try:
                parsed = json.loads(parsed)
            except ValueError:
                # Not valid JSON; keep the decoded text.
                pass
    self.response_data = parsed
    super(FanfouHTTPError, self).__init__(str(self))
def _handle_response(self, req, uri, arg_data, _timeout=None):
    """Open ``req`` and convert the response into a wrapped result.

    Args:
        req: Request object passed to ``urllib_request.urlopen``.
        uri: Forwarded to ``FanfouHTTPError`` on failure.
        arg_data: Forwarded to ``FanfouHTTPError`` on failure.
        _timeout: Optional timeout; only passed along when truthy.

    Returns:
        The raw handle for JPEG/PNG/GIF responses (after printing the content
        type), ``[]`` for HTTP 304, otherwise ``wrap_response`` of ``{}``,
        parsed JSON, or decoded text.

    Raises:
        FanfouHTTPError: for any HTTP error other than 304.
    """
    open_kwargs = {'timeout': _timeout} if _timeout else {}
    try:
        handle = urllib_request.urlopen(req, **open_kwargs)
        if handle.headers['Content-Type'] in ['image/jpeg', 'image/png', 'image/gif']:
            print(handle.headers['Content-Type'])
            return handle
        try:
            body = handle.read()
        except http_client.IncompleteRead as partial_err:
            # Even a short read may carry a complete response.
            body = partial_err.partial
        if handle.info().get('Content-Encoding') == 'gzip':
            body = gzip.GzipFile(fileobj=BytesIO(body)).read()
        if len(body) == 0:
            return wrap_response({}, handle.headers)
        text = body.decode('utf-8')
        if 'json' == self.format:
            return wrap_response(json.loads(text), handle.headers)
        return wrap_response(text, handle.headers)
    except urllib_error.HTTPError as e:
        if e.code == 304:
            return []
        raise FanfouHTTPError(e, uri, self.format, arg_data)
def craw(self, root_url, full_path, name):
    '''
    Download one article list and store every article parsed from it.

    :param root_url: URL of the list page handed to the downloader.
    :param full_path: Not used by the current implementation.
    :param name: Passed to the downloader and the outputer; appended to
        ``list_error.txt`` when the list download is cut short.
    :return: None
    '''
    html = None
    try:
        html = self.downloader.download_list_ph(root_url, name)
    except httplib.IncompleteRead:
        # Record the name whose list page could not be read in full.
        with open(r'list_error.txt', 'a') as f:
            f.write(name.encode('utf-8'))
            f.write('\n')
    if html is None:
        return
    wechat_url, html_cont = html
    acticle_links = self.parser.parse_list(wechat_url, html_cont)
    if acticle_links is None:
        return
    for link in acticle_links:
        html = self.downloader.download_articles_ph(link)
        data = self.parser.parse_article(html)
        if data is None:
            continue
        # The unpacked names are unused; the unpack is kept because it still
        # fails fast when the parser returns a different number of fields.
        (title, wname, date, content, readNum, praise_num, discuss_content, discuss_praise) = data
        self.outputer.output_mongodb(name, data)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def test_chunked(self):
    """Read a complete chunked body, then two bodies with a bad final chunk.

    The truncated/malformed variants must raise ``IncompleteRead`` carrying
    the bytes decoded so far.
    """
    chunked_start = (
        'HTTP/1.1 200 OK\r\n'
        'Transfer-Encoding: chunked\r\n\r\n'
        'a\r\n'
        'hello worl\r\n'
        '1\r\n'
        'd\r\n'
    )
    sock = FakeSocket(chunked_start + '0\r\n')
    resp = httplib.HTTPResponse(sock, method="GET")
    resp.begin()
    self.assertEqual(resp.read(), 'hello world')
    resp.close()

    # Either no terminating chunk at all, or a non-hex chunk-size line.
    for x in ('', 'foo\r\n'):
        sock = FakeSocket(chunked_start + x)
        resp = httplib.HTTPResponse(sock, method="GET")
        resp.begin()
        try:
            resp.read()
        except httplib.IncompleteRead as i:
            self.assertEqual(i.partial, 'hello world')
            self.assertEqual(repr(i), 'IncompleteRead(11 bytes read)')
            self.assertEqual(str(i), 'IncompleteRead(11 bytes read)')
        else:
            self.fail('IncompleteRead expected')
        finally:
            resp.close()
def test_incomplete_read(self):
    """A body shorter than Content-Length must raise ``IncompleteRead``.

    The exception has to expose the received bytes, report the shortfall in
    both ``repr`` and ``str``, and leave the response closed.
    """
    raw = 'HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello\r\n'
    response = httplib.HTTPResponse(FakeSocket(raw), method="GET")
    response.begin()
    caught = None
    try:
        response.read()
    except httplib.IncompleteRead as err:
        caught = err
    if caught is None:
        self.fail('IncompleteRead expected')
    expected_text = "IncompleteRead(7 bytes read, 3 more expected)"
    self.assertEqual(caught.partial, 'Hello\r\n')
    self.assertEqual(repr(caught), expected_text)
    self.assertEqual(str(caught), expected_text)
    self.assertTrue(response.isclosed())
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)
def test_chunked(self):
    """Read a complete chunked body, then two bodies with a bad final chunk.

    The truncated/malformed variants must raise ``IncompleteRead`` carrying
    the bytes decoded so far.
    """
    chunked_start = (
        'HTTP/1.1 200 OK\r\n'
        'Transfer-Encoding: chunked\r\n\r\n'
        'a\r\n'
        'hello worl\r\n'
        '1\r\n'
        'd\r\n'
    )
    sock = FakeSocket(chunked_start + '0\r\n')
    resp = httplib.HTTPResponse(sock, method="GET")
    resp.begin()
    self.assertEqual(resp.read(), 'hello world')
    resp.close()

    # Either no terminating chunk at all, or a non-hex chunk-size line.
    for x in ('', 'foo\r\n'):
        sock = FakeSocket(chunked_start + x)
        resp = httplib.HTTPResponse(sock, method="GET")
        resp.begin()
        try:
            resp.read()
        except httplib.IncompleteRead as i:
            self.assertEqual(i.partial, 'hello world')
            self.assertEqual(repr(i), 'IncompleteRead(11 bytes read)')
            self.assertEqual(str(i), 'IncompleteRead(11 bytes read)')
        else:
            self.fail('IncompleteRead expected')
        finally:
            resp.close()
def test_incomplete_read(self):
    """A body shorter than Content-Length must raise ``IncompleteRead``.

    The exception has to expose the received bytes, report the shortfall in
    both ``repr`` and ``str``, and leave the response closed.
    """
    raw = 'HTTP/1.1 200 OK\r\nContent-Length: 10\r\n\r\nHello\r\n'
    response = httplib.HTTPResponse(FakeSocket(raw), method="GET")
    response.begin()
    caught = None
    try:
        response.read()
    except httplib.IncompleteRead as err:
        caught = err
    if caught is None:
        self.fail('IncompleteRead expected')
    expected_text = "IncompleteRead(7 bytes read, 3 more expected)"
    self.assertEqual(caught.partial, 'Hello\r\n')
    self.assertEqual(repr(caught), expected_text)
    self.assertEqual(str(caught), expected_text)
    self.assertTrue(response.isclosed())
def _update_chunk_length(self):
    """Read the next chunk-size line and store its value in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set.  Anything after a ``;``
    on the size line is ignored.

    Raises:
        httplib.IncompleteRead: the size is not valid hexadecimal; the
            response is closed first and the offending text is attached.
    """
    if self.chunk_left is not None:
        return
    size_field = self._fp.fp.readline().split(b';', 1)[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise httplib.IncompleteRead(size_field)