/**
 * Sends an HTTP GET request for the given URI and writes the response body
 * to the given file.
 *
 * <p>
 * While handling the response, the following data is added to the URI:
 * every response header (key prefixed with
 * {@code HTTP_RESPONSE_HEADER_PREFIX}), the status code
 * ({@code Constants.URI_HTTP_STATUS_CODE}), the first and second
 * {@code ';'}-separated segments of the Content-Type header
 * ({@code Constants.URI_HTTP_MIME_TYPE_KEY} and
 * {@code Constants.URI_HTTP_CHARSET_KEY}, if present) and, on success, the
 * absolute path of the output file ({@code Constants.URI_DATA_FILE_NAME}).
 * </p>
 *
 * @param uri
 *            the URI that should be requested; its data map may override
 *            the Accept header that is sent
 * @param outputFile
 *            the file the response body is written to
 * @return {@code outputFile} if the body has been stored, or {@code null}
 *         if the status code is outside of [200, 300) or the response
 *         carries no body
 * @throws ClientProtocolException
 *             if the HTTP client reports a protocol error
 * @throws FileNotFoundException
 *             if the output file cannot be opened for writing
 * @throws IOException
 *             if sending the request, reading the response or writing the
 *             file fails
 */
protected File requestData(CrawleableUri uri, File outputFile)
        throws ClientProtocolException, FileNotFoundException, IOException {
    HttpGet request = new HttpGet(uri.getUri());
    request.addHeader(HttpHeaders.ACCEPT,
            MapUtils.getString(uri.getData(), Constants.URI_HTTP_ACCEPT_HEADER, acceptHeader));
    // NOTE(review): the Accept-Charset value is looked up with the same key
    // as the Accept value (URI_HTTP_ACCEPT_HEADER), so a per-URI Accept
    // override is also sent as Accept-Charset. This looks like a copy-paste
    // slip - confirm whether a dedicated charset key should be used here.
    request.addHeader(HttpHeaders.ACCEPT_CHARSET,
            MapUtils.getString(uri.getData(), Constants.URI_HTTP_ACCEPT_HEADER, acceptCharset));

    HttpEntity entity = null;
    CloseableHttpResponse response = null;
    OutputStream os = null;
    try {
        response = client.execute(request);
        // Handle response headers (especially the status and the content type)
        for (Header header : response.getAllHeaders()) {
            uri.addData(HTTP_RESPONSE_HEADER_PREFIX + header.getName(), header.getValue());
        }
        StatusLine status = response.getStatusLine();
        int statusCode = status.getStatusCode();
        uri.addData(Constants.URI_HTTP_STATUS_CODE, statusCode);
        if ((statusCode < 200) || (statusCode >= 300)) {
            LOGGER.info("Response of \"{}\" has the wrong status ({}). Returning null.", uri, status.toString());
            return null;
        }
        Header contentTypeHeader = response.getFirstHeader(HttpHeaders.CONTENT_TYPE);
        if (contentTypeHeader != null) {
            String[] typeValues = contentTypeHeader.getValue().split(";");
            // split() drops trailing empty strings, so a value like ";"
            // yields an empty array
            if (typeValues.length > 0) {
                uri.addData(Constants.URI_HTTP_MIME_TYPE_KEY, typeValues[0]);
            }
            // Only a header with at least one parameter has a second
            // segment. The segment is stored as is (e.g. " charset=UTF-8").
            if (typeValues.length > 1) {
                uri.addData(Constants.URI_HTTP_CHARSET_KEY, typeValues[1]);
            }
        } else {
            LOGGER.info("The response did not contain a content type header.");
        }
        // store response data
        entity = response.getEntity();
        if (entity == null) {
            // A successful response is allowed to have no body (e.g. 204)
            LOGGER.info("Response of \"{}\" did not contain a body. Returning null.", uri);
            return null;
        }
        InputStream is = entity.getContent();
        os = new BufferedOutputStream(new FileOutputStream(outputFile));
        StreamUtils.copy(is, os);
        // Flush explicitly: closeQuietly() below would swallow a failing
        // flush and an incomplete file would be reported as success.
        os.flush();
    } finally {
        IOUtils.closeQuietly(os);
        if (entity != null) {
            try {
                EntityUtils.consume(entity);
            } catch (IOException e) {
                // Best-effort clean-up; must not mask an exception from the try block
                LOGGER.debug("Couldn't consume the response entity. It will be ignored.", e);
            }
        }
        if (response != null) {
            try {
                response.close();
            } catch (IOException e) {
                // Best-effort clean-up; must not mask an exception from the try block
                LOGGER.debug("Couldn't close the response. It will be ignored.", e);
            }
        }
    }
    uri.addData(Constants.URI_DATA_FILE_NAME, outputFile.getAbsolutePath());
    return outputFile;
}
HTTPFetcher.java 文件源码
java
阅读 40
收藏 0
点赞 0
评论 0
项目:Squirrel
作者:
评论列表
文章目录