def _parse_rtf_protocol(self, committee_id, meeting_id, bucket, protocol_object_name, parts_object_name, text_object_name):
# currently with the new API - we don't seem to get rtf files anymore
# it looks like files which used to be rtf are actually doc
# need to investigate further
return False
# rtf_extractor = os.environ.get("RTF_EXTRACTOR_BIN")
# if rtf_extractor:
# with object_storage.temp_download(protocol_object_name) as protocol_filename:
# with tempfile.NamedTemporaryFile() as text_filename:
# cmd = rtf_extractor + ' ' + protocol_filename + ' ' + text_filename
# try:
# subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
# protocol_text = fs.read(text_filename)
# with CommitteeMeetingProtocol.get_from_text(protocol_text) as protocol:
# self._parse_protocol_parts(parts_filename, protocol)
# except subprocess.SubprocessError:
# logging.exception("committee {} meeting {}: failed to parse rtf file, skipping".format(committee_id,
# meeting_id))
# return False
# return True
# else:
# logging.warning("missing RTF_EXTRACTOR_BIN environment variable, skipping rtf parsing")
# return False
parse_committee_meeting_protocols.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录