def _infer_text_fragment_inner(self, title, body, post_id):
    """Locate ``self.quote`` in a post and record where it was found.

    The quote is normalized (carriage returns removed, quoted-printable
    decoding attempted, then passed through ``sanitize_html``) and looked
    up first in the sanitized ``body`` and, failing that, in ``title``.

    Side effect: ``self.body`` is overwritten with the sanitized quote
    whenever the two differ.

    NOTE(review): an earlier maintainer flagged this as possibly dead
    code; if it is still used it needs to be refactored with langstrings
    and perhaps moved within text_fragment.

    :param title: text searched as-is when the quote is not in the body;
        a hit is recorded against the ``message-subject`` element.
    :param body: text run through ``sanitize_html`` and searched first;
        a hit is recorded against the ``message-body`` element.
    :param post_id: passed to ``Post.uri_generic`` to build the id of the
        enclosing ``message-...`` element in the XPath.
    :returns: the first ``TextFragmentIdentifier`` already attached to
        this extract (or a newly constructed one) with its XPath and
        offsets set, or ``None`` if the quote is in neither body nor
        title.
    """
    body = sanitize_html(body, [])
    quote = self.quote.replace("\r", "")
    try:
        # Attempted for historical reasons; on failure the quote is used
        # undecoded.
        quote = quopri.decodestring(quote)
    except Exception:
        # Deliberately best-effort.  ``Exception`` rather than a bare
        # ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        pass
    quote = sanitize_html(quote, [])
    if quote != self.body:
        self.body = quote

    # The search (and the recorded offsets) use the quote with its
    # newlines removed.
    # NOTE(review): body/title are not stripped of newlines here, so a
    # quote spanning a line break may not match -- confirm intended.
    quote = quote.replace("\n", "")
    start = body.find(quote)
    lookin = 'message-body'
    if start < 0:
        # Not in the body: fall back to the title.
        start = title.find(quote)
        if start < 0:
            return None
        lookin = 'message-subject'
    xpath = "//div[@id='message-%s']//div[@class='%s']" % (
        Post.uri_generic(post_id), lookin)

    # Reuse the identifier already attached to this extract, if any.
    tfi = self.db.query(TextFragmentIdentifier).filter_by(
        extract=self).first()
    if not tfi:
        tfi = TextFragmentIdentifier(extract=self)
    # The whole quote lies in one element, so start and end share an XPath.
    tfi.xpath_start = tfi.xpath_end = xpath
    tfi.offset_start = start
    tfi.offset_end = start + len(quote)
    return tfi
评论列表
文章目录