def showStaticWords(firstPage, secondPage):
"""
Prints words appearing in two different response pages
"""
infoMsg = "finding static words in longest matching part of dynamic page content"
logger.info(infoMsg)
firstPage = getFilteredPageContent(firstPage)
secondPage = getFilteredPageContent(secondPage)
infoMsg = "static words: "
if firstPage and secondPage:
match = SequenceMatcher(None, firstPage, secondPage).find_longest_match(0, len(firstPage), 0, len(secondPage))
commonText = firstPage[match[0]:match[0] + match[2]]
commonWords = getPageWordSet(commonText)
else:
commonWords = None
if commonWords:
commonWords = list(commonWords)
commonWords.sort(lambda a, b: cmp(a.lower(), b.lower()))
for word in commonWords:
if len(word) > 2:
infoMsg += "'%s', " % word
infoMsg = infoMsg.rstrip(", ")
else:
infoMsg += "None"
logger.info(infoMsg)
评论列表
文章目录