def run(self):
    """Collect the script elements found in the file at self.filepath.

    The file is read in binary mode, /* ... */ comments are stripped, and
    every match of self.script_re is processed. A script whose "language"
    attribute is "jscript.encode" or "vbscript.encode" is passed through
    self.decode() before being stored.

    Returns:
        A list with one to_unicode()-converted entry per matched script,
        in the order re.findall() reports them.
    """
    ret = []

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(self.filepath, "rb") as handle:
        content = handle.read()

    # NOTE(review): str patterns are applied to data read in binary mode,
    # which presumably means this targets Python 2 -- confirm before
    # running it on Python 3.
    # Get rid of superfluous comments.
    content = re.sub("/\\*.*?\\*/", "", content, flags=re.S)

    for script in re.findall(self.script_re, content, re.I | re.S):
        # The HTML parser is only used to read the "language" attribute.
        # Detection is best-effort: any parsing problem just means the
        # script is treated as not encoded.
        try:
            soup = bs4.BeautifulSoup(script, "html.parser")
            language = soup.script.attrs.get("language", "").lower()
        except Exception:
            language = None

        # We can't rely on bs4 or any other HTML/XML parser to provide us
        # with the raw content of the xml tag as they decode html entities
        # and all that, leaving us with a corrupted string.
        # NOTE(review): group(0) is the whole match (tags included), not
        # the captured body; kept as-is since callers may rely on it.
        matched = re.match("<.*>(.*)</.*>$", script, re.S)
        # When the pattern does not match, fall back to the script text
        # itself rather than crashing on None.group(); for a successful
        # match the anchored pattern spans the same text.
        raw = matched.group(0) if matched else script

        # Decode JScript.Encode encoding.
        if language in ("jscript.encode", "vbscript.encode"):
            raw = self.decode(raw)

        ret.append(to_unicode(raw))

    return ret
评论列表
文章目录