def html_entity_decode(self, string):
    """Replace HTML entity references in *string* and return UTF-8 bytes.

    Every substring matching ``&#?(\\w+?);`` (for example ``&amp;`` or
    ``&#38;``) is handed, as a regex match object, to
    ``self.html_entity_decode_char``; whatever that callback returns is
    substituted for the match.  The callback is defined outside this
    block -- presumably it maps the captured name/number to the
    corresponding character; verify against its definition.

    Args:
        string: The text to process, either UTF-8 encoded bytes or
            already-decoded text.

    Returns:
        The substituted text, encoded as UTF-8.

    Raises:
        UnicodeDecodeError: If *string* is bytes that are not valid UTF-8.
    """
    # Only byte strings need decoding.  Calling .decode() on text that is
    # already decoded either fails outright or goes through an implicit
    # ASCII round-trip that breaks on non-ASCII characters.
    if isinstance(string, bytes):
        string = string.decode('UTF-8')
    # Raw string so that "\w" reaches the regex engine untouched instead of
    # being parsed as an (invalid) string escape sequence.
    decoded = re.sub(r"&#?(\w+?);", self.html_entity_decode_char, string)
    return decoded.encode('UTF-8')