def __init__(self, file_name, user_id):
    """Prepare an import run from an Instapaper HTML export.

    Reads ``file_name``, re-serialises its markup, parses the result into
    ``self.soup`` and initialises the lookup containers kept on the
    instance.

    :param file_name: path of the exported HTML file to read.
    :param user_id: stored as ``self.user`` and compared against
        ``Bookmark.user`` when collecting the existing bookmarks.
    """
    # Instapaper's export leaves <li> tags unclosed, which caused infinite
    # recursion when BeautifulSoup was given the raw file. Passing the
    # text through html.document_fromstring / html.tostring first yields
    # markup in which the <li> tags are closed.
    with open(file_name, 'r') as export_file:
        self.opened_file = export_file
        document = html.document_fromstring(export_file.read())
        self.html = html.tostring(document)
    self.soup = BeautifulSoup4(self.html)

    self.user = user_id
    self.urls = {}
    self.tags_dict = {}
    self.tags_set = set()

    # Non-deleted bookmarks of this user, indexed by main_url
    # (presumably consulted later to detect duplicates -- confirm against
    # the rest of the class).
    # NOTE(review): `== False` looks like an ORM column comparison that
    # builds a query expression, so it must not become `not ...` / `is False`.
    self.check_duplicates_query = Bookmark.query.filter(
        Bookmark.user == self.user,
        Bookmark.deleted == False,
    ).all()
    self.check_duplicates = {
        existing.main_url: existing
        for existing in self.check_duplicates_query
    }

    url_pattern = (
        r'^(?:[a-z0-9\.\-]*)://'  # scheme
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?<!-)\.?)|'  # dotted host name
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # or dotted-quad address
        r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # or colon-separated hex address
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$'  # optional path / query
    )
    self.valid_url = re.compile(url_pattern, re.IGNORECASE)
评论列表
文章目录