def write_split_emote(cls):
    """Split ``emote/all.txt`` into one file per unambiguous emoticon class.

    Every input line is scanned with ``cls.emoticon_re`` and each match is
    assigned by ``Emoticons.assess_match`` to one of three buckets (indices
    0, 1 and 2).  A line is kept only when all of its emoticons fall in
    bucket 0, or all of them fall in bucket 2.  Lines matching the retweet
    pattern, lines without emoticons, lines mixing buckets 0 and 2, and
    lines containing any bucket-1 emoticon are skipped.

    Kept lines are written to ``emote/class_pos.txt`` or
    ``emote/class_neg.txt`` with every emoticon replaced by a single space
    and leading/trailing whitespace stripped, one line per record.
    """
    retweet_re = re.compile(r'RT\s*"?[@?][a-zA-Z0-9_]+:?')

    # NOTE(review): the previous version keyed the output files by the
    # strings 'pos'/'neg' but looked them up with the integer labels 0 and 2,
    # so the first kept line raised KeyError.  The mapping below assumes
    # bucket 0 is negative and bucket 2 is positive (the usual
    # negative/neutral/positive ordering) -- confirm against
    # Emoticons.assess_match before relying on the output files.
    label_names = {0: 'neg', 2: 'pos'}

    with open('emote/all.txt', encoding='utf-8') as in_sr, ExitStack() as stack:
        # ExitStack closes every output file when the block exits, even if
        # opening a later file or processing a line fails.
        out_srs = {
            label: stack.enter_context(
                open('emote/class_{}.txt'.format(name), 'w', encoding='utf-8'))
            for label, name in label_names.items()
        }
        for line in in_sr:
            if retweet_re.search(line):
                continue

            # Tally how many emoticons of each bucket appear on this line.
            counts = [0, 0, 0]
            for match in cls.emoticon_re.finditer(line):
                counts[Emoticons.assess_match(match)] += 1

            # Any bucket-1 emoticon makes the line ambiguous.
            if counts[1]:
                continue
            if counts[0] and not counts[2]:
                label = 0
            elif counts[2] and not counts[0]:
                label = 2
            else:
                # No emoticons at all, or both buckets 0 and 2 present.
                continue

            out_srs[label].write(cls.emoticon_re.sub(' ', line).strip() + '\n')
# (Stray web-page headings, not part of the program: "Comment list" / "Table of contents".)