def buildMap(train_path):
    """Build word/label <-> integer-id lookup tables from a training file.

    The file is read as a two-column, tab-separated table (word, label)
    with no header row. Quoting is disabled and blank lines are kept; null
    cells are ignored when the vocabularies are collected.

    Id layout, applied to words and labels alike:
        0      -> "<PAD>"
        1..n   -> the n distinct non-null values, in order of first
                  appearance in the file
        n + 1  -> "<NEW>"

    The id->word and id->label tables are handed to ``saveMap`` before
    returning.

    Args:
        train_path: Location of the training file, passed straight to
            ``pd.read_csv``.

    Returns:
        The tuple ``(word2id, id2word, label2id, id2label)`` of dicts.
    """
    df_train = pd.read_csv(
        train_path,
        delimiter='\t',
        quoting=csv.QUOTE_NONE,
        skip_blank_lines=False,
        header=None,
        names=['word', 'label'],
    )

    def _lookup_tables(column):
        # Return (value -> id, id -> value) for one column of df_train.
        #
        # unique() keeps values in order of first appearance, so the ids
        # are the same on every run. Iterating a set of strings is not
        # reproducible across interpreter runs when hash randomization
        # is enabled.
        values = df_train[column].dropna().unique().tolist()
        to_id = {value: idx for idx, value in enumerate(values, 1)}
        from_id = {idx: value for idx, value in enumerate(values, 1)}

        # Reserved entries are written last so they win if the data
        # happens to contain the same token.
        from_id[0] = "<PAD>"
        to_id["<PAD>"] = 0
        new_id = len(values) + 1
        from_id[new_id] = "<NEW>"
        to_id["<NEW>"] = new_id
        return to_id, from_id

    word2id, id2word = _lookup_tables('word')
    label2id, id2label = _lookup_tables('label')

    saveMap(id2word, id2label)
    return word2id, id2word, label2id, id2label
# (Web-page residue, not code: "Comment list" / "Table of contents" navigation labels.)