def buildMap(train_path="train.in"):
    """Build char/label <-> integer-id lookup tables from a training file.

    The file is read as tab-separated text with no header row and two
    columns, ``char`` and ``label``.  Quote characters are treated as
    ordinary data (``csv.QUOTE_NONE``) and blank lines are kept as rows of
    missing values rather than skipped; those missing values are excluded
    from the vocabularies.

    Id layout:
      * ``0``               -> ``"<PAD>"`` (chars and labels)
      * ``1 .. len(chars)`` -> distinct chars, in order of first appearance
      * ``len(chars) + 1``  -> ``"<NEW>"`` (chars only; presumably the
        placeholder for characters not seen in training -- confirm with
        the callers)
      * ``1 .. len(labels)`` -> distinct labels, in order of first appearance

    Args:
        train_path: Path to the tab-separated training file.

    Returns:
        A tuple ``(char2id, id2char, label2id, id2label)`` of dicts.

    Side effects:
        Calls ``saveMap(id2char, id2label)`` before returning.
    """
    df_train = pd.read_csv(
        train_path,
        delimiter='\t',
        quoting=csv.QUOTE_NONE,
        skip_blank_lines=False,
        header=None,
        names=["char", "label"],
    )

    # De-duplicate while keeping first-appearance order.  Going through
    # ``set`` would make the id assignment depend on string hash
    # randomization, i.e. differ from one interpreter run to the next.
    chars = list(dict.fromkeys(df_train["char"].dropna()))
    labels = list(dict.fromkeys(df_train["label"].dropna()))

    char2id = {ch: idx for idx, ch in enumerate(chars, start=1)}
    label2id = {lab: idx for idx, lab in enumerate(labels, start=1)}
    id2char = {idx: ch for idx, ch in enumerate(chars, start=1)}
    id2label = {idx: lab for idx, lab in enumerate(labels, start=1)}

    # Id 0 is reserved for padding in both vocabularies.
    id2char[0] = "<PAD>"
    id2label[0] = "<PAD>"
    char2id["<PAD>"] = 0
    label2id["<PAD>"] = 0

    # One extra char id past the real vocabulary.
    new_id = len(chars) + 1
    id2char[new_id] = "<NEW>"
    char2id["<NEW>"] = new_id

    saveMap(id2char, id2label)
    return char2id, id2char, label2id, id2label
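# "Comment list" / "Article contents" -- apparently leftover web-page
# navigation text, not part of the program.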