def trainSVMTK(docs, pairs, dditype, model="svm_tk_classifier.model", excludesentences=None):
    """Write an SVM-light-TK training file for `pairs` and train a model on it.

    One line per selected pair, built by ``get_svm_train_line``, is written to
    ``ddi_models/<model>.txt``. ``svm_learn`` is then run on that file to
    produce the model.

    Args:
        docs: indexed by sentence ID; ``docs[sid][relations.SENTENCE_PAIRS][p]``
            is handed to ``get_svm_train_line`` for every pair ``p``.
        pairs: mapping from pair ID to pair data, read through
            ``relations.PAIR_DDI``, ``relations.PAIR_TYPE`` and
            ``relations.PAIR_DEP_TREE``.
        dditype: interaction type to train for, or ``"all"``. When it is not
            ``"all"``, pairs flagged as interactions of a *different* type are
            left out of the training file entirely.
        model: file name of the model; the training file is ``model + ".txt"``.
        excludesentences: sentence IDs whose pairs must not be used for
            training. ``None`` (the default) excludes nothing.

    Exits the process with status 1 when ``svm_learn`` does not produce the
    model file.
    """
    if excludesentences is None:
        excludesentences = []

    model_path = "ddi_models/" + model
    train_path = model_path + ".txt"

    # Remove stale outputs first: the existence of the model file after the
    # svm_learn run is the only success check performed below.
    for stale in (model_path, train_path):
        if os.path.isfile(stale):
            os.remove(stale)

    with open(train_path, 'w') as train:
        for p in pairs:
            pair = pairs[p]
            # Interactions of another type are skipped, not written as
            # negative examples.
            if (dditype != "all" and pair[relations.PAIR_DDI]
                    and pair[relations.PAIR_TYPE] != dditype):
                continue
            sid = relations.getSentenceID(p)
            if sid in excludesentences:
                continue
            # Pass a copy of the tree so the helper cannot alter the stored one.
            tree = pair[relations.PAIR_DEP_TREE][:]
            line = get_svm_train_line(tree, pair, sid,
                                      docs[sid][relations.SENTENCE_PAIRS][p])
            # Non-interacting pairs get a leading '-' (presumably negating the
            # label emitted by get_svm_train_line -- confirm against helper).
            if not pair[relations.PAIR_DDI]:
                line = '-' + line
            train.write(line)

    # NOTE(review): the training file is written under "ddi_models/" but
    # svm_learn is pointed at basedir; this only works if basedir resolves to
    # the same directory -- confirm.
    svmlightcall = Popen(["./svm-light-TK-1.2/svm-light-TK-1.2.1/svm_learn", "-t", "5",
                          "-L", "0.4", "-T", "2", "-S", "2", "-g", "10",
                          "-D", "0", "-C", "T", basedir + model + ".txt", basedir + model],
                         stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile(model_path):
        print("failed training model " + basedir + model)
        # (stdout, stderr) captured from svm_learn, for diagnosis.
        print(res)
        # Non-zero status so callers and shell pipelines can detect the failure.
        sys.exit(1)
# Comment list
# Table of contents