ddi_kernels.py 文件源码-python代码片段

def trainSVMTK(docs, pairs, dditype, model="svm_tk_classifier.model", excludesentences=[]):
    if os.path.isfile("ddi_models/" + model):
        os.remove("ddi_models/" + model)
    if os.path.isfile("ddi_models/" + model + ".txt"):
        os.remove("ddi_models/" + model + ".txt")

    #docs = use_external_data(docs, excludesentences, dditype)
    xerrors = 0
    with open("ddi_models/" + model + ".txt", 'w') as train:
        #print pairs
        for p in pairs:
            if dditype != "all" and pairs[p][relations.PAIR_DDI] and pairs[p][relations.PAIR_TYPE] != dditype:
                continue
            sid = relations.getSentenceID(p)
            if sid not in excludesentences:
                tree = pairs[p][relations.PAIR_DEP_TREE][:]
                #print "tree1:", tree
                #if len(docs[sid][ddi.SENTENCE_ENTITIES]) > 20:
                    #print line
                #    line = "1 |BT| (ROOT (NP (NN candidatedrug) (, ,) (NN candidatedrug))) |ET|"
                #    xerrors += 1
                #else:
                line = get_svm_train_line(tree, pairs[p], sid,
                                              docs[sid][relations.SENTENCE_PAIRS][p])
                if not pairs[p][relations.PAIR_DDI]:
                    line = '-' + line
                elif pairs[p][relations.PAIR_TYPE] != dditype and dditype != "all":
                    line = '-' + line

                train.write(line)
    #print "tree errors:", xerrors
    svmlightcall = Popen(["./svm-light-TK-1.2/svm-light-TK-1.2.1/svm_learn", "-t", "5",
                          "-L", "0.4", "-T", "2", "-S", "2", "-g", "10",
                          "-D", "0", "-C", "T", basedir + model + ".txt", basedir + model],
                         stdout = PIPE, stderr = PIPE)
    res  = svmlightcall.communicate()
    if not os.path.isfile("ddi_models/" + model):
        print "failed training model " + basedir + model
        print res
        sys.exit()