multiinstance.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:IBRel 作者: lasigeBioTM 项目源码 文件源码
def __init__(self, corpus, pairtype, relations,
             modelname="mil_classifier.model", test=False,
             ner="goldstandard", generate=True):
        """Set up an empty multi-instance learning (MIL) pair classifier.

        The constructor stores the caller's configuration, creates the empty
        per-pair and per-bag containers, and instantiates the feature
        vectorizer and the sMIL model. It does not generate any MIL data
        itself: the ``generateMILdata`` call that used to live here is
        disabled.

        :param corpus: stored on ``self.corpus``.
        :param pairtype: stored on ``self.pairtype``.
        :param relations: accepted for interface compatibility; not used here.
        :param modelname: stored on ``self.modelname``.
        :param test: accepted for interface compatibility; not used here.
        :param ner: stored on ``self.ner_model``.
        :param generate: accepted for interface compatibility; not used here.
        """
        super(MILClassifier, self).__init__()

        # Configuration handed in by the caller.
        self.corpus = corpus
        self.pairtype = pairtype
        self.ner_model = ner
        self.modelname = modelname

        # Nothing is assigned to these yet.
        # NOTE(review): presumably output file paths/handles -- confirm.
        self.resultsfile = self.examplesfile = None

        # Lookups keyed by the pair (e1.normalized, e2.normalized).
        self.pairs = {}      # key -> (e1, e2)
        self.instances = {}  # key -> bag of all instances with these two entities
        self.labels = {}     # key -> label (-1/1)

        # Ordered per-bag lists.
        self.data = []        # bags; each bag is a list of feature vectors
        self.bag_pairs = []   # pair label (e1.normalized, e2.normalized) of each bag
        self.bag_labels = []  # label of each bag
        self.predicted = []   # prediction for each bag

        # Unigram counts; the token pattern only matches tokens made of two
        # word-character runs joined by a hyphen.
        self.vectorizer = CountVectorizer(
            min_df=0.2,
            ngram_range=(1, 1),
            token_pattern=r'\b\w+\-\w+\b',
        )

        # Alternatives that were tried and left disabled in the original code:
        # a TfidfVectorizer with the same settings, misvm.MISVM(kernel='linear',
        # C=1.0, max_iters=20) and misvm.MissSVM(kernel='linear', C=100).
        self.classifier = misvm.sMIL(kernel='linear', C=1)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号