def __init__(self, corpus, pairtype, relations, modelname="mil_classifier.model", test=False, ner="goldstandard",
             generate=True):
    """Create an empty multiple-instance classifier bound to a corpus and pair type.

    Only initialises state: the bag containers start empty and no data
    generation is triggered from here.

    :param corpus: stored as ``self.corpus``.
    :param pairtype: stored as ``self.pairtype``.
    :param relations: accepted but not used by this constructor.
    :param modelname: stored as ``self.modelname``.
    :param test: accepted but not used by this constructor.
    :param ner: stored as ``self.ner_model``.
    :param generate: accepted but not used by this constructor (the
        ``generateMILdata`` call it would presumably gate is disabled).
    """
    super(MILClassifier, self).__init__()

    # Settings copied straight from the arguments.
    self.corpus = corpus
    self.pairtype = pairtype
    self.modelname = modelname
    self.ner_model = ner

    # Lookups keyed by (e1.normalized, e2.normalized).
    self.pairs = {}      # key -> (e1, e2)
    self.instances = {}  # key -> bag: all instances containing these two entities
    self.labels = {}     # key -> label (-1/1)

    # Ordered lists with one entry per bag.
    self.bag_labels, self.bag_pairs = [], []  # bag labels / (e1.normalized, e2.normalized) keys
    self.data, self.predicted = [], []        # bags as lists of feature vectors / predictions

    # Output files; nothing is assigned to them in this constructor.
    self.resultsfile = self.examplesfile = None

    # Unigram counts over tokens of the form word-word (the pattern requires a
    # hyphen); min_df=0.2 is a float, i.e. a document-frequency proportion.
    # A TfidfVectorizer with the same settings was tried here and left disabled.
    bag_of_words = CountVectorizer(min_df=0.2, ngram_range=(1, 1), token_pattern=r'\b\w+\-\w+\b')
    self.vectorizer = bag_of_words

    # Linear-kernel sMIL; misvm.MISVM (C=1.0, max_iters=20) and
    # misvm.MissSVM (C=100) were alternatives left disabled.
    mil_model = misvm.sMIL(kernel='linear', C=1)
    self.classifier = mil_model
# [web-page residue] Comment list
# [web-page residue] Article table of contents