def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            self.Init()

            forest = ParseForest(sentence)
            self.getWordEmbeddings(forest, False)

            # Initialize each root's forward/backward LSTM state from its word vector.
            # concatenate, lookup, and renew_cg come from the DyNet (pycnn) API
            # imported at module level.
            for root in forest.roots:
                root.lstms = [self.builders[0].initial_state().add_input(root.vec),
                              self.builders[1].initial_state().add_input(root.vec)]

            # Easy-first parsing: repeatedly attach the highest-scoring pair of
            # adjacent roots until a single root remains.
            while len(forest.roots) > 1:
                self.__evaluate(forest, False)
                bestParent, bestChild, bestScore = None, None, float("-inf")
                bestIndex, bestOp = None, None
                roots = forest.roots

                # Scan adjacent root pairs, relations, and attachment directions
                # for the best-scoring action; op selects which of the two
                # neighbors becomes the parent.
                for i in xrange(len(forest.roots) - 1):
                    for irel, rel in enumerate(self.irels):
                        for op in xrange(2):
                            if bestScore < roots[i].scores[irel][op] and (i + (1 - op)) > 0:
                                bestParent, bestChild = i + op, i + (1 - op)
                                bestScore = roots[i].scores[irel][op]
                                bestIndex, bestOp = i, op
                                bestRelation, bestIRelation = rel, irel

                # Invalidate cached scores in a window of k around the attachment
                # point so they are recomputed on the next iteration.
                for j in xrange(max(0, bestIndex - self.k - 1), min(len(forest.roots), bestIndex + self.k + 2)):
                    roots[j].scores = None

                roots[bestChild].pred_parent_id = forest.roots[bestParent].id
                roots[bestChild].pred_relation = bestRelation

                # Compose the child's LSTM outputs with the relation embedding and
                # feed the result into the parent's LSTM state.
                roots[bestParent].lstms[bestOp] = roots[bestParent].lstms[bestOp].add_input(self.activation(self.lstm2lstmbias + self.lstm2lstm *
                    concatenate([roots[bestChild].lstms[0].output(), lookup(self.model["rels-lookup"], bestIRelation), roots[bestChild].lstms[1].output()])))

                forest.Attach(bestParent, bestChild)

            renew_cg()  # reset the computation graph between sentences
            yield sentence
Python read_conll() usage examples (source code)
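All of the Predict() methods on this page consume read_conll() from the project's utils module. The project's own reader is not shown here; as a rough, illustrative sketch only, a reader compatible with how these methods call it might look like the following. The ConllEntry fields and the CoNLL column layout below are assumptions, and this ConllEntry stands in for the project's utils.ConllEntry.

class ConllEntry(object):
    def __init__(self, id, form, pos, parent_id=None, relation=None):
        self.id = id                # 1-based token index (0 for the artificial root)
        self.form = form            # surface form
        self.norm = form.lower()    # normalized form used for embedding lookup
        self.pos = pos              # POS tag
        self.parent_id = parent_id  # gold head index, if present
        self.relation = relation    # gold dependency label, if present

def read_conll(fh, proj=False):
    # proj is accepted only to match the call sites above; projectivity
    # filtering is omitted in this sketch.
    tokens = [ConllEntry(0, '*root*', 'ROOT-POS', -1, 'rroot')]
    for line in fh:
        line = line.strip()
        if not line:
            # Blank line ends a sentence: yield root + tokens, then start over.
            if len(tokens) > 1:
                yield tokens
            tokens = [ConllEntry(0, '*root*', 'ROOT-POS', -1, 'rroot')]
        else:
            cols = line.split('\t')
            if not cols[0].isdigit():
                continue  # skip comment and multi-word-token lines
            tokens.append(ConllEntry(int(cols[0]), cols[1], cols[3],
                                     int(cols[6]), cols[7]))
    if len(tokens) > 1:
        yield tokens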
def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            self.Init()

            # Keep only the CoNLL token entries and rotate the artificial root
            # to the end of the sentence.
            conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]
            conll_sentence = conll_sentence[1:] + [conll_sentence[0]]

            self.getWordEmbeddings(conll_sentence, False)
            stack = ParseForest([])
            buf = ParseForest(conll_sentence)

            for root in conll_sentence:
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            # Arc-hybrid transition loop: stop when the buffer holds only the
            # root and the stack is empty. chain and itemgetter come from the
            # module-level itertools / operator imports.
            while not (len(buf) == 1 and len(stack) == 0):
                scores = self.__evaluate(stack, buf, False)
                best = max(chain(*scores), key=itemgetter(2))

                if best[1] == 2:
                    # SHIFT: move the front of the buffer onto the stack.
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]

                elif best[1] == 0:
                    # LEFT-ARC: the top of the stack becomes a child of the
                    # front of the buffer.
                    child = stack.roots.pop()
                    parent = buf.roots[0]

                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]

                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

                elif best[1] == 1:
                    # RIGHT-ARC: the top of the stack becomes a child of the
                    # element below it.
                    child = stack.roots.pop()
                    parent = stack.roots[-1]

                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]

                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

            renew_cg()  # reset the computation graph between sentences
            yield sentence
def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP)):
            conll_sentence = [entry for entry in sentence if isinstance(entry, utils.ConllEntry)]

            # Build each token's input vector from its word, POS, and optional
            # external embeddings.
            for entry in conll_sentence:
                wordvec = self.wlookup[int(self.vocab.get(entry.norm, 0))] if self.wdims > 0 else None
                posvec = self.plookup[int(self.pos[entry.pos])] if self.pdims > 0 else None
                evec = self.elookup[int(self.extrnd.get(entry.form, self.extrnd.get(entry.norm, 0)))] if self.external_embedding is not None else None
                entry.vec = concatenate(filter(None, [wordvec, posvec, evec]))

                entry.lstms = [entry.vec, entry.vec]
                entry.headfov = None
                entry.modfov = None

                entry.rheadfov = None
                entry.rmodfov = None

            # First BiLSTM pass over the sentence; forward and backward outputs
            # are stored per token in entry.lstms.
            if self.blstmFlag:
                lstm_forward = self.builders[0].initial_state()
                lstm_backward = self.builders[1].initial_state()

                for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                    lstm_forward = lstm_forward.add_input(entry.vec)
                    lstm_backward = lstm_backward.add_input(rentry.vec)

                    entry.lstms[1] = lstm_forward.output()
                    rentry.lstms[0] = lstm_backward.output()

                # Optional second BiLSTM stacked on top of the first one.
                if self.bibiFlag:
                    for entry in conll_sentence:
                        entry.vec = concatenate(entry.lstms)

                    blstm_forward = self.bbuilders[0].initial_state()
                    blstm_backward = self.bbuilders[1].initial_state()

                    for entry, rentry in zip(conll_sentence, reversed(conll_sentence)):
                        blstm_forward = blstm_forward.add_input(entry.vec)
                        blstm_backward = blstm_backward.add_input(rentry.vec)

                        entry.lstms[1] = blstm_forward.output()
                        rentry.lstms[0] = blstm_backward.output()

            # Score all head-modifier pairs and decode the best projective tree.
            scores, exprs = self.__evaluate(conll_sentence, True)
            heads = decoder.parse_proj(scores)

            for entry, head in zip(conll_sentence, heads):
                entry.pred_parent_id = head
                entry.pred_relation = '_'

            dump = False

            # Optionally predict a relation label for each attachment.
            if self.labelsFlag:
                for modifier, head in enumerate(heads[1:]):
                    scores, exprs = self.__evaluateLabel(conll_sentence, head, modifier + 1)
                    conll_sentence[modifier + 1].pred_relation = self.irels[max(enumerate(scores), key=itemgetter(1))[0]]

            renew_cg()  # reset the computation graph between sentences

            if not dump:
                yield sentence
def Predict(self, conll_path):
    with open(conll_path, 'r') as conllFP:
        for iSentence, sentence in enumerate(read_conll(conllFP, False)):
            self.Init()

            # Rotate the artificial root to the end of the sentence before parsing.
            sentence = sentence[1:] + [sentence[0]]
            self.getWordEmbeddings(sentence, False)
            stack = ParseForest([])
            buf = ParseForest(sentence)

            for root in sentence:
                root.lstms = [root.vec for _ in xrange(self.nnvecs)]

            hoffset = 1 if self.headFlag else 0

            # Arc-hybrid transition loop (older variant of the method above,
            # without the ConllEntry filter and with a different stop condition).
            while len(buf) > 0 or len(stack) > 1:
                scores = self.__evaluate(stack, buf, False)
                best = max(chain(*scores), key=itemgetter(2))

                if best[1] == 2:
                    # SHIFT: move the front of the buffer onto the stack.
                    stack.roots.append(buf.roots[0])
                    del buf.roots[0]

                elif best[1] == 0:
                    # LEFT-ARC: the top of the stack becomes a child of the
                    # front of the buffer.
                    child = stack.roots.pop()
                    parent = buf.roots[0]

                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]

                    bestOp = 0
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

                elif best[1] == 1:
                    # RIGHT-ARC: the top of the stack becomes a child of the
                    # element below it.
                    child = stack.roots.pop()
                    parent = stack.roots[-1]

                    child.pred_parent_id = parent.id
                    child.pred_relation = best[0]

                    bestOp = 1
                    if self.rlMostFlag:
                        parent.lstms[bestOp + hoffset] = child.lstms[bestOp + hoffset]
                    if self.rlFlag:
                        parent.lstms[bestOp + hoffset] = child.vec

            renew_cg()  # reset the computation graph between sentences
            # Rotate the root back to the front before yielding.
            yield [sentence[-1]] + sentence[:-1]
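Each Predict() above is a generator that yields one parsed sentence at a time, with pred_parent_id and pred_relation filled in on each token. A minimal sketch of how such a generator might be drained and written back out, assuming a hypothetical writer (this is not the project's own output helper, and the column layout is an assumption):

def write_predictions(out_path, sentence_iter):
    # Illustrative writer: one tab-separated line per token, blank line
    # between sentences.
    with open(out_path, 'w') as fh:
        for sentence in sentence_iter:
            for entry in sentence:
                if getattr(entry, 'id', 0) > 0:  # skip the artificial root and non-token entries
                    fh.write('\t'.join([str(entry.id), entry.form, '_', entry.pos, entry.pos, '_',
                                        str(entry.pred_parent_id), entry.pred_relation, '_', '_']) + '\n')
            fh.write('\n')

# Usage (parser is an instance of one of the classes these methods belong to):
# write_predictions('test_pred.conll', parser.Predict('test.conll'))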