def fresh(self, s, tokenized=False):
    """UD-parse and POS-tag sentence `s` via the parser subprocess.

    The sentence is sent to ``self.process`` as a single UTF-8 encoded
    line, one line of output is read back, and that line is handed to
    ``self.to_ud``.

    Args:
        s: The sentence to parse. Surrounding whitespace is stripped.
        tokenized: Pass ``True`` if `s` has already been tokenized;
            otherwise the module-level ``tokenize`` helper is applied
            first (originally documented as
            ``nltk.tokenize.TreebankWordTokenizer`` -- TODO confirm).

    Returns:
        Whatever ``self.to_ud`` returns for the subprocess's output line
        (originally documented as ``(UDParse, PTB-parse-string)`` --
        TODO confirm against ``to_ud``).

    Raises:
        AssertionError: If the (tokenized) sentence contains a newline.
        IOError: If writing to the subprocess fails (e.g. broken pipe).
            ``self.process`` is reset to ``None`` first, so the next call
            goes through ``self._start_subprocess()`` again.
    """
    # Lazily start the parser process on first use (or after a failure).
    if self.process is None:
        self._start_subprocess()

    s = str(s.strip())
    if not tokenized:
        s = tokenize(s)
    s = s.strip()

    # The protocol is one line in, one line out, so an embedded newline
    # would desynchronise every later call. Raised explicitly (rather than
    # via `assert`) so the check survives `python -O`; the exception type
    # and message are the same as before.
    if '\n' in s:
        raise AssertionError("No newline characters allowed %r" % s)

    try:
        stdin = self.process.stdin
        stdin.write(s.encode('utf-8') + b'\n')
        # On a buffered pipe a dead child is usually only noticed here,
        # so the flush has to be inside the guarded region too.
        stdin.flush()
    except IOError:
        # The pipe is unusable; forget the process so that the next call
        # restarts it instead of failing on the same dead pipe forever.
        self.process = None
        raise

    out = self.process.stdout.readline()
    if sys.version_info[0] == 3:
        # The pipe yields bytes on Python 3; decode before parsing.
        out = out.decode()
    return self.to_ud(out)
评论列表
文章目录