iparser.py 文件源码-python代码片段

iparser.py 文件源码

python

阅读 22 收藏 0 点赞 0 评论 0

项目：deep-text-corrector 作者: andabi 项目源码文件源码

def parse_batch(self, sentenceDumpedFileName, parsingDumpedFileName):
    """Run the Stanford lexicalized parser over a file of sentences.

    The parser is looked up in ``../stanford-parser-2012-03-09`` relative to
    the current working directory. Its standard output is saved verbatim
    (UTF-8) to ``parsingDumpedFileName`` and also returned, split into
    blank-line-separated sections.

    Args:
        sentenceDumpedFileName: Path of the input file. The parser is invoked
            with ``-sentences newline -tokenized``, i.e. it is told the file
            holds one already-tokenized sentence per line.
        parsingDumpedFileName: Path the raw parser output is written to; an
            existing file is overwritten.

    Returns:
        A list of unicode strings: the stripped parser output split on
        ``'\\n\\n'``. Empty output yields ``['']``.

    Raises:
        SystemExit: with status 1 if the parser directory is missing or the
            ``java`` executable cannot be started.
    """
    # Local imports: these names were not used by the original code, so they
    # cannot be assumed to be imported at file level.
    import io
    import subprocess

    parser_dir = '../stanford-parser-2012-03-09'
    if not os.path.exists(parser_dir):
        sys.stderr.write('can not find Stanford parser directory\n')
        sys.exit(1)

    # The command is an argument list rather than a shell string, so a file
    # name containing spaces or shell metacharacters is passed through
    # untouched. The classpath wildcard is handed to java literally (the
    # shell version quoted it for the same reason).
    cmd = [
        'java', '-server', '-mx4096m',
        '-cp', parser_dir + '/*:',
        'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
        '-retainTMPSubcategories',
        '-sentences', 'newline',
        '-tokenized',
        '-escaper', 'edu.stanford.nlp.process.PTBEscapingProcessor',
        '-outputFormat', 'wordsAndTags, penn, typedDependencies',
        '-outputFormatOptions', 'basicDependencies',
        'edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
        sentenceDumpedFileName,
    ]

    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    except OSError as err:
        sys.stderr.write('can not run java: %s\n' % err)
        sys.exit(1)

    # communicate() drains and closes the pipe. The exit status is left
    # unchecked, as before; the parser's own stderr goes to our stderr.
    raw_output, _ = proc.communicate()
    parsed_text = raw_output.strip().decode('utf-8')

    with io.open(parsingDumpedFileName, 'w', encoding='utf-8') as dump_file:
        dump_file.write(parsed_text)

    # A run of three newlines is widened to four before splitting, so the
    # split produces an empty-string entry at that position instead of
    # silently dropping it (presumably an empty output section for a
    # sentence -- confirm against the parser's output format).
    return parsed_text.replace('\n\n\n', '\n\n\n\n').split('\n\n')