def main():
    """Run proposition extraction over one input file or a directory of files.

    Command-line arguments are parsed with docopt from the module docstring:
    ``--in`` is the input path, ``--out`` the output path and ``--id`` toggles
    the ``include_id`` argument handed to ``prop_extraction``.

    When ``--in`` is a directory, every ``*.txt`` file directly inside it is
    processed and its result is written to a same-named ``.prop`` file under
    ``--out``. Otherwise ``--in`` and ``--out`` are passed to
    ``run_single_file`` as a single input/output pair. The total sentence and
    extraction counts are logged at the end.
    """
    args = docopt(__doc__)

    # Parse args
    inp = args['--in']
    out = args['--out']
    # NOTE(review): if --id is declared as a bare flag in the usage string,
    # docopt presumably yields False (not None) when it is absent, which would
    # make this always True -- confirm against the module docstring.
    include_id = args["--id"] is not None

    logging.info("Loading spaCy...")
    pe = prop_extraction(include_id)

    # Differentiate between single input file and directories
    if os.path.isdir(inp):
        logging.debug('Running on directories:')
        num_of_lines = num_of_extractions = 0
        for input_fn in glob(os.path.join(inp, '*.txt')):
            # Swap only the final extension; str.replace would also rewrite
            # any other '.txt' occurring earlier in the file name.
            base_name = os.path.splitext(path_leaf(input_fn))[0]
            output_fn = os.path.join(out, base_name + '.prop')
            logging.debug('input file: {}\noutput file:{}'.format(input_fn, output_fn))
            cur_line_counter, cur_extractions_counter = run_single_file(input_fn, output_fn, pe)
            num_of_lines += cur_line_counter
            num_of_extractions += cur_extractions_counter
    else:
        logging.debug('Running on single files:')
        num_of_lines, num_of_extractions = run_single_file(inp, out, pe)

    # An empty directory or empty input yields zero sentences; report a ratio
    # of 0.0 instead of crashing with ZeroDivisionError.
    if num_of_lines:
        ratio = float(num_of_extractions) / num_of_lines
    else:
        ratio = 0.0
    logging.info('# Sentences: {} \t #Extractions: {} \t Extractions/sentence Ratio: {}'.
                 format(num_of_lines, num_of_extractions, ratio))
# (Web-page residue, not part of the program: "Comment list" / "Table of contents".)