def logs_append(self, outstr, prefix=None):
    """Echo ``outstr`` to stderr and scroll the logs buffer to its end.

    ``\\r\\n`` sequences are normalised to ``\\n`` before the text is split
    into lines. When ``prefix`` is given it is prepended to every line; the
    final segment only receives it when non-empty (so text ending in a
    newline does not produce a dangling prefix).

    Returns ``len(lines) - 1`` for the split text, i.e. the number of
    newline-terminated lines, or 0 when ``outstr`` is empty.
    """
    self.buf_check_init()
    if len(outstr) == 0:
        return 0

    chunks = outstr.replace('\r\n', '\n').split('\n')
    if prefix is not None:
        # split() always yields at least one element, so pop() is safe.
        tail = chunks.pop()
        chunks = [prefix + piece for piece in chunks]
        chunks.append(prefix + tail if len(tail) > 0 else tail)

    print('\n'.join(chunks), file=stderr)
    stderr.flush()
    # NOTE: writing into the logs buffer itself is currently disabled:
    #self.vimx.update_noma_buffer(self.buf_map['logs'], lines, append=True)
    self.vimx.buffer_scroll_bottom(self.buf_map['logs'])
    return len(chunks) - 1
# Example source code using Python's flush()
def build_onehots(self, vocab_size=None):
    """Build one-hot encodings of ``self.x_array`` and ``self.y_array``.

    If ``vocab_size`` is truthy it is stored as ``self.charsize`` and used as
    the encoding width; otherwise the existing ``self.charsize`` is used.
    Results are stored on ``self.x_onehots`` / ``self.y_onehots`` and the
    elapsed time is reported on stderr.
    """
    if vocab_size:
        width = self.charsize = vocab_size
    else:
        width = self.charsize

    def encode(indices):
        # Row i of the identity matrix is the one-hot vector for index i.
        return np.eye(width, dtype=th.config.floatX)[indices]

    stderr.write("Constructing one-hot vector data...")
    stderr.flush()

    began = time.time()
    # Kept as plain numpy arrays rather than Theano shared variables, since
    # they can be large and need not live on the GPU.
    self.x_onehots = encode(self.x_array)
    self.y_onehots = encode(self.y_array)
    finished = time.time()

    elapsed_ms = (finished - began) * 1000.0
    stderr.write("done!\nTook {0:.4f} ms.\n".format(elapsed_ms))
def main():
    """Serve status, hover, and completion requests over stdin/stdout.

    After one-time initialisation (command table, group index, optional
    snippets) the function reads one JSON request per line from stdin. Each
    request must carry ``sequence`` and ``data`` keys; ``data['request']``
    selects ``'status'`` or ``'hover'``, and anything else is treated as a
    completion request. The reply is a single JSON line on stdout echoing
    ``sequence`` alongside the computed ``data``.

    The loop ends when stdin reaches end-of-file, so the process exits
    cleanly once the peer closes the pipe instead of failing while parsing
    an empty string.
    """
    timings = False  # set to True to report per-step durations on stderr

    def report(label, since):
        # Print the wall-clock time elapsed since `since` when timing is on.
        if timings:
            print('{} {} s'.format(label, time.time() - since), file=stderr)

    start = time.time()
    initialize()
    report('initialize', start)

    start = time.time()
    command_table = load_command_table()
    report('load_command_table', start)

    start = time.time()
    group_index = get_group_index(command_table)
    report('get_group_index', start)

    start = time.time()
    snippets = get_snippets(command_table) if AUTOMATIC_SNIPPETS_ENABLED else []
    report('get_snippets', start)

    while True:
        line = stdin.readline()
        if not line:
            # readline() returns '' only at EOF (a blank line is '\n').
            break

        start = time.time()
        request = json.loads(line)
        payload = request['data']
        kind = payload.get('request')

        if kind == 'status':
            response_data = get_status()
            report('get_status', start)
        elif kind == 'hover':
            response_data = get_hover_text(group_index, command_table, payload['command'])
            report('get_hover_text', start)
        else:
            response_data = get_completions(group_index, command_table, snippets, payload, True)
            report('get_completions', start)

        response = {
            'sequence': request['sequence'],
            'data': response_data
        }
        stdout.write(json.dumps(response) + '\n')
        stdout.flush()
        stderr.flush()
def load_from_export_format(export_file, encoding):
    """Read sentences delimited by ``#BOS``/``#EOS`` lines into trees.

    Lines between a ``#BOS <id>`` and the matching ``#EOS <id>`` are buffered
    and converted with ``_give_me_a_tree_from_export_format``; each tree gets
    its ``sent_id`` attribute set and its heads marked using the
    ``negra.headrules`` file located next to this script. A sentence with no
    body lines contributes ``None``. Progress is reported on stderr whenever
    the sentence id is a multiple of 1000.

    Returns the list of trees (or ``None`` placeholders) in file order.
    Raises ``Exception`` for any line found outside a ``#BOS``/``#EOS`` pair.
    """
    this_file = inspect.getfile(inspect.currentframe())
    SCRIPT_FOLDER = os.path.realpath(os.path.abspath(os.path.dirname(this_file)))
    hf = HeadFinder(join(SCRIPT_FOLDER, "negra.headrules"))

    trees = []
    with codecs.open(export_file, encoding=encoding) as fh:
        current_id = None
        pending = []
        for line in fh:
            if line.startswith("#BOS"):
                current_id = int(line.split(" ")[1])
                continue

            if line.startswith("#EOS"):
                closing_id = int(line.split(" ")[1])
                assert(current_id == closing_id)
                if pending:
                    tree = _give_me_a_tree_from_export_format(pending)
                    tree.attributes["sent_id"] = current_id
                    hf.mark_head(tree)
                else:
                    tree = None
                trees.append(tree)
                if current_id % 1000 == 0:
                    print("loaded %d trees" % current_id, file=stderr)
                    stderr.flush()
                # Reset state for the next sentence.
                current_id = None
                pending = []
                continue

            if current_id is None:
                # Content line outside of any sentence.
                raise Exception("oh nooooooooo")
            pending.append(line)
    return trees
def main(model_dir, test_sentences_file, test_pos_file, out_file, beam_size):
    """Decode every test sentence with a beam decoder and write the trees.

    The model is loaded from ``model_dir``; a truthy ``beam_size`` overrides
    the ``beam_size`` stored in the model's hyper-parameters. Sentences and
    POS tags are paired up, each predicted tree is written to ``out_file``
    via ``to_export`` with a 1-based index, progress is reported on stderr
    every five sentences, and throughput is printed to stderr at the end.
    """
    model, params, hyper_params, all_s2i = load_model(model_dir)
    if beam_size:
        hyper_params['beam_size'] = beam_size
    beam = BeamDecoder(params, all_s2i, hyper_params['beam_size'])

    pos_tags = load_pos_tags(test_pos_file)
    sentences = load_sentences(test_sentences_file)

    word_count = 0
    time_started = time()
    with open(out_file, "w") as fh:
        for processed, (word_seq, pos_seq) in enumerate(zip(sentences, pos_tags), 1):
            word_count += len(word_seq)
            predicted_conf, predicted_tree = beam.decode(word_seq, pos_seq)
            print(predicted_tree.to_export(processed), file=fh)
            if processed % 5 == 0:
                print("processed %d"%processed, file=stderr)
                stderr.flush()
    elapsed = time() - time_started

    sents_per_sec = len(sentences)/elapsed
    words_per_sec = word_count/elapsed
    print("sents/sec: %f words/sec: %f"%(sents_per_sec, words_per_sec), file=stderr)
def error(out):
    """Write ``out`` to stderr as a `` ** Error:`` line and flush."""
    message = "\n ** Error: %s\n" % out
    stderr.write(message)
    stderr.flush()
# debug
#
# Print strings to standard out preceded by "debug:".
#
def eout(emsg):
    """Write ``emsg`` to stderr as a `` ** Error:`` line framed by newlines, then flush."""
    stderr.write("\n")
    stderr.write(" ** Error: %s\n" % emsg)
    stderr.flush()
# stdo
#
# My own version of print but won't automatically add a linefeed to the end. And
# does a flush after every write.
#
def stdo(ostr):
    """Write ``ostr`` to stdout verbatim (no newline added) and flush at once."""
    for action, args in ((stdout.write, (ostr,)), (stdout.flush, ())):
        action(*args)
def _update_progress(self, current, total, status):
    """Redraw the single-line progress message on stderr.

    Does nothing when ``total`` is falsy. Otherwise the previously printed
    message (``self._progress_last_message``) is erased in place, the new
    ``"<percent>% complete: <status>"`` text is printed and remembered, and
    a newline is emitted once ``current`` equals ``total``.
    """
    if not total:
        return

    message = '{: >3.0f}% complete: {}'.format(current * 100 / total, status)

    # Erase the previous message: backspace to its start, overwrite it with
    # spaces, then backspace again so the new text begins at the same column.
    width = len(self._progress_last_message)
    eraser = '\b' * width + ' ' * width + '\b' * width
    print(eraser, message, sep='', end='', file=stderr)

    self._progress_last_message = message
    stderr.flush()
    if current == total:
        print('', file=stderr)
def tick(self):
    """Advance the counter; every ``self.step`` ticks, redraw it on stderr.

    The count is followed by a carriage return so the next write
    overwrites it on the same terminal line.
    """
    self.cur += 1
    if self.cur % self.step != 0:
        return
    stderr.write(str(self.cur))
    stderr.write("\r")
    stderr.flush()
def done(self):
    """Write the final counter value to stderr, terminated by a newline."""
    final_count = str(self.cur)
    stderr.write(final_count)
    stderr.write("\n")
    stderr.flush()
def tick(self):
    """Advance the counter and redraw the percentage when it increases.

    The percentage is computed with floor division so that it stays a whole
    number on both Python 2 and Python 3. With true division every tick
    would yield a slightly larger float and trigger a redraw with output
    such as ``0.3333333333333333%``.
    """
    self.cur += 1
    newPercent = (100 * self.cur) // self.total
    if newPercent > self.curPercent:
        self.curPercent = newPercent
        stderr.write(str(self.curPercent) + "%")
        # Carriage return: the next write overwrites this one in place.
        stderr.write("\r")
        stderr.flush()
def done(self):
    """Write a final ``100%`` line to stderr and flush."""
    for piece in ('100%', "\n"):
        stderr.write(piece)
    stderr.flush()
def ProgressLine(line):
    """Write ``line`` to stderr followed by a carriage return, then flush.

    The carriage return leaves the cursor at the start of the line so a
    later write overwrites this text.
    """
    for piece in (line, "\r"):
        stderr.write(piece)
    stderr.flush()
def main_generate():
    """Generate a 2048-bit key with ``generate`` and print it to stdout.

    Progress messages passed by ``generate`` to the callback are echoed to
    stderr immediately. The key object is printed first, followed by its
    ``p``, ``g``, ``y`` and ``x`` attributes separated by single spaces.
    """
    from Crypto.Random import new
    from sys import stderr
    randfunc = new().read

    def process_func(msg):
        # Flush after each message so progress shows up as it happens.
        stderr.write("%s" % (msg,))
        stderr.flush()

    C = generate(2048, randfunc, process_func)
    # Single-argument print(...) and explicit %-formatting give the same
    # output under the Python 2 print statement and the print() function.
    print(C)
    print("%s %s %s %s" % (C.p, C.g, C.y, C.x))
def _flush_batch(self, batch):
self.on_before_batch_flush(batch)
try:
batch.flush()
finally:
self.on_after_batch_flush(batch)
return 0
def _continue_with_batch(self):
    """
    Flushes one of the batches, as chosen by ``_select_batch_to_flush``.

    When no batch is selected nothing is flushed; a debug message is
    written if ``_debug_options.DUMP_FLUSH_BATCH`` is set.

    :return: the batch that was flushed, if there was a flush;
    otherwise, ``None``.
    """
    batch = self._select_batch_to_flush()
    if batch is None:
        if _debug_options.DUMP_FLUSH_BATCH:
            debug.write('@async: no batch to flush')
        # Return regardless of the debug flag: falling through would call
        # self._batches.remove(None) and try to flush a non-existent batch.
        return None
    self._batches.remove(batch)
    self._flush_batch(batch)
    return batch
def try_time_based_dump(self, last_task=None):
    """Dump scheduler state if the configured dump interval has elapsed.

    Returns immediately when less than
    ``_debug_options.SCHEDULER_STATE_DUMP_INTERVAL`` seconds have passed
    since the last dump. Otherwise records the current time, writes a
    header, calls ``self.dump()`` and, when ``last_task`` is given, writes a
    line describing it. The footer is written and stdout/stderr are flushed
    even if dumping raises.
    """
    now = time.time()
    since_last = now - self._last_dump_time
    if since_last < _debug_options.SCHEDULER_STATE_DUMP_INTERVAL:
        return

    self._last_dump_time = now
    debug.write('\n--- Scheduler state dump: --------------------------------------------')
    try:
        self.dump()
        if last_task is not None:
            task_text = debug.str(last_task)
            debug.write('Last task: %s' % task_text, 1)
    finally:
        debug.write('----------------------------------------------------------------------\n')
        for stream in (stdout, stderr):
            stream.flush()
def dump_error(error, tb=None):
    """Write the formatted error (with async stack trace) to stderr.

    The text comes from ``format_error(error, tb=tb)``; ``'No error'`` is
    written when that returns a falsy value. stdout and stderr are flushed
    afterwards, even if formatting or writing raises.
    """
    try:
        text = format_error(error, tb=tb) or 'No error'
        stderr.write('\n' + text)
    finally:
        for stream in (stdout, stderr):
            stream.flush()
def dump_stack(skip=0, limit=None):
    """Print the current stack trace to stdout between two rule lines.

    ``skip`` is the number of innermost entries to omit in addition to the
    two this function always drops. ``limit`` caps the number of entries
    shown and defaults to ``options.STACK_DUMP_LIMIT``. The closing rule is
    printed and stdout flushed even if extracting the stack fails.
    """
    dropped = skip + 2  # innermost entries to cut from the extracted stack
    if limit is None:
        limit = options.STACK_DUMP_LIMIT
    # Request enough entries that `limit` of them remain after trimming.
    if limit is None:
        depth = None
    else:
        depth = limit + dropped

    print('--- Stack trace: -----------------------------------------------------')
    try:
        frames = traceback.extract_stack(limit=depth)
        rendered = traceback.format_list(frames[:-dropped])
        print(''.join(rendered), end='')
    finally:
        print('----------------------------------------------------------------------')
        stdout.flush()
def async_exception_hook(type, error, tb):
    """Exception hook that also prints the async stack trace.

    Flushes stdout and stderr, delegates to ``original_hook`` when one is
    set, then reports the error through ``dump_error``.
    """
    global original_hook
    for stream in (stdout, stderr):
        stream.flush()
    previous = original_hook
    if previous is not None:
        previous(type, error, tb)
    dump_error(error, tb=tb)
def main():
    """Command-line entry point: train a morph model or segment with one.

    Logging goes to stderr at INFO level. With ``-load`` an existing model
    is loaded and every word in the ``-data`` file is segmented with Viterbi
    search, one ``' + '``-joined segmentation per output line. Without
    ``-load`` a new model is trained on the ``-data`` file and its
    segmentation is printed.
    """
    logging.basicConfig(stream=stderr, level=INFO)
    a = ArgumentParser()
    a.add_argument('-data', dest='data', required=True, metavar='WORDLIST',
                   help=("a text file (the corpus) consisting of one word per line. The word may be preceded by a word"
                         " count (separated by whitespace), otherwise a count of one is assumed. If the same word "
                         "occurs many times, the counts are accumulated."))
    a.add_argument('-finish', dest='finish', metavar='float', type=float, default=0.005,
                   help=("convergence threshold. From one pass over all input words to the next, "
                         "if the overall coding length in bits (i.e. logprob) of the lexicon together with the corpus "
                         "improves less than this value times the number of word types (distinct word forms) in the "
                         "data, the program stops. (If this value is small the program runs for a longer time and the "
                         "result is in principle more accurate. However, the changes in word splittings during the "
                         "last training epochs are usually very small.) The value must be within the range: 0 < float "
                         "< 1. Default 0.005"))
    a.add_argument('-rand', dest='rand', metavar='int', type=int, default=0,
                   help="random seed that affects the sorting of words when processing them. Default 0")
    a.add_argument('-gammalendistr', dest='gammalendistr', type=float, metavar='float', nargs=2,
                   help=("Use Gamma Length distribution with two parameters. Float1 is the prior for the most common "
                         "morph length in the lexicon, such that 0 < float1 <= 24*float2. Float2 is the beta value of "
                         "the Gamma pdf, such that beta > 0. The beta value affects the wideness of the morph length "
                         "distribution. The higher beta, the wider and less discriminative the distribution. If this "
                         "option is omitted, morphs in the lexicon are terminated with an end-of-morph character, "
                         "which corresponds to an exponential pdf for morph lengths. Suggested values: float1 = 7.0, "
                         "float2 = 1.0 "))
    a.add_argument('-zipffreqdistr', dest='zipffreqdistr', type=float, metavar='float',
                   help=("Use Zipf Frequency distribution with paramter float1 for the proportion of morphs in the "
                         "lexicon that occur only once in the data (hapax legomena): 0 < value < 1. If this option is "
                         "omitted a (non-informative) morph frequency distribution based on enumerative coding is used"
                         " instead. Suggested value: 0.5"))
    a.add_argument('-load', dest='load', metavar='filename',
                   help=("An existing model for word splitting is loaded from a file (which is the output of an "
                         "earlier run of this program) and the words in the corpus defined using the option '-data "
                         "wordlist' are segmented according to the loaded model. That is, "
                         "no learning of a new model takes place. The existing model is simply used for segmenting a "
                         "list of words. The segmentation takes place using Viterbi search. No new morphs are ever "
                         "created (except one-letter morphs, if there is no other way of segmenting a particular input"
                         " word)"))
    a.add_argument('-encoding', dest='encoding', help='Input encoding (defaults to local encoding)')
    # Hidden option: accepted on the command line but omitted from --help.
    a.add_argument('-savememory', type=int, nargs='?', help=SUPPRESS)
    options = a.parse_args()

    # Both modes build the model from the same parsed options.
    m = MorphModel(vars(options))
    if options.load is not None:
        m.load(options.load)
        # Use a context manager so the word list is closed deterministically.
        with open(options.data) as wordlist:
            for word in wordlist:
                print(' + '.join(m.viterbi_segment_word(word.strip())))
    else:
        m.train(options.data)
        # Push out pending log output before the segmentation is printed.
        stderr.flush()
        m.print_segmentation()