def track_decoding(self, decoded_str, expected_str):
    self.letter_edit_distance = editdistance.eval(expected_str, decoded_str)
    self.letter_error_rate = self.letter_edit_distance / len(expected_str)
    self.word_edit_distance = editdistance.eval(expected_str.split(), decoded_str.split())
    self.word_error_rate = self.word_edit_distance / len(expected_str.split())
    self.sum_letter_edit_distance += self.letter_edit_distance
    self.sum_letter_error_rate += self.letter_error_rate
    self.sum_word_edit_distance += self.word_edit_distance
    self.sum_word_error_rate += self.word_error_rate
    self.decodings_counter += 1
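For reference, the same letter- and word-level error rates can be reproduced in isolation; the strings below are made up for illustration and assume only that the editdistance package is installed.
import editdistance

expected = "the cat sat"
decoded = "the bat sat"
# character-level edit distance and letter error rate
led = editdistance.eval(expected, decoded)                  # 1
ler = led / len(expected)                                   # 1 / 11
# word-level edit distance and word error rate (split on whitespace)
wed = editdistance.eval(expected.split(), decoded.split())  # 1
wer = wed / len(expected.split())                           # 1 / 3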
def run_step(self, model: SpeechModel, sess: tf.Session, stats: EvalStatistics,
             save: bool, verbose=True, feed_dict: Dict = None):
    global_step = model.global_step.eval()
    # Validate on data set and write summary
    if save:
        avg_loss, decoded, label, summary = model.step(sess, update=False, decode=True, return_label=True,
                                                       summary=True, feed_dict=feed_dict)
        model.summary_writer.add_summary(summary, global_step)
    else:
        avg_loss, decoded, label = model.step(sess, update=False, decode=True,
                                              return_label=True, feed_dict=feed_dict)
    if verbose:
        perplexity = np.exp(float(avg_loss)) if avg_loss < 300 else float("inf")
        print("validation average loss {:.2f} perplexity {:.2f}".format(avg_loss, perplexity))
    # Print decodings
    decoded_ids_paths = [Evaluation.extract_decoded_ids(path) for path in decoded]
    for label_ids in Evaluation.extract_decoded_ids(label):
        expected_str = speecht.vocabulary.ids_to_sentence(label_ids)
        if verbose:
            print('expected: {}'.format(expected_str))
        for decoded_path in decoded_ids_paths:
            decoded_ids = next(decoded_path)
            decoded_str = speecht.vocabulary.ids_to_sentence(decoded_ids)
            stats.track_decoding(decoded_str, expected_str)
            if verbose:
                print('decoded: {}'.format(decoded_str))
                print('LED: {} LER: {:.2f} WED: {} WER: {:.2f}'.format(stats.letter_edit_distance,
                                                                       stats.letter_error_rate,
                                                                       stats.word_edit_distance,
                                                                       stats.word_error_rate))
def closest(self, date=datetime.date.today(), country=None,
            limit=datetime.timedelta(days=366)):
    """
    Get the closest CPI value for a specified date. The date defaults to
    today. A limit can be provided to exclude all values for dates further
    away than defined by the limit. This defaults to 366 days.
    """
    # NOTE: `limit` is accepted but not applied below.
    # Try to get the country; fall back to fuzzy matching on a likely typo
    if country in self.data:
        possible_countries = [country]
    else:
        possible_countries = [elem for elem in self.data.keys() if editdistance.eval(country, elem) < 3]
    if len(possible_countries) == 0:
        return "No country found, typo unlikely for " + country
    # Find the closest date
    country_cpi = {}
    for country in possible_countries:
        min_year_diff = 1000
        min_year = 0
        for year in self.data[country]:
            if min_year_diff > abs(date.year - int(year)):
                min_year_diff = abs(date.year - int(year))
                min_year = year
        country_cpi[country] = self.data[country][min_year]
    if len(country_cpi) == 1:
        return next(iter(country_cpi.values()))
    else:
        return country_cpi
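The fallback branch above amounts to fuzzy key matching; a minimal sketch with a made-up data dict (the threshold of 3 edits mirrors the code above):
import editdistance

data = {"Germany": {"2015": 100.0}, "France": {"2015": 99.1}}
query = "Germny"  # a plausible typo
# keep only country names within two edits of the query
candidates = [name for name in data if editdistance.eval(query, name) < 3]
print(candidates)  # ['Germany'] -- edit distance 1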
def compute_cer(results):
    """
    Arguments:
        results (list): list of ground truth and
            predicted sequence pairs.
    Returns the CER for the full set.
    """
    dist = sum(editdistance.eval(label, pred)
               for label, pred in results)
    total = sum(len(label) for label, _ in results)
    return dist / total
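An illustrative calculation with made-up (label, prediction) pairs, assuming the editdistance package is installed; it mirrors compute_cer above.
import editdistance

results = [("hello", "helo"), ("world", "word")]
# total edits divided by total reference length, as in compute_cer
cer = (sum(editdistance.eval(label, pred) for label, pred in results)
       / sum(len(label) for label, _ in results))
print(cer)  # (1 + 1) / (5 + 5) = 0.2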
def __evaluateLevensteinDistance(self, question1, question2):
    leven_dis = levendis.eval(question1.lower(), question2.lower())
    return leven_dis
def fast_levenshtein_distance(self, source, target):
    """Wrapper for the distance function in the Levenshtein module
    Args:
        source (unicode): source word
        target (unicode): target word
    Returns:
        int: minimum number of Levenshtein edits required to get from
            `source` to `target`
    """
    return int(editdistance.eval(source, target))
def fast_levenshtein_distance_div_maxlen(self, source, target):
    """Levenshtein distance divided by maxlen
    Args:
        source (unicode): source word
        target (unicode): target word
    Returns:
        float: minimum number of Levenshtein edits required to get from
            `source` to `target`, divided by the length of the longer
            of these arguments
    """
    maxlen = max(len(source), len(target))
    return int(editdistance.eval(source, target)) / maxlen
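Normalising by the longer string bounds the score to [0, 1]; a small standalone check (the word pair is illustrative):
import editdistance

source, target = "kitten", "sitting"
dist = editdistance.eval(source, target)           # 3 edits
normalized = dist / max(len(source), len(target))  # 3 / 7 ≈ 0.43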
def calc_score(value, values):
    distance = 1000000000
    for v in values:
        if len(value) == len(v):
            d = bit_edit_distance(value, v)
        else:
            d = editdistance.eval(value, v) * 8
        distance = min(distance, d)
    return distance
def batched_wer(ref, hyp):
    ''' Computes mean WER
        ref: list of references
        hyp: list of corresponding hypotheses
    '''
    assert len(ref) == len(hyp)
    wer = 0.
    for r, f in zip(ref, hyp):
        rate = editdistance.eval(r, f) / len(r)
        wer += rate
    return wer / len(ref)
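An illustrative call, assuming batched_wer above is in scope; references and hypotheses are passed as token lists so that editdistance.eval counts word edits rather than character edits.
refs = [["the", "cat", "sat"], ["hello", "world"]]
hyps = [["the", "bat", "sat"], ["hello", "word"]]
print(batched_wer(refs, hyps))  # (1/3 + 1/2) / 2 ≈ 0.42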
def strSimilarity(word1, word2):
    ''' Measure the similarity based on Edit Distance
    ### Measure how similar word1 is with respect to word2
    '''
    diff = ed.eval(word1.lower(), word2.lower())  # search
    # lcs = LCS(word1, word2)  # search
    length = max(len(word1), len(word2))
    if diff >= length:
        similarity = 0.0
    else:
        similarity = 1.0 * (length - diff) / length
    return similarity
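Illustrative calls, assuming strSimilarity above and its ed (editdistance) import are in scope; identical words score 1.0 and completely different words of equal length score 0.0.
print(strSimilarity("Table", "table"))  # 1.0  (comparison is case-insensitive)
print(strSimilarity("table", "cable"))  # 0.8  (1 edit over length 5)
print(strSimilarity("table", "xyzzy"))  # 0.0  (5 edits >= length 5)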
def getFSNSMetrics(gtIdTransDict, methodIdTransDict):
    """Provides metrics for the FSNS dataset.
    FM, precision, recall and correctSequences are an implementation of the metrics described in
    "End-to-End Interpretation of the French Street Name Signs Dataset"
    [https://link.springer.com/chapter/10.1007%2F978-3-319-46604-0_30]
    Params:
        gtIdTransDict : sample_id to data dictionary. A simple file name to file contents might do.
        methodIdTransDict : sample_id to data dictionary. A simple file name to file contents might do.
    Returns:
        A tuple of floats between 0 and 1 with all measurements worth reporting:
        FM, precision, recall, and globally correct word transcriptions. If someone returned
        "rue" as the transcription of every image, assuming half the images have it, he
        would get a precision of 50%, a recall of ~5% and an FM of ~9.1%.
        He would get a correctSequences score of 0%, and a similarity of e%.
    """
    def compareTexts(sampleTxt, gtTxt):
        relevant = gtTxt.lower().split()
        retrieved = sampleTxt.lower().split()
        correct = set(relevant).intersection(set(retrieved))
        similarity = 1.0 / (1 + editdistance.eval(gtTxt.lower(), sampleTxt.lower()))
        res = (len(correct), len(relevant), len(retrieved), relevant == retrieved, similarity)
        return res
    mDict = {k: '' for k in gtIdTransDict.keys()}
    mDict.update(methodIdTransDict)
    methodIdTransDict = mDict
    methodKeys = sorted(methodIdTransDict.keys())
    gtKeys = sorted(gtIdTransDict.keys())
    if len(methodKeys) != len(set(methodKeys)) or len(gtKeys) != len(set(gtKeys)) or len(set(methodKeys) - set(gtKeys)) > 0:
        # gt and method disagree on samples
        sys.stderr.write("GT and submission disagree on the sample ids\n")
        sys.exit(1)
    corectRelevantRetrievedSimilarity = np.zeros([len(gtKeys), 5], dtype='float32')
    for k in range(len(gtKeys)):
        sId = gtKeys[k]
        corectRelevantRetrievedSimilarity[k, :] = compareTexts(methodIdTransDict[sId], gtIdTransDict[sId])
    # precision = correct / retrieved, recall = correct / relevant
    precision = corectRelevantRetrievedSimilarity[:, 0].sum() / corectRelevantRetrievedSimilarity[:, 2].sum()
    recall = corectRelevantRetrievedSimilarity[:, 0].sum() / corectRelevantRetrievedSimilarity[:, 1].sum()
    FM = (2 * precision * recall) / (precision + recall)
    correctSequences = corectRelevantRetrievedSimilarity[:, 3].mean()
    similarity = corectRelevantRetrievedSimilarity[:, 4].mean()
    combinedSoftMetric = (1 - FM) * FM + FM * similarity  # the better FM is, the less it matters in the overall score
    return combinedSoftMetric, FM, precision, recall, similarity, correctSequences, corectRelevantRetrievedSimilarity
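A hypothetical call with two made-up samples, assuming the numpy, sys and editdistance imports used above are present; both dictionaries must share the same sample ids.
gt = {"img1.png": "Rue de la Paix", "img2.png": "Avenue Foch"}
pred = {"img1.png": "Rue de la Paix", "img2.png": "Avenue Fochh"}
softMetric, FM, precision, recall, similarity, correctSequences, perSample = getFSNSMetrics(gt, pred)
print(FM, precision, recall, correctSequences)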
def _correct(observed_sentence, bigrams, distribution, max_error_rate):
    """Corrects a given sentence.
    Note: The lower the max_error_rate, the faster the algorithm, but the
    likelier it will fail.
    Args:
        observed_sentence: Observed sentence.
        bigrams: First-order Markov chain of likely word sequences.
        distribution: Error probability distribution function.
        max_error_rate: Maximum number of errors in a word to consider.
    Returns:
        Ordered list of tuples of (corrected sentence, its probability).
        Most likely interpretations come first.
    """
    trellis = [{Sentence.START: (1.0, None)}]
    observed_words = list(observed_sentence)
    number_of_words = len(observed_words)
    for k in range(1, number_of_words):
        observed_word = observed_words[k]
        max_errors = int(len(observed_word) * max_error_rate) + 1
        current_states = {}
        previous_states = trellis[k - 1]
        trellis.append(current_states)
        for previous_word in previous_states:
            previous_prob = previous_states[previous_word][0]
            future_states = bigrams.yield_future_states((previous_word,))
            for possible_word, conditional_prob in future_states:
                # Conditional probability: P(X_k | X_k-1) * previous
                # probability.
                total_prob = conditional_prob * previous_prob
                # Emission probability: P(E_k | X_k).
                distance = editdistance.eval(observed_word, possible_word)
                total_prob *= distribution(distance)
                # Ignore states that have too many mistakes.
                if distance > max_errors:
                    continue
                # Only keep link of max probability.
                if possible_word in current_states:
                    if current_states[possible_word][0] >= total_prob:
                        continue
                current_states[possible_word] = (total_prob, previous_word)
    # Find most likely ending.
    interpretations = list(_backtrack_path(trellis, x) for x in trellis[-1])
    interpretations.sort(key=lambda x: x[1], reverse=True)
    return interpretations
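The distribution argument maps a character edit distance to an emission probability; a hypothetical example follows (the geometric decay and the p=0.1 value are assumptions, not part of the original code).
def geometric_error_distribution(distance, p=0.1):
    # P(observed word | intended word) decays geometrically with the
    # number of character edits between them.
    return (1.0 - p) * (p ** distance)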
def test(self, multilabel_threshold=0.5):
    if not self.include_test:
        raise ValueError('Please do not call .test() if no test data is available.')
    score_dict = {}
    # get test predictions:
    test_in = {}
    if self.include_token:
        test_in['focus_in'] = self.test_X_focus
    if self.include_context:
        test_in['context_in'] = self.test_contexts
    test_preds = self.model.predict(test_in,
                                    batch_size=self.batch_size)
    if isinstance(test_preds, np.ndarray):
        test_preds = [test_preds]
    if self.include_lemma:
        print('::: Test scores (lemmas) :::')
        pred_lemmas = self.preprocessor.inverse_transform_lemmas(predictions=test_preds[self.lemma_out_idx])
        if self.postcorrect:
            for i in range(len(pred_lemmas)):
                if pred_lemmas[i] not in self.known_lemmas:
                    pred_lemmas[i] = min(self.known_lemmas,
                                         key=lambda x: editdistance.eval(x, pred_lemmas[i]))
        score_dict['test_lemma'] = evaluation.single_label_accuracies(gold=self.test_lemmas,
                                                                      silver=pred_lemmas,
                                                                      test_tokens=self.test_tokens,
                                                                      known_tokens=self.preprocessor.known_tokens)
    if self.include_pos:
        print('::: Test scores (pos) :::')
        pred_pos = self.preprocessor.inverse_transform_pos(predictions=test_preds[self.pos_out_idx])
        score_dict['test_pos'] = evaluation.single_label_accuracies(gold=self.test_pos,
                                                                    silver=pred_pos,
                                                                    test_tokens=self.test_tokens,
                                                                    known_tokens=self.preprocessor.known_tokens)
    if self.include_morph:
        print('::: Test scores (morph) :::')
        pred_morph = self.preprocessor.inverse_transform_morph(predictions=test_preds[self.morph_out_idx],
                                                               threshold=multilabel_threshold)
        if self.include_morph == 'label':
            score_dict['test_morph'] = evaluation.single_label_accuracies(gold=self.test_morph,
                                                                          silver=pred_morph,
                                                                          test_tokens=self.test_tokens,
                                                                          known_tokens=self.preprocessor.known_tokens)
        elif self.include_morph == 'multilabel':
            score_dict['test_morph'] = evaluation.multilabel_accuracies(gold=self.test_morph,
                                                                        silver=pred_morph,
                                                                        test_tokens=self.test_tokens,
                                                                        known_tokens=self.preprocessor.known_tokens)
    return score_dict
def fix_ambiguous(ambiguous_sbi):
    """
    For each ambiguous sbi code find the most likely candidate
    0 vs.id,
    1 vs.naam,
    2 codes.hr_code,
    3 codes.alt_code,
    4 codes.title,
    5 codes.alt_title,
    6 codes.sub_cat,
    7 codes.alt_sub_cat,
    8 codes.mks_title
    """
    original_count = 0
    suggestion_count = 0
    for row in ambiguous_sbi:
        normalcode = row[2]
        zerocode = row[3]
        desc1 = row[4]
        desc2 = row[5]
        original = row[8]
        distance_desc1 = editdistance.eval(desc1, original)
        distance_desc2 = editdistance.eval(desc2, original)
        if distance_desc1 > distance_desc2:
            # the alternative match starting with 0 is better
            suggestion_count += 1
            ves = hrmodels.Vestiging.objects.get(id=row[0])
            invalid_activiteit = ves.activiteiten.get(sbi_code=normalcode)
            # fix the code
            invalid_activiteit.sbi_code = zerocode
            # save the corrected sbi code
            invalid_activiteit.save()
        else:
            # do nothing, the default is fine
            original_count += 1
        log.debug(f'{normalcode}, {zerocode}, {desc1[:18]}, {desc2[:18]}, {original[:18]}, {distance_desc1}, {distance_desc2}')  # noqa
    log.debug("%s-%s = Original-Suggestion", original_count, suggestion_count)