Python eval() example source code

evaluation.py source code (project: speechT, author: timediv)
def track_decoding(self, decoded_str, expected_str):
    self.letter_edit_distance = editdistance.eval(expected_str, decoded_str)
    self.letter_error_rate = self.letter_edit_distance / len(expected_str)
    self.word_edit_distance = editdistance.eval(expected_str.split(), decoded_str.split())
    self.word_error_rate = self.word_edit_distance / len(expected_str.split())
    self.sum_letter_edit_distance += self.letter_edit_distance
    self.sum_letter_error_rate += self.letter_error_rate
    self.sum_word_edit_distance += self.word_edit_distance
    self.sum_word_error_rate += self.word_error_rate
    self.decodings_counter += 1
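
For context, a minimal standalone sketch (not part of speechT) of the same letter- and word-level statistics, using editdistance.eval on hypothetical strings:

import editdistance

expected_str = "the cat sat"      # hypothetical reference transcript
decoded_str = "the cut sat"       # hypothetical decoder output

letter_edit_distance = editdistance.eval(expected_str, decoded_str)
letter_error_rate = letter_edit_distance / len(expected_str)
word_edit_distance = editdistance.eval(expected_str.split(), decoded_str.split())
word_error_rate = word_edit_distance / len(expected_str.split())

print(letter_edit_distance, letter_error_rate)   # 1 0.0909...
print(word_edit_distance, word_error_rate)       # 1 0.3333...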
evaluation.py source code (project: speechT, author: timediv)
def run_step(self, model: SpeechModel, sess: tf.Session, stats: EvalStatistics,
               save: bool, verbose=True, feed_dict: Dict=None):
    global_step = model.global_step.eval()

    # Validate on data set and write summary
    if save:
      avg_loss, decoded, label, summary = model.step(sess, update=False, decode=True, return_label=True,
                                                     summary=True, feed_dict=feed_dict)
      model.summary_writer.add_summary(summary, global_step)
    else:
      avg_loss, decoded, label = model.step(sess, update=False, decode=True,
                                            return_label=True, feed_dict=feed_dict)

    if verbose:
      perplexity = np.exp(float(avg_loss)) if avg_loss < 300 else float("inf")
      print("validation average loss {:.2f} perplexity {:.2f}".format(avg_loss, perplexity))

    # Print decode
    decoded_ids_paths = [Evaluation.extract_decoded_ids(path) for path in decoded]
    for label_ids in Evaluation.extract_decoded_ids(label):
      expected_str = speecht.vocabulary.ids_to_sentence(label_ids)
      if verbose:
        print('expected: {}'.format(expected_str))
      for decoded_path in decoded_ids_paths:
        decoded_ids = next(decoded_path)
        decoded_str = speecht.vocabulary.ids_to_sentence(decoded_ids)
        stats.track_decoding(decoded_str, expected_str)
        if verbose:
          print('decoded: {}'.format(decoded_str))
          print('LED: {} LER: {:.2f} WED: {} WER: {:.2f}'.format(stats.letter_edit_distance,
                                                                 stats.letter_error_rate,
                                                                 stats.word_edit_distance,
                                                                 stats.word_error_rate))
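
The perplexity printed above is simply exp(loss), capped to avoid overflow; a tiny standalone illustration with a hypothetical loss value:

import numpy as np

avg_loss = 2.3   # hypothetical validation loss
perplexity = np.exp(float(avg_loss)) if avg_loss < 300 else float("inf")
print("validation average loss {:.2f} perplexity {:.2f}".format(avg_loss, perplexity))   # ~9.97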
cpi.py source code (project: inflation_calc, author: EricSchles)
def closest(self, date=datetime.date.today(), country=None,
                limit=datetime.timedelta(days=366)):
        """
        Get the closest CPI value for a specified date. The date defaults to
        today. A limit can be provided to exclude all values for dates further
        away than defined by the limit. This defaults to 366 days.
        """

        # Try to get the country directly; fall back to fuzzy matching on the
        # country name (edit distance < 3) if it is not a known key.
        if country in self.data:
            possible_countries = [country]
        else:
            possible_countries = [elem for elem in self.data.keys()
                                  if editdistance.eval(country, elem) < 3]
            if len(possible_countries) == 0:
                return "No country found, typo unlikely for " + country

        # Find the closest year for each candidate country.
        # NOTE: the `limit` argument is not applied by this implementation.
        country_cpi = {}
        for country in possible_countries:
            min_year_diff = 1000
            min_year = 0
            for year in self.data[country]:
                if min_year_diff > abs(date.year - int(year)):
                    min_year_diff = abs(date.year - int(year))
                    min_year = year
            country_cpi[country] = self.data[country][min_year]
        if len(country_cpi) == 1:
            return next(iter(country_cpi.values()))
        else:
            return country_cpi
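
A hedged illustration of the fuzzy-match fallback above: country names within an edit distance of 3 of the query are accepted as candidates (the data keys and the typo are invented):

import editdistance

data_keys = ["Germany", "France", "United States"]   # hypothetical country keys
query = "Germny"                                     # hypothetical typo
candidates = [k for k in data_keys if editdistance.eval(query, k) < 3]
print(candidates)   # ['Germany']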
score.py source code (project: speech, author: awni)
def compute_cer(results):
    """
    Arguments:
        results (list): list of ground truth and
            predicted sequence pairs.

    Returns the CER for the full set.
    """
    dist = sum(editdistance.eval(label, pred)
                for label, pred in results)
    total = sum(len(label) for label, _ in results)
    return dist / total
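
A hypothetical usage of compute_cer, where each pair is (ground truth, prediction) and editdistance is assumed to be imported in the same module:

results = [("hello", "helo"), ("world", "word")]   # (ground truth, prediction) pairs
print(compute_cer(results))   # (1 + 1) / (5 + 5) = 0.2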
quora_predictor.py source code (project: tensorflow-quorakaggle, author: ram1988)
def __evaluateLevensteinDistance(self, question1, question2):
        leven_dis = levendis.eval(question1.lower(), question2.lower())
        return leven_dis
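
The alias `levendis` is presumably the editdistance library imported under another name (the import is not shown in this snippet); a standalone equivalent:

import editdistance as levendis   # assumed alias; the original import is not shown

print(levendis.eval("What is AI?".lower(), "What is AI".lower()))   # 1 (the trailing '?')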
distance.py source code (project: panphon, author: dmort27)
def fast_levenshtein_distance(self, source, target):
        """Wrapper for the distance function in the Levenshtein module

        Args:
            source (unicode): source word
            target (unicode): target word

        Returns:
            int: minimum number of Levenshtein edits required to get from
                 `source` to `target`
        """
        return int(editdistance.eval(source, target))
distance.py source code (project: panphon, author: dmort27)
def fast_levenshtein_distance_div_maxlen(self, source, target):
        """Levenshtein distance divided by maxlen

        Args:
            source (unicode): source word
            target (unicode): target word

        Returns:
            float: minimum number of Levenshtein edits required to get from
                   `source` to `target`, divided by the length of the longer
                   of the two arguments
        """
        maxlen = max(len(source), len(target))
        return int(editdistance.eval(source, target)) / maxlen
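
For reference, a minimal standalone version of the same normalized distance built directly on editdistance (the sample words are arbitrary):

import editdistance

def normalized_levenshtein(source, target):
    # Levenshtein distance divided by the length of the longer argument
    maxlen = max(len(source), len(target))
    return editdistance.eval(source, target) / maxlen

print(normalized_levenshtein(u"kitten", u"sitting"))   # 3 / 7 = 0.428...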
compare.py source code (project: agrigento, author: ucsb-seclab)
def calc_score(value, values):
    distance = 1000000000
    for v in values:
        if len(value) == len(v):
            d = bit_edit_distance(value, v)
        else:
            d = editdistance.eval(value, v) * 8
        distance = min(distance, d)

    return distance
utils.py source code (project: attention_ocr, author: lightcaster)
def batched_wer(ref, hyp):
    ''' Computes mean WER 

    ref: list of references
    hyp: list of corresponding hypotheses

    '''
    assert len(ref) == len(hyp)

    wer = 0.
    for r,f in zip(ref, hyp):
        rate = editdistance.eval(r, f) / len(r)
        wer += rate

    return wer/len(ref)
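
A hypothetical call, passing references and hypotheses as token lists so the rate is a word error rate (strings would give a character error rate instead):

refs = [["the", "cat", "sat"], ["hello", "world"]]   # hypothetical references
hyps = [["the", "cut", "sat"], ["hello", "word"]]    # hypothetical hypotheses
print(batched_wer(refs, hyps))   # (1/3 + 1/2) / 2 = 0.4166...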
verifyLogical.py source code (project: dnnQuery, author: richardxiong)
def strSimilarity(word1, word2):
    ''' Measure the similarity based on Edit Distance
    ### Measure how similar word1 is with respect to word2
    '''
    diff = ed.eval(word1.lower(), word2.lower())   #search
    # lcs = LCS(word1, word2)   #search
    length = max(len(word1), len(word2))
    if diff >= length:
        similarity = 0.0
    else:
        similarity = 1.0 * (length-diff) / length
    return similarity
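
A hypothetical usage, assuming `ed` in the snippet above is the editdistance module imported under that alias:

import editdistance as ed   # assumption: the alias used by strSimilarity above

print(strSimilarity("Table", "tables"))   # diff = 1, length = 6 -> (6 - 1) / 6 = 0.833...
print(strSimilarity("cat", "dog"))        # diff = 3 >= length = 3 -> 0.0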
verifyLogicalid.py source code (project: dnnQuery, author: richardxiong)
def strSimilarity(word1, word2):
    ''' Measure the similarity based on Edit Distance
    ### Measure how similar word1 is with respect to word2
    '''
    diff = ed.eval(word1.lower(), word2.lower())   #search
    # lcs = LCS(word1, word2)   #search
    length = max(len(word1), len(word2))
    if diff >= length:
        similarity = 0.0
    else:
        similarity = 1.0 * (length-diff) / length
    return similarity
verifyLogicaltr.py source code (project: dnnQuery, author: richardxiong)
def strSimilarity(word1, word2):
    ''' Measure the similarity based on Edit Distance
    ### Measure how similar word1 is with respect to word2
    '''
    diff = ed.eval(word1.lower(), word2.lower())   #search
    # lcs = LCS(word1, word2)   #search
    length = max(len(word1), len(word2))
    if diff >= length:
        similarity = 0.0
    else:
        similarity = 1.0 * (length-diff) / length
    return similarity
verifyLogicalidx.py source code (project: dnnQuery, author: richardxiong)
def strSimilarity(word1, word2):
    ''' Measure the similarity based on Edit Distance
    ### Measure how similar word1 is with respect to word2
    '''
    diff = ed.eval(word1.lower(), word2.lower())   #search
    # lcs = LCS(word1, word2)   #search
    length = max(len(word1), len(word2))
    if diff >= length:
        similarity = 0.0
    else:
        similarity = 1.0 * (length-diff) / length
    return similarity
rrcmetrics.py source code (project: pe, author: anguelos)
def getFSNSMetrics(gtIdTransDict,methodIdTransDict):
    """Provides metrics for the FSNS dataset.
    FM, precision, recall and correctSequences are an implementation of the metrics described in
    "End-to-End Interpretation of the French Street Name Signs Dataset"
    [https://link.springer.com/chapter/10.1007%2F978-3-319-46604-0_30]
    Params:
        gtIdTransDict : sample_id to data dictionary. A simple file name to file contents might do.
        methodIdTransDict : sample_id to data dictionary. A simple file name to file contents might do.

    returns:
        A tuple of floats between 0 and 1 containing all measurements worth reporting:
        FM, Precision, Recall, globally correct word transcriptions. If someone returned
        "rue" as the transcription of every image, and half the images contain it, he
        would get a precision of 50%, a recall of ~5% and an FM of ~9.1%.
        He would get a correctSequences score of 0% and a small similarity score.
    """
    def compareTexts(sampleTxt,gtTxt):
        relevant=gtTxt.lower().split()
        retrieved=sampleTxt.lower().split()
        correct=(set(relevant).intersection(set(retrieved)))
        similarity=1.0/(1+editdistance.eval(gtTxt.lower(),sampleTxt.lower()))
        res=(len(correct),len(relevant),len(retrieved),relevant==retrieved,similarity)
        return res
    mDict={k:'' for k in gtIdTransDict.keys()}
    mDict.update(methodIdTransDict)
    methodIdTransDict=mDict
    methodKeys=sorted(methodIdTransDict.keys())
    gtKeys=sorted(gtIdTransDict.keys())
    if len(methodKeys) != len(set(methodKeys)) or len(gtKeys) != len(set(gtKeys)) or len(set(methodKeys) - set(gtKeys)) > 0:  # gt and method disagree on samples
        sys.stderr.write("GT and submission disagree on the sample ids\n")
        sys.exit(1)
    corectRelevantRetrievedSimilarity=np.zeros([len(gtKeys),5],dtype='float32')
    for k in range(len(gtKeys)):
        sId=gtKeys[k]
        corectRelevantRetrievedSimilarity[k,:]=compareTexts(methodIdTransDict[sId],gtIdTransDict[sId])
    # precision = correct / retrieved, recall = correct / relevant
    precision = (corectRelevantRetrievedSimilarity[:, 0].sum() / (corectRelevantRetrievedSimilarity[:, 2].sum()))
    recall = (corectRelevantRetrievedSimilarity[:, 0].sum() / (corectRelevantRetrievedSimilarity[:, 1].sum()))
    FM = (2 * precision * recall) / (precision + recall)
    correctSequences=corectRelevantRetrievedSimilarity[:,3].mean()
    similarity=corectRelevantRetrievedSimilarity[:,4].mean()
    combinedSoftMetric = (1 - FM) * FM + FM * similarity  # the better FM is, the less it matters in the overall score
    return combinedSoftMetric,FM,precision,recall,similarity,correctSequences,corectRelevantRetrievedSimilarity
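
A hypothetical toy call (sample ids and transcriptions invented); both dictionaries must share the same sample ids or the function exits:

gt = {"img1": "rue de la paix", "img2": "avenue victor hugo"}   # invented ground truth
pred = {"img1": "rue de la paix", "img2": "rue"}                # invented submission
soft, FM, precision, recall, similarity, correct_sequences, raw = getFSNSMetrics(gt, pred)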
corrector.py source code (project: markov-sentence-correction, author: anassinator)
def _correct(observed_sentence, bigrams, distribution, max_error_rate):
    """Corrects a given sentence.

    Note: The lower the max_error_rate, the faster the algorithm, but the
          likelier it will fail.

    Args:
        observed_sentence: Observed sentence.
        bigrams: First-order Markov chain of likely word sequences.
        distribution: Error probability distribution function.
        max_error_rate: Maximum number of errors in a word to consider.

    Returns:
        Ordered list of tuples of (corrected sentence, its probability).
        Most likely interpretations come first.
    """
    trellis = [{Sentence.START: (1.0, None)}]

    observed_words = list(observed_sentence)
    number_of_words = len(observed_words)

    for k in range(1, number_of_words):
        observed_word = observed_words[k]
        max_errors = int(len(observed_word) * max_error_rate) + 1

        current_states = {}
        previous_states = trellis[k - 1]
        trellis.append(current_states)

        for previous_word in previous_states:
            previous_prob = previous_states[previous_word][0]

            future_states = bigrams.yield_future_states((previous_word,))
            for possible_word, conditional_prob in future_states:
                # Conditional probability: P(X_k | X_k-1) * previous
                # probability.
                total_prob = conditional_prob * previous_prob

                # Emission probability: P(E_k | X_k).
                distance = editdistance.eval(observed_word, possible_word)
                total_prob *= distribution(distance)

                # Ignore states that have too many mistakes.
                if distance > max_errors:
                    continue

                # Only keep link of max probability.
                if possible_word in current_states:
                    if current_states[possible_word][0] >= total_prob:
                        continue

                current_states[possible_word] = (total_prob, previous_word)

    # Find most likely ending.
    interpretations = list(_backtrack_path(trellis, x) for x in trellis[-1])
    interpretations.sort(key=lambda x: x[1], reverse=True)

    return interpretations
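
The `distribution` argument is not defined in this snippet; one plausible, purely illustrative choice is an emission probability that decays geometrically with edit distance:

def geometric_error_distribution(distance, p=0.9):
    # P(E_k | X_k): probability of observing a word `distance` edits away from
    # the intended word; decays geometrically with the edit distance.
    return p * ((1 - p) ** distance)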
tagger.py source code (project: pandora, author: mikekestemont)
def test(self, multilabel_threshold=0.5):
        if not self.include_test:
            raise ValueError('Please do not call .test() if no test data is available.')

        score_dict = {}

        # get test predictions:
        test_in = {}
        if self.include_token:
            test_in['focus_in'] = self.test_X_focus
        if self.include_context:
            test_in['context_in'] = self.test_contexts

        test_preds = self.model.predict(test_in,
                                batch_size=self.batch_size)

        if isinstance(test_preds, np.ndarray):
            test_preds = [test_preds]

        if self.include_lemma:
            print('::: Test scores (lemmas) :::')

            pred_lemmas = self.preprocessor.inverse_transform_lemmas(predictions=test_preds[self.lemma_out_idx])
            if self.postcorrect:
                for i in range(len(pred_lemmas)):
                    if pred_lemmas[i] not in self.known_lemmas:
                        pred_lemmas[i] = min(self.known_lemmas,
                                        key=lambda x: editdistance.eval(x, pred_lemmas[i]))
            score_dict['test_lemma'] = evaluation.single_label_accuracies(gold=self.test_lemmas,
                                                 silver=pred_lemmas,
                                                 test_tokens=self.test_tokens,
                                                 known_tokens=self.preprocessor.known_tokens)

        if self.include_pos:
            print('::: Test scores (pos) :::')
            pred_pos = self.preprocessor.inverse_transform_pos(predictions=test_preds[self.pos_out_idx])
            score_dict['test_pos'] = evaluation.single_label_accuracies(gold=self.test_pos,
                                                 silver=pred_pos,
                                                 test_tokens=self.test_tokens,
                                                 known_tokens=self.preprocessor.known_tokens)

        if self.include_morph:     
            print('::: Test scores (morph) :::')
            pred_morph = self.preprocessor.inverse_transform_morph(predictions=test_preds[self.morph_out_idx],
                                                                   threshold=multilabel_threshold)
            if self.include_morph == 'label':
                score_dict['test_morph'] = evaluation.single_label_accuracies(gold=self.test_morph,
                                                 silver=pred_morph,
                                                 test_tokens=self.test_tokens,
                                                 known_tokens=self.preprocessor.known_tokens)                
            elif self.include_morph == 'multilabel':
                score_dict['test_morph'] = evaluation.multilabel_accuracies(gold=self.test_morph,
                                                 silver=pred_morph,
                                                 test_tokens=self.test_tokens,
                                                 known_tokens=self.preprocessor.known_tokens)
        return score_dict
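
The post-correction step above snaps out-of-vocabulary lemma predictions to the nearest known lemma by edit distance; a standalone sketch with an invented lemma inventory:

import editdistance

known_lemmas = {"amare", "amicus", "aqua"}   # invented lemma inventory
pred_lemma = "amarre"                        # invented out-of-vocabulary prediction
if pred_lemma not in known_lemmas:
    pred_lemma = min(known_lemmas, key=lambda x: editdistance.eval(x, pred_lemma))
print(pred_lemma)   # 'amare'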
validate_codes.py source code (project: handelsregister, author: Amsterdam)
def fix_ambiguous(ambiguous_sbi):
    """
    For each ambiguous sbi code find to most likely candidate

     0       vs.id,
     1       vs.naam,
     2       codes.hr_code,
     3       codes.alt_code,
     4       codes.title,
     5       codes.alt_title,
     6       codes.sub_cat,
     7       codes.alt_sub_cat,
     8       codes.mks_title

    """
    original_count = 0
    suggestion_count = 0

    for row in ambiguous_sbi:

        normalcode = row[2]
        zerocode = row[3]

        desc1 = row[4]
        desc2 = row[5]
        original = row[8]

        distance_desc1 = editdistance.eval(desc1, original)
        distance_desc2 = editdistance.eval(desc2, original)

        if distance_desc1 > distance_desc2:
            # the alternative match with 0 is better
            suggestion_count += 1
            ves = hrmodels.Vestiging.objects.get(id=row[0])
            invalid_activiteit = ves.activiteiten.get(sbi_code=normalcode)
            # fix the code
            invalid_activiteit.sbi_code = zerocode
            # save the corrected sbi code
            invalid_activiteit.save()
            # now save updated code
        else:
            # do nothing default is fine
            original_count += 1

        log.debug(f'{normalcode}, {zerocode}, {desc1[:18]}, {desc2[:18]}, {original[:18]}, {distance_desc1}, {distance_desc2}')  # noqa

    log.debug("%s-%s = Original-Suggestion", original_count, suggestion_count)
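
A minimal sketch of the disambiguation rule used above, with invented descriptions: the alternative code wins when its description is closer (smaller edit distance) to the authoritative mks_title:

import editdistance

original = "fabricage van meubels"        # invented codes.mks_title
desc1 = "vervaardiging van meubels"       # invented codes.title
desc2 = "fabricage van meubelen"          # invented codes.alt_title

# When the alternative description is closer to the authoritative title,
# the alternative (zero) code is taken as the suggestion.
use_alt_code = editdistance.eval(desc1, original) > editdistance.eval(desc2, original)
print(use_alt_code)   # True for these invented strings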

