Python editdistance 模块 eval() 的实例源码

image_ocr.py 文件源码 项目:keras 作者: GeekLiB 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
image_ocr.py 文件源码 项目:pCVR 作者: xjtushilei 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
corrector.py 文件源码 项目:markov-sentence-correction 作者: anassinator 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def total_distance(observed_sentence, corrected_sentence):
    """Sums the position-wise Levenshtein distances of two sentences.

    Elements are paired by index, so the corrected sentence has to supply at
    least as many elements as the observed one; surplus corrected elements
    are ignored.

    Args:
        observed_sentence: Observed sentence, iterated element by element.
        corrected_sentence: Corrected sentence, iterated element by element.

    Returns:
        Sum of the edit distances between elements at the same position.

    Raises:
        IndexError: If the corrected sentence is shorter than the observed
            sentence.
    """
    observed_words = list(observed_sentence)
    corrected_words = list(corrected_sentence)

    distance_sum = 0
    for position, observed_word in enumerate(observed_words):
        distance_sum += editdistance.eval(observed_word, corrected_words[position])

    return distance_sum
thaanaocr.py 文件源码 项目:thaanaOCR 作者: Sofwath 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
image_ocr.py 文件源码 项目:keras-customized 作者: ambrite 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
image_ocr_gpu.py 文件源码 项目:keras-mxnet-benchmarks 作者: sandeep-krishnamurthy 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
analyse.py 文件源码 项目:chat-roulette-python 作者: ph4r05 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def similarities(self):
    """
    Compute pairwise Levenshtein distances between the files in ``self.filedb``.

    Every unordered pair of keys is compared once, using the contents held in
    ``self.file_data``, and each distance is logged. Distances come from the
    ``editdistance`` package.
    Possible later alternative: https://docs.python.org/2/library/difflib.html

    NOTE(review): the distance table is only built locally; nothing is
    returned in the code visible here -- confirm whether a ``return`` is
    missing. Keys are formatted with ``%6d``, so they are presumably integers.
    :return: None
    """
    keys = sorted(self.filedb.keys())
    table = {}

    for position, first in enumerate(keys):
        logger.info('Comparing %s...' % first)
        row = table[first] = {}

        # Only keys after ``first`` are visited, so each pair is scored once.
        for second in keys[position + 1:]:
            distance = editdistance.eval(self.file_data[first], self.file_data[second])
            row[second] = distance
            logger.info(' %6d vs %6d : %4d  %s  %s'
                        % (first, second, distance, self.filedb[first], self.filedb[second]))
correct_item_descriptions.py 文件源码 项目:event-cui-transfer 作者: mit-ddig 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def best_match(word, corrected_med_list, corrected_english_list):
    """Return the candidate closest to ``word`` by edit distance.

    Both candidate lists are scanned independently. Within a list, a
    candidate is only accepted if its distance is strictly smaller than the
    best so far, starting from ``len(word)``; the earliest candidate wins
    ties. If the best medical distance is less than or equal to the best
    English distance, the medical candidate is returned.

    Args:
        word: The word to match.
        corrected_med_list: Iterable of medical candidate words.
        corrected_english_list: Iterable of English candidate words.

    Returns:
        The winning candidate, or ``''`` when no candidate in the winning
        list is closer than ``len(word)`` edits.
    """
    def closest(candidates):
        # Each distance is computed once per candidate and reused for both
        # the comparison and the bookkeeping.
        best_dist = len(word)
        best_word = ''
        for candidate in candidates:
            dist = editdistance.eval(word, candidate)
            if dist < best_dist:
                best_dist = dist
                best_word = candidate
        return best_dist, best_word

    min_dist_med, best_med_word = closest(corrected_med_list)
    min_dist_eng, best_eng_word = closest(corrected_english_list)
    return best_med_word if min_dist_med <= min_dist_eng else best_eng_word
image_ocr.py 文件源码 项目:keras 作者: NVIDIA 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
comparison.py 文件源码 项目:ws-backend-community 作者: lavalamp- 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def compare_strings_by_edit_distance(first=None, second=None):
    """
    Compute the edit distance between two strings.
    :param first: The first string to compare.
    :param second: The second string to compare.
    :return: The edit distance between ``first`` and ``second`` as reported
    by ``editdistance.eval``.
    """
    distance = editdistance.eval(first, second)
    return distance

    # Class Methods

    # Public Methods

    # Protected Methods

    # Private Methods

    # Properties

    # Representation and Comparison
image_ocr.py 文件源码 项目:keras-101 作者: burness 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print the mean raw and length-normalised edit distance over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` until ``num`` samples have
    been scored. Each batch's ``'the_input'`` entries are decoded with
    ``decode_batch`` (using ``self.test_func``) and compared against the
    matching ``'source_str'`` entries.

    Args:
        num: Number of samples to score. It is also the divisor of both
            means, so ``num == 0`` raises ``ZeroDivisionError``.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        inputs = batch['the_input']
        # The final batch may hold more samples than are still needed.
        count = min(inputs.shape[0], remaining)
        predictions = decode_batch(self.test_func, inputs[0:count])
        truths = batch['source_str']
        for idx in range(count):
            distance = float(editdistance.eval(predictions[idx], truths[idx]))
            total_ed += distance
            # Normalise by the length of the ground-truth string.
            total_norm_ed += distance / len(truths[idx])
        remaining -= count
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    print('\nOut of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
          % (num, mean_ed, mean_norm_ed))
Cluster.py 文件源码 项目:rebuild_obfuscator 作者: irobert-tluo 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def simscore(a1, b1):
    """Return a similarity score derived from the edit distance of two sequences.

    The score is ``1.0 - distance / max(len(a1), len(b1))``.

    Args:
        a1: First sequence (anything supporting ``len``).
        b1: Second sequence (anything supporting ``len``).

    Returns:
        ``0`` when both sequences are empty, otherwise the float score above.
    """
    max_len = max(len(a1), len(b1))
    if max_len == 0:
        return 0
    dist = editdistance.eval(a1, b1)
    if dist > max_len:
        # Diagnostic output for a distance larger than the longer input.
        # Written as a function call so the block parses on Python 2 and 3.
        print(dist)
    return 1.0 - (float(dist) / float(max_len))
SimScore.py 文件源码 项目:rebuild_obfuscator 作者: irobert-tluo 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def similarity(a1, b1):
    """Return ``1.0 - edit_distance / longer_length`` for two sequences.

    Returns ``0`` when both sequences are empty, since there is no length to
    normalise by.
    """
    longest = max(len(a1), len(b1))
    if longest != 0:
        distance = editdistance.eval(a1, b1)
        return 1.0 - (float(distance) / float(longest))
    return 0
net.py 文件源码 项目:speechless 作者: JuliusKunze 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def letter_error_count(self) -> float:
    """Return the edit distance between ``self.expected`` and ``self.predicted``."""
    expected, predicted = self.expected, self.predicted
    return editdistance.eval(expected, predicted)
net.py 文件源码 项目:speechless 作者: JuliusKunze 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def word_error_count(self) -> float:
    """Return the edit distance between the expected and predicted word sequences.

    ``self.predicted`` is split on whitespace and compared, word by word,
    with ``self.expected_words``.
    """
    expected_words = self.expected_words
    predicted_words = self.predicted.split()
    return editdistance.eval(expected_words, predicted_words)
post_correction.py 文件源码 项目:DeepLearning-OCR 作者: xingjian-f 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def edit_dis(a, b):
    """Return the edit distance between ``a`` and ``b`` via ``editdistance.eval``."""
    distance = editdistance.eval(a, b)
    return distance
rrcmetrics.py 文件源码 项目:pe 作者: anguelos 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def getEditDistanceMat(gtTranscriptions,sampleTranscriptions):
    """Build the pairwise edit-distance matrices between two transcription lists.

    Rows follow ``gtTranscriptions`` and columns follow
    ``sampleTranscriptions``.

    Returns:
        A ``(normalized, raw)`` tuple of arrays: ``raw`` holds the edit
        distance of every pair and ``normalized`` is ``raw`` divided
        element-wise by the length of the longer transcription in the pair.
    """
    numGt = len(gtTranscriptions)
    numSamples = len(sampleTranscriptions)
    distMat = np.empty((numGt, numSamples))
    maxSizeMat = np.empty((numGt, numSamples))
    for row in range(numGt):
        gt = gtTranscriptions[row]
        for col in range(numSamples):
            sample = sampleTranscriptions[col]
            distMat[row, col] = editdistance.eval(gt, sample)
            maxSizeMat[row, col] = max(len(gt), len(sample))
    normalized = distMat / maxSizeMat
    return normalized, distMat
author_util.py 文件源码 项目:json-merger 作者: inveniosoftware-contrib 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def _normalized_edit_dist(s1, s2):
    """Return the edit distance of ``s1`` and ``s2`` divided by the longer length."""
    distance = editdistance.eval(s1, s2)
    # The trailing 1 keeps the divisor non-zero when both inputs are empty.
    longest = max(len(s1), len(s2), 1)
    return float(distance) / longest
identify.py 文件源码 项目:Library-Identification 作者: Riscure 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def compare_cc_list_levenshtein(sample, ref):
    """
    Scores how closely the cyclomatic complexity list of `sample` matches
    that of `ref`, using the Levenshtein distance between the two lists.
    This detects added/removed functions and functions that have changed in
    complexity between a sample and a reference.

    Returns a tuple `(percentage, ref.name, ref.version)`. The percentage is
    0.0 when `ref` has no `cclist` attribute or it is None.
    """
    ref_cclist = getattr(ref, 'cclist', None)
    if ref_cclist is None:
        ratio = 0.0
    else:
        sample_cclist = sample.cclist
        distance = editdistance.eval(sample_cclist, ref_cclist)
        longest = max(len(sample_cclist), len(ref_cclist))
        # Normalise by the longer list so the ratio does not exceed 1.
        ratio = 1 - (distance / float(longest))

    return (ratio * 100, ref.name, ref.version)
tagger.py 文件源码 项目:pandora 作者: mikekestemont 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def annotate(self, tokens):
    """Run the model on ``tokens`` and collect the enabled annotations.

    The result always contains ``'tokens'``. ``'lemmas'``, ``'pos'`` and
    ``'morph'`` are added when the matching ``self.include_*`` flag is set,
    and ``'postcorrect_lemmas'`` when ``self.postcorrect`` is also set.

    NOTE(review): post-correction overwrites entries of the same object that
    is stored under ``'lemmas'``; if that object is a mutable list, both keys
    end up referring to the corrected values -- confirm this is intended.
    """
    X_focus = self.preprocessor.transform(tokens=tokens)['X_focus']
    X_context = self.pretrainer.transform(tokens=tokens)

    # Feed the model only the inputs it is configured to use.
    model_inputs = {}
    if self.include_token:
        model_inputs['focus_in'] = X_focus
    if self.include_context:
        model_inputs['context_in'] = X_context
    preds = self.model.predict(model_inputs)

    # A bare array is wrapped so outputs can be addressed by index below.
    if isinstance(preds, np.ndarray):
        preds = [preds]

    annotation_dict = {'tokens': tokens}
    if self.include_lemma:
        pred_lemmas = self.preprocessor.inverse_transform_lemmas(predictions=preds[self.lemma_out_idx])
        annotation_dict['lemmas'] = pred_lemmas
        if self.postcorrect:
            for position, lemma in enumerate(pred_lemmas):
                if lemma in self.known_lemmas:
                    continue
                # Replace an unknown lemma with the nearest known one.
                pred_lemmas[position] = min(
                    self.known_lemmas,
                    key=lambda known: editdistance.eval(known, pred_lemmas[position]))
            annotation_dict['postcorrect_lemmas'] = pred_lemmas

    if self.include_pos:
        annotation_dict['pos'] = self.preprocessor.inverse_transform_pos(
            predictions=preds[self.pos_out_idx])

    if self.include_morph:
        annotation_dict['morph'] = self.preprocessor.inverse_transform_morph(
            predictions=preds[self.morph_out_idx])

    return annotation_dict
webman.py 文件源码 项目:WebMan 作者: flipflop97 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def searchPackages(name):
    """Search the Arch Linux repositories and the AUR for ``name``.

    Each source is queried through ``loadJson``, ranked by ``levdist`` to
    ``name`` and cut to 100 hits. Repository hits are kept only when their
    architecture is ``arch`` or ``'any'``. The parsed packages from both
    sources are then ranked together and again cut to 100.
    """
    official = loadJson('https://www.archlinux.org/packages/search/json/?q=%s' % name)['results']
    official = sorted(official, key=lambda entry: levdist(name, entry['pkgname']))[:100]
    packages = []
    for entry in official:
        if entry['arch'] in (arch, 'any'):
            packages.append(parsePackage(entry, name))

    aur = loadJson('https://aur.archlinux.org/rpc/?v=5&type=search&arg=%s' % name)['results']
    aur = sorted(aur, key=lambda entry: levdist(name, entry['Name']))[:100]
    packages.extend([parsePackage(entry, name) for entry in aur])

    # The first field of a parsed package is what gets ranked against ``name``.
    ranked = sorted(packages, key=lambda parsed: levdist(name, parsed[0]))
    return ranked[:100]
compute_real_effectiveness.py 文件源码 项目:atropos 作者: jdidion 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def set_trimming(self, u, t, use_edit_distance=True):
    """Record how many bases were trimmed from the front and back of a read.

    Both ``query_sequence`` values are upper-cased and the trimmed read is
    located inside the untrimmed one. The results are stored on
    ``self.trimmed_front`` and ``self.trimmed_back``.

    Args:
        u: Untrimmed read; only ``query_sequence`` is used.
        t: Trimmed read; only ``query_sequence`` is used.
        use_edit_distance: When false, no search is done and the front
            offset is set to -1.
    """
    untrimmed = u.query_sequence.upper()
    trimmed = t.query_sequence.upper()
    untrimmed_len, trimmed_len = len(untrimmed), len(trimmed)

    if not use_edit_distance:
        front = -1
    elif untrimmed_len <= trimmed_len:
        front = 0
    else:
        # First offset at which the trimmed read occurs verbatim, if any.
        front = untrimmed.find(trimmed)
        if front < 0:
            # Since Skewer performs automatic error correction, the trimmed and
            # untrimmed reads may not match, so in that case we find the closest
            # match by Levenshtein distance.
            front = 0
            best = None
            for start in range(untrimmed_len - trimmed_len + 1):
                current = editdistance.eval(untrimmed[start:(start + trimmed_len)], trimmed)
                if not best:
                    best = current
                elif current < best:
                    front = start
                    best = current

    self.trimmed_front = front
    self.trimmed_back = untrimmed_len - (trimmed_len + front)
distance.py 文件源码 项目:sequtils 作者: atgtag 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def edit(seq1, seq2):
    """
    Compute the Levenshtein distance between two sequences.

    Thin wrapper around the ``eval`` function of the module bound to ``ed``;
    the result is coerced to ``int``.

    Args:
        seq1 (str): Reference sequence
        seq2 (str): Sequence to compare

    Returns:
        int: Edit distance between the two sequences.

    Examples:
        >>> edit('banana', 'bahama')
        2
    """
    distance = ed.eval(seq1, seq2)
    return int(distance)
feat_gen.py 文件源码 项目:kaggle 作者: rbauld 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def edit_distance(train_in, test_in, qcolumns=['question1', 'question2'], append=''):
    """Add an edit-distance feature column to copies of the train and test frames.

    Only the columns named in ``qcolumns`` are kept. For every row, the edit
    distance between the first and second of those columns is stored in a new
    column named ``'edit_dist' + append``.

    Args:
        train_in: Training frame; it is copied, not modified.
        test_in: Test frame; it is copied, not modified.
        qcolumns: Names of the two columns to compare. The list default is
            only read, never mutated.
        append: Suffix for the name of the new column.

    Returns:
        Tuple ``(train, test)`` of the reduced frames with the new column.
    """
    train = train_in.copy().loc[:, qcolumns]
    test = test_in.copy().loc[:, qcolumns]

    import editdistance

    def row_distance(row):
        return editdistance.eval(row[qcolumns[0]], row[qcolumns[1]])

    column = 'edit_dist' + append
    for frame in (train, test):
        frame[column] = frame.apply(row_distance, axis=1)

    return (train, test)
resolver.py 文件源码 项目:social-vuln-scanner 作者: Betawolf 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def bestNameDiff(profileone, profiletwo):
    """ Scores the similarity of two profiles' best names.

    Returns 0 when either profile has no best name. Otherwise returns one
    minus the Levenshtein distance of the names divided by the larger of the
    two profiles' ``name_length`` values.

    NOTE(review): if the operands are ints and this runs without true
    division (Python 2), the quotient is truncated -- confirm the target
    interpreter.
    """
    n1 = profileone.bestname()
    n2 = profiletwo.bestname()
    if not (n1 and n2):
        return 0
    l1 = profileone.name_length
    l2 = profiletwo.name_length
    diff = editdistance.eval(n1, n2)
    longest = l1 if l1 > l2 else l2
    return 1 - (diff / longest)
vuln_scorer.py 文件源码 项目:social-vuln-scanner 作者: Betawolf 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def string_sim(n1, n2):
    """ Scores the similarity of two strings from their Levenshtein distance.

    Returns 0 when either string is empty or None. Otherwise returns one
    minus the distance divided by the length of the longer string.

    NOTE(review): if this runs without true division (Python 2), the integer
    quotient is truncated -- confirm the target interpreter.
    """
    if n1 and n2:
        longest = max(len(n1), len(n2))
        diff = editdistance.eval(n1, n2)
        return 1 - (diff / longest)
    return 0
Mandalorion_12_Create_Consensi.py 文件源码 项目:Mandalorion 作者: christopher-vollmers 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def collect_file_paths(path,gene_file):
    """Write subsampled, orientation-normalised read files per isoform.

    For each gene listed in ``gene_file`` (one per line), every file in
    ``path + '/parsed_reads'`` whose name contains the gene is loaded with
    ``read_fasta``. A companion ``<file>_sub`` file is written: the first
    read goes in unchanged, and each of the next ``subsample`` reads is
    written in whichever orientation (as-is or reverse-complemented) has the
    smaller normalised squared edit distance to the first read.

    Args:
        path: Directory that contains the ``parsed_reads`` sub-directory.
        gene_file: Text file listing the genes of interest, one per line.

    Returns:
        Tuple ``(isoform_list, gene_read_counter, isoform_read_counter)``:
        a list of ``(sub_file_path, gene)`` pairs, the number of reads per
        gene, and the number of reads per ``_sub`` file path.

    Raises:
        IndexError: If a matching read file contains no reads.
    """
    # Context managers ensure both the gene list and each output file are
    # closed (and flushed) before the function returns.
    with open(gene_file) as gene_handle:
        genes_of_interest = [line.strip() for line in gene_handle]

    reads_dir = path + '/parsed_reads'
    isoform_list = []
    gene_read_counter = {}
    isoform_read_counter = {}
    for gene in genes_of_interest:
        gene_read_counter[gene] = 0
        for file1 in sorted(os.listdir(reads_dir)):
            if gene not in file1:
                continue

            sub_path = reads_dir + '/' + file1 + '_sub'
            isoform_reads = read_fasta(reads_dir + '/' + file1)
            isoform_read_list = list(isoform_reads.keys())
            print(gene_read_counter,gene_read_counter[gene],len(isoform_reads.keys()))
            gene_read_counter[gene] += len(isoform_read_list)
            isoform_read_counter[sub_path] = len(isoform_read_list)

            read1 = isoform_read_list[0]
            reference = isoform_reads[read1]
            with open(sub_path, 'w') as out_sub:
                out_sub.write('>' + read1 + '\n' + reference + '\n')
                for counter, read2 in enumerate(isoform_read_list[1:]):
                    # Only the first ``subsample`` follow-up reads are kept.
                    if counter >= subsample:
                        break
                    sequence = isoform_reads[read2]
                    flipped = reverse_complement(sequence)
                    scale = float(len(reference) * len(sequence))
                    dist_1 = editdistance.eval(reference, sequence) ** 2 / scale
                    dist_2 = editdistance.eval(reference, flipped) ** 2 / scale
                    out_sub.write('>' + read2 + '\n')
                    # Keep the orientation that is closer to the first read.
                    if dist_1 < dist_2:
                        out_sub.write(sequence + '\n')
                    else:
                        out_sub.write(flipped + '\n')

            isoform_list.append((sub_path, gene))

    return isoform_list,gene_read_counter,isoform_read_counter
test_simulate_seq.py 文件源码 项目:wub 作者: nanoporetech 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_simulate_sequencing_errors(self):
    """Test function simulating sequencing errors."""
    error_rate = 0.1
    error_weights = {'substitution': 1.0 / 6,
                     'insertion': 1.0 / 6,
                     'deletion': 4.0 / 6}
    sequence = sim_seq.simulate_sequence(5000)
    mutated_record = sim_seq.simulate_sequencing_errors(
        sequence, error_rate, error_weights)
    distance = editdistance.eval(sequence, mutated_record.seq)

    seq_len = len(sequence)
    expected_errors = seq_len * error_rate
    errors_sd = np.sqrt(seq_len * error_rate * (1 - error_rate))
    # Should pass 0.9973 proportion of cases:
    lower_bound = expected_errors - errors_sd * 3
    upper_bound = expected_errors + errors_sd * 3
    within_bounds = lower_bound < distance < upper_bound
    self.assertTrue(within_bounds,
                    msg="expected: {} realised:{}".format(expected_errors, distance))
ockre.py 文件源码 项目:OCkRE 作者: rossumai 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def show_edit_distance(self, num):
    """Print and return edit-distance and exact-match statistics over ``num`` samples.

    Batches are drawn from ``self.text_img_gen`` and decoded with
    ``decode_batch``. For each sample the raw decoded string is scored
    against ``'source_str'`` by edit distance. Separately, both strings have
    whitespace and the characters ``+`` and ``/`` removed (the decoded one is
    also passed through ``unicode`` and ``deaccent``) and are compared for
    exact equality.

    Args:
        num: Number of samples to score; also the divisor of both means.

    Returns:
        Tuple ``(mean_norm_ed, absacc)``: the mean length-normalised edit
        distance and the fraction of exact label matches.
    """
    remaining = num
    total_norm_ed = 0.0
    total_ed = 0.0
    right = 0
    wrong = 0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        count = min(batch['the_input'].shape[0], remaining)
        decoded_res = decode_batch(self.test_func, batch['the_input'][0:count], batch['labeltype_input'][0:count])
        for idx in range(count):
            ocr_result = deaccent(unicode(re.sub("[\+\/]", "", re.sub("\\s", "", decoded_res[idx])), 'utf-8'))
            gold_label = re.sub("[\+\/]", "", re.sub("\\s", "", batch['source_str'][idx]))
            if gold_label != ocr_result:
                wrong += 1
            else:
                right += 1
            # The distance is measured on the unnormalised strings.
            distance = float(editdistance.eval(decoded_res[idx], batch['source_str'][idx]))
            total_ed += distance
            total_norm_ed += distance / len(batch['source_str'][idx])
        remaining -= count
    absacc = float(right) / (float(right) + float(wrong))
    mean_norm_ed = total_norm_ed / num
    mean_ed = total_ed / num
    outline = ' Out of %d samples:  Mean edit distance: %.3f Mean normalized edit distance: %0.3f\n Absolute accuracy over labels is %0.2f\n' % (
        num, mean_ed, mean_norm_ed, absacc)
    print(outline)

    return mean_norm_ed, absacc
eval_task2.py 文件源码 项目:rctw17 作者: bgshih 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def text_distance(str1, str2):
    """Return the edit distance between the two strings after ``normalize_txt``."""
    normalized_first = normalize_txt(str1)
    normalized_second = normalize_txt(str2)
    distance = editdistance.eval(normalized_first, normalized_second)
    return distance


问题


面经


文章

微信
公众号

扫码关注公众号