python类defaultdict()的实例源码

IDADebugger.py 文件源码 项目:VMAttack 作者: anatolikalysch 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __init__(self, *args):
        """
        Set up the debugger hook state: trace buffer, CPU context and argument store.

        :param args: positional arguments forwarded unchanged to the parent initializer
        """
        super(IDADebugger, self).__init__(*args)
        self.hooked = False
        self.trace = Trace()
        self._module_name = 'IDADbg'
        self.arch = get_arch_dynamic()
        # init the cpu context with '0' for every tracked register and flag
        flags = ['cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']
        if self.arch == 32:
            regs = ['eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip', 'edi', 'esi']
            self.ctx = {c: '0' for c in regs + flags}
        elif self.arch == 64:
            # 'rdi' replaces the former 'edi' entry: every other register in the
            # 64-bit context uses its r-prefixed name and 'rdi' was missing
            regs = ['rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip', 'rdi', 'rsi',
                    'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15']
            self.ctx = {c: '0' for c in regs + flags}
        # NOTE(review): for any other arch value self.ctx is never assigned here

        self.IAT = []
        # maps a key to the set of arguments recorded for it
        self.func_args = defaultdict(set)
loc2lang_withpi.py 文件源码 项目:geomdn 作者: afshinrahimi 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def get_named_entities(documents, mincount=10):
    '''
    Given a list of texts, find words that start with a capital letter in
    more than 70% of their occurrences and return them as named entities (NE).

    Words are matched with a unicode word pattern that skips tokens directly
    preceded by '#' or '@'. Occurrences are counted case-insensitively.

    :param documents: iterable of text strings
    :param mincount: words occurring fewer than this many times are ignored
    :return: list of lower-cased words classified as named entities
    '''
    word_count = defaultdict(int)
    word_capital = defaultdict(int)
    NEs = []
    tp = re.compile(r'(?u)(?<![#@])\b\w+\b')
    for doc in documents:
        for word in tp.findall(doc):
            lowered = word.lower()
            if word[0].isupper():
                word_capital[lowered] += 1
            word_count[lowered] += 1

    # items() exists on both Python 2 and Python 3 dicts (iteritems is Python 2 only)
    for word, count in word_count.items():
        if count < mincount:
            continue
        percent = float(word_capital[word]) / count
        if percent > 0.7:
            NEs.append(word)
    return NEs
loc2lang.py 文件源码 项目:geomdn 作者: afshinrahimi 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_named_entities(documents, mincount=10):
    '''
    Given a list of texts, return the words that begin with a capital letter
    in more than 70% of their occurrences; these are treated as named
    entities (NE).

    Tokens directly preceded by '#' or '@' are not matched, and counting is
    done on the lower-cased form of each word.

    :param documents: iterable of text strings
    :param mincount: minimum number of occurrences a word needs to be considered
    :return: list of lower-cased words classified as named entities
    '''
    token_pattern = r'(?u)(?<![#@])\b\w+\b'
    tp = re.compile(token_pattern)
    word_count = defaultdict(int)
    word_capital = defaultdict(int)
    for doc in documents:
        for word in tp.findall(doc):
            key = word.lower()
            word_count[key] += 1
            if word[0].isupper():
                word_capital[key] += 1

    # items() is used instead of the Python 2 only iteritems() so the function
    # runs on both Python 2 and Python 3
    return [word for word, count in word_count.items()
            if count >= mincount and float(word_capital[word]) / count > 0.7]
multi_channel_experiment.py 文件源码 项目:bof-aed 作者: rgrzeszi 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def calc_log_prob_for_files(self, annotations):
        '''
        Collect features and labels for the classification windows in annotations.
        @param annotations: sequence of annotation records; element 3 of each
                        record is used as the audio file name
        @return: tuple (features, labels). features is the list filled in by
                        _calc_log_probs_for_windows, labels is the filled-in label
                        list converted to a numpy array.
        '''
        # bucket the annotation records by the audio file they belong to
        per_file = defaultdict(list)
        for record in annotations:
            per_file[record[3]].append(record)

        features, labels = [], []
        for filename in per_file:
            audio_path = self.basepath + '/audio/' + filename
            # the helper appends to features and labels in place
            self._calc_log_probs_for_windows(audio_path, per_file[filename], features, labels)

        return features, np.array(labels)
multi_channel_experiment.py 文件源码 项目:bof-aed 作者: rgrzeszi 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def read_test_files(self, annotation_file):
        '''
        Read the test windows for every audio file listed in an annotation file.
        @param annotation_file: file name below <basepath>/annotations/general/
        @return: tuple (features_test, labels_test), the concatenation of what
                 _read_test_windows returns for each audio file
        '''
        annotation_path = self.basepath + '/annotations/general/' + annotation_file

        # bucket the annotation records by audio file name (element 3 of a record)
        per_file = defaultdict(list)
        for record in self._read_annotations(annotation_path):
            per_file[record[3]].append(record)

        features_test, labels_test = [], []
        for filename, records in per_file.items():
            features, labels = self._read_test_windows(self.basepath + '/audio/' + filename, records)
            features_test += features
            labels_test += labels

        return features_test, labels_test
clouds.py 文件源码 项目:KDDCUP2016 作者: hugochan 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def get_citation_positions(db, paper_id) :
    """
    Fetch the citation spans that refer to paper_id, grouped by citing paper.

    :param db: object whose query(sql) returns a cursor offering fetchall()
    :param paper_id: id interpolated into the cited_paper_id filter
    :return: defaultdict(list) mapping citing paper id -> list of (start, end) tuples
    """
    # NOTE(review): paper_id is interpolated straight into the SQL text; if it can
    # come from untrusted input this should become a parameterized query
    query = """SELECT r.paper_id, 
                                        cg.start, cg.end 
                                        FROM refs r 
                                        JOIN citations c ON r.id=c.ref_id 
                                        JOIN citation_groups cg ON c.group_id=cg.id 
                                        WHERE cited_paper_id='%s' """ % paper_id

    # Group citations by paper
    by_paper = defaultdict(list)
    for row in db.query(query).fetchall() :
        citing_paper, start, end = row
        by_paper[citing_paper].append((start, end))
    return by_paper
__init__.py 文件源码 项目:python- 作者: secondtonone1 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
TraceAnalysis.py 文件源码 项目:VMAttack 作者: anatolikalysch 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def find_vm_addr(trace):
    """
    Find the virtual machine addr
    :param trace: instruction trace
    :return: virtual function start addr (or whatever AskAddr returns when two
             candidates are found)
    """
    pushes_per_func = defaultdict(lambda: 0)
    size_per_func = defaultdict(lambda: 0)
    # count the push instructions per enclosing function; the function with the
    # most pushes is taken as the vm function candidate
    for entry in trace:
        try:
            if entry.disasm[0] != 'push':
                continue
            pushes_per_func[GetFunctionAttr(entry.addr, FUNCATTR_START)] += 1
        except:
            # best effort: lines that cannot be inspected are skipped
            pass

    vm_func = max(pushes_per_func, key=pushes_per_func.get)
    seg_start = SegStart(vm_func)
    seg_end = SegEnd(vm_func)
    # test whether the vm_func is the biggest func in the Segment
    for func in Functions(seg_start, seg_end):
        size_per_func[func] = GetFunctionAttr(func, FUNCATTR_END) - GetFunctionAttr(func, FUNCATTR_START)
    biggest = max(size_per_func, key=size_per_func.get)
    if biggest == vm_func:
        return vm_func
    # the two heuristics disagree, so the choice is delegated to AskAddr
    return AskAddr(vm_func,
            "Found two possible addresses for the VM function start address: %s and %s. Choose one!" %
            (vm_func, biggest))
TraceAnalysis.py 文件源码 项目:VMAttack 作者: anatolikalysch 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def find_virtual_regs(trace, manual=False, update=None):
    """
    Maps the virtual registers on the stack to the actual registers after the vm exit.

    The trace is consumed from its end: every element is popped off, so the
    passed trace is empty when the function returns.

    :param trace: instruction trace (must be a Trace instance)
    :param manual: when true, the resulting mapping is printed
    :param update: optional object; if given, its pbar_update(60) is called
    :return: virtual registers dict which maps the real regs onto virtual ones via stack addresses
    """
    vmr = get_vmr()
    assert isinstance(trace, Trace)
    # unknown registers read as False, which doubles as the "not mapped yet" marker
    virt_regs = defaultdict(lambda: False)

    while trace:
        try:
            elem = trace.pop(len(trace) - 1)
            if not (len(elem.disasm) > 0 and elem.disasm[0] == 'pop'):
                continue
            opnd = elem.disasm[1]
            if get_reg_class(opnd) is None:  # if not a register it is a mem_loc
                continue
            if virt_regs[opnd]:  # already mapped by a later pop
                continue
            # the context always shows the registers after the execution, so we need the SP from the instruction before
            stack_addr = trace[len(trace) - 1].ctx[get_reg('rsp', trace.ctx_reg_size)]
            virt_regs[opnd] = stack_addr
        except Exception:
            # best effort: skip elements that cannot be evaluated, but let
            # KeyboardInterrupt / SystemExit through instead of swallowing them
            pass

    if update is not None:
        update.pbar_update(60)

    vmr.vm_stack_reg_mapping = virt_regs
    if manual:
        # single-argument parenthesized form prints identically on Python 2 and 3
        print(''.join('%s:%s\n' % (c, virt_regs[c]) for c in virt_regs.keys()))
    return virt_regs
loc2lang.py 文件源码 项目:geomdn 作者: afshinrahimi 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def city_dialect_words(model, vocab, filename='./city_ranking.txt'):
    """
    Rank vocabulary words per city and dump the ranking to a JSON file.

    For each of the first 200 cities in ./data/cities.json the log of the
    model prediction at the city location is compared with the mean log
    prediction over all cities; the 200 highest ranked words are stored under
    the city name. Words found in the named-entity dump get an "NE_" prefix.

    :param model: object with a predict(locations) method
                  (assumes a 2-D result with one row per location -- TODO confirm)
    :param vocab: sequence of words, indexed by the sorted prediction indices
    :param filename: output path of the JSON ranking
    """
    # load named entities
    ne_path = './dumps/ne_' + dataset_name + '.json'
    with codecs.open(ne_path, 'r', encoding='utf-8') as ne_handle:
        ne_dump = json.load(ne_handle)
    named_entities = set(ne_dump['nes'])

    top_k = 200
    with open('./data/cities.json', 'r') as fin:
        cities = json.load(fin)
    coords = [[entry['latitude'], entry['longitude']] for entry in cities]
    all_logprobs = np.log(model.predict(np.array(coords).astype('float32')))
    all_logprobs_mean = np.mean(all_logprobs, axis=0)
    city_dialectwords = defaultdict(list)

    for entry in cities[0:200]:
        name = entry['city']
        loc = np.array([[entry['latitude'], entry['longitude']]]).astype('float32')
        normalized_city_logprobs = np.log(model.predict(loc)) - all_logprobs_mean
        order = np.argsort(normalized_city_logprobs)
        ranked = np.array(vocab)[order][0].tolist()
        # argsort is ascending, so the reversed list starts with the highest scores
        topwords = list(reversed(ranked))[0:top_k]

        # mark every topword that is a named entity with an "NE_" prefix
        city_dialectwords[name] = [
            "NE_" + word if word in named_entities else word for word in topwords
        ]
        # NOTE(review): the output file is rewritten after every city -- confirm
        # whether this is intended checkpointing or should happen once after the loop
        with codecs.open(filename, 'w', encoding='utf-8') as fout:
            json.dump(city_dialectwords, fout, indent=4, sort_keys=True)
__init__.py 文件源码 项目:ivaochdoc 作者: ivaoch 项目源码 文件源码 阅读 72 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
multi_channel_experiment.py 文件源码 项目:bof-aed 作者: rgrzeszi 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def read_files(self, annotations, channels):
        '''
        Read the windows of every annotated audio file into per-class containers.
        @param annotations: sequence of annotation records; element 3 of each
                            record is used as the audio file name
        @param channels: channel indices to use; a plain int, a numpy int64 or a
                         list is converted to a numpy array first
        @return: tuple (features_windows, features_frames); both are dictionaries
                 with the classnames as keys, starting out as empty lists and
                 handed to _read_windows for every audio file
        '''

        # normalise scalar and list input to a numpy array
        if type(channels) == int or type(channels) == np.int64:
            channels = np.array([channels])
        elif type(channels) == list:
            channels = np.array(channels)

        # one independent, initially empty list per class in each dictionary
        features_frames = {classname: [] for classname in self.classes}
        features_windows = {classname: [] for classname in self.classes}

        # bucket the annotation records by audio file name
        per_file = defaultdict(list)
        for record in annotations:
            per_file[record[3]].append(record)

        for filename in per_file:
            audio_path = self.basepath + '/audio/' + filename
            self._read_windows(audio_path, per_file[filename], features_windows, features_frames, channels)

        return features_windows, features_frames
collections.py 文件源码 项目:zippy 作者: securesystemslab 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:news-for-good 作者: thecodinghub 项目源码 文件源码 阅读 74 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:Tencent_Cartoon_Download 作者: Fretice 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:fieldsight-kobocat 作者: awemulya 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
TRIE.py 文件源码 项目:Online-Book-Store 作者: siddhiparkar151992 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def __init__(self):
        """Create the trie with an empty root: a defaultdict without a default factory."""
        # defaultdict(None) is the same as defaultdict(): missing keys raise KeyError
        self.root = defaultdict(None)
TRIE.py 文件源码 项目:Online-Book-Store 作者: siddhiparkar151992 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def insertUtil(self, minHeap, word, duplicate):
        """
        Make sure the trie has a root node before inserting.

        Only the root initialisation is performed here; minHeap, word and
        duplicate are accepted but not used by this part of the method.
        """
        # identity check is the idiomatic (and __eq__-independent) test for None
        if self.root is None:
            self.root = defaultdict()
__init__.py 文件源码 项目:web_ctp 作者: molebot 项目源码 文件源码 阅读 41 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:CloudPrint 作者: William-An 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:ouroboros 作者: pybee 项目源码 文件源码 阅读 64 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:gardenbot 作者: GoestaO 项目源码 文件源码 阅读 136 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:projeto 作者: BarmyPenguin 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:flask-zhenai-mongo-echarts 作者: Fretice 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:aweasome_learning 作者: Knight-ZXW 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:kbe_server 作者: xiaohaoppy 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:blog_flask 作者: momantai 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
__init__.py 文件源码 项目:MyFriend-Rob 作者: lcheniv 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def __getitem__(self, key):
        """
        Return the value for key from the first mapping in self.maps that has it.

        When no mapping yields a value the result of self.__missing__(key) is
        returned, which supports subclasses that define __missing__.
        """
        for layer in self.maps:
            try:
                found = layer[key]
            except KeyError:
                # indexing is used because 'key in mapping' can't be used with defaultdict
                continue
            return found
        return self.__missing__(key)
goldReader.py 文件源码 项目:oie-benchmark 作者: gabrielStanovsky 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def read(self, fn):
        """
        Load extractions from a tab-separated file into self.oie.

        Each line holds the sentence text, a base relation, the relation and
        then any number of arguments. The result stored in self.oie maps the
        sentence text to the list of Extraction objects built from its lines;
        every extraction gets confidence 1.0.

        :param fn: path of the tab-separated file
        """
        by_sentence = defaultdict(list)
        with open(fn) as fin:
            for raw_line in fin:
                fields = raw_line.strip().split('\t')
                # the base relation (second field) is read but not used
                text, base_rel, rel = fields[:3]
                confidence = 1

                extraction = Extraction(pred = rel, sent = text, confidence = float(confidence))
                for argument in fields[3:]:
                    extraction.addArg(argument)

                by_sentence[text].append(extraction)
        self.oie = by_sentence
IDADebugger.py 文件源码 项目:VMAttack 作者: anatolikalysch 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def gen_trace(self, trace_start=BeginEA(), trace_end=BADADDR):
        """
        Generate trace for the loaded binary.

        Resets the item colors of the segment under the cursor, runs the
        debuggee to BeginEA(), enables step tracing and then processes debugger
        events until an event code <= 1 is seen. Afterwards the cpu contexts of
        the recorded trace lines are shifted by one line (see comment below).

        :param trace_start: not referenced in the body; execution always starts
                            via RunTo(BeginEA())
        :param trace_end: not referenced in the body
        :return: self.trace
        """
        vmr = get_vmr()
        self.trace_init()
        # reset color: every head of the segment containing ScreenEA() is set to 0xFFFFFF
        heads = Heads(SegStart(ScreenEA()), SegEnd(ScreenEA()))
        for i in heads:
            SetColor(i, CIC_ITEM, 0xFFFFFF)
        # start exec
        RunTo(BeginEA())
        event = GetDebuggerEvent(WFNE_SUSP, -1)
        # enable tracing
        EnableTracing(TRACE_STEP, 1)
        # placeholder branch: nothing is done for vmr.sys_libs here
        if vmr.sys_libs:
            pass
        event = GetDebuggerEvent(WFNE_ANY | WFNE_CONT, -1)
        while True:
            event = GetDebuggerEvent(WFNE_ANY, -1)
            addr = GetEventEa()

            # change color of executed line
            current_color = GetColor(addr, CIC_ITEM)
            new_color = self.get_new_color(current_color)
            SetColor(addr, CIC_ITEM, new_color)
            # break by exception
            # NOTE(review): the meaning of event codes <= 1 comes from the IDA debugger API -- confirm
            if event <= 1:
                break

        # standardize the difference between ida_trace.txt files and generated trace files by debugger hook:
        # since dbg_trace returns the cpu context before the instruction execution and trace files the ctx after
        # -> each line takes over the ctx of the following line; the last line gets an all-'0' context
        # NOTE(review): index(line) returns the first matching element, so if trace lines can
        # compare equal the ctx of the earliest match's successor is used -- confirm
        for line in self.trace:
            try:
                line.ctx = self.trace[self.trace.index(line) + 1].ctx
            except IndexError:
                line.ctx = defaultdict(lambda: '0')
        # return the trace, for population see dbg_trace() below
        msg('[*] Trace generated!\n')
        if vmr.extract_param:
            # publish the collected call arguments on the vmr object and print them per key
            vmr.func_args = self.func_args
            for key in self.func_args.keys():
                print 'Function %s call args:' % key, ''.join('%s, ' % arg for arg in self.func_args[key]).rstrip(', ')
        return self.trace


问题


面经


文章

微信
公众号

扫码关注公众号