def __init__(self, *args):
    super(IDADebugger, self).__init__(*args)
    self.hooked = False
    self.trace = Trace()
    self._module_name = 'IDADbg'
    self.arch = get_arch_dynamic()
    # initialize the CPU context with zeroed registers and flags
    if self.arch == 32:
        self.ctx = {c: '0' for c in ['eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip', 'edi', 'esi',
                                     'cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']}
    elif self.arch == 64:
        self.ctx = {c: '0' for c in ['rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip', 'rdi', 'rsi',
                                     'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
                                     'cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']}
    self.IAT = []
    self.func_args = defaultdict(set)
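# func_args above is a defaultdict(set): call-site arguments accumulate per
# function with no membership checks. A minimal standalone sketch (the
# function names and argument values below are made up):
from collections import defaultdict

func_args = defaultdict(set)  # missing keys start as empty sets
for func, arg in [('CreateFileA', '0x401000'), ('CreateFileA', '0x1'), ('ExitProcess', '0x0')]:
    func_args[func].add(arg)  # no need to check whether func was seen before
print(sorted(func_args['CreateFileA']))  # ['0x1', '0x401000']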
def get_named_entities(documents, mincount=10):
    '''
    Given a list of texts, find words that start with a capital letter
    more than 70% of the time (and occur at least mincount times) and
    return them as named entities.
    '''
    word_count = defaultdict(int)
    word_capital = defaultdict(int)
    NEs = []
    token_pattern = r'(?u)(?<![#@])\b\w+\b'
    tp = re.compile(token_pattern)
    for doc in documents:
        words = tp.findall(doc)
        for word in words:
            if word[0].isupper():
                word_capital[word.lower()] += 1
            word_count[word.lower()] += 1
    for word, count in word_count.items():
        if count < mincount:
            continue
        capital = word_capital[word]
        percent = float(capital) / count
        if percent > 0.7:
            NEs.append(word)
    return NEs
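# A quick usage sketch for get_named_entities with toy documents (the sample
# texts are invented; mincount is lowered so the tiny corpus qualifies):
docs = [
    'Paris is lovely in spring',
    'I flew to Paris, then Paris again',
    'cheap paris hotels',
]
print(get_named_entities(docs, mincount=3))  # ['paris']: capitalized 3 of 4 times (> 0.7)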
def calc_log_prob_for_files(self, annotations):
    '''
    Calculate the log probs for the classification windows given in annotations.
    @param annotations: Annotations as read from the annotation file
    @return: tuple (features, labels). features is a list of log-prob matrices for the windows,
             labels is a numpy array of the labels for the respective windows.
    '''
    features = []
    labels = []
    # group annotations by filename (anno[3] holds the audio file name)
    annotation_dict = defaultdict(list)
    for anno in annotations:
        annotation_dict[anno[3]].append(anno)
    for filename, annos in annotation_dict.items():
        path = self.basepath + '/audio/' + filename
        self._calc_log_probs_for_windows(path, annos, features, labels)
    return features, np.array(labels)
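# The same group-by-key pattern recurs throughout these snippets. A standalone
# sketch with toy annotation tuples (the field layout is assumed from the code
# above, where anno[3] is the filename):
from collections import defaultdict

annotations = [
    (0.0, 1.5, 'speech', 'a.wav'),
    (2.0, 3.0, 'music', 'b.wav'),
    (3.5, 4.0, 'speech', 'a.wav'),
]
by_file = defaultdict(list)  # missing keys start as empty lists
for anno in annotations:
    by_file[anno[3]].append(anno)
print(dict(by_file))
# {'a.wav': [(0.0, 1.5, 'speech', 'a.wav'), (3.5, 4.0, 'speech', 'a.wav')],
#  'b.wav': [(2.0, 3.0, 'music', 'b.wav')]}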
def read_test_files(self, annotation_file):
'''
Read files for testing
'''
features_test = []
labels_test = []
annotation_file = self.basepath + '/annotations/general/' + annotation_file
annotations = self._read_annotations(annotation_file)
annotation_dict = defaultdict(list)
for anno in annotations:
annotation_dict[anno[3]].append(anno)
for filename, annos in annotation_dict.items():
path = self.basepath + '/audio/' + filename
features, labels = self._read_test_windows(path, annos)
features_test.extend(features)
labels_test.extend(labels)
return features_test, labels_test
def get_citation_positions(db, paper_id):
    query = """SELECT r.paper_id, cg.start, cg.end
               FROM refs r
               JOIN citations c ON r.id = c.ref_id
               JOIN citation_groups cg ON c.group_id = cg.id
               WHERE cited_paper_id = '%s' """ % paper_id
    cursor = db.query(query)
    rows = cursor.fetchall()
    # group citation (start, end) offsets by citing paper
    citations = defaultdict(list)
    for citing_paper, start, end in rows:
        citations[citing_paper].append((start, end))
    return citations
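# Interpolating paper_id straight into the SQL invites injection if the id is
# ever user-supplied. With a standard DB-API 2.0 connection (an assumption --
# the db wrapper above is opaque), the same query can be parameterized:
import sqlite3  # sqlite3 shown; other drivers differ only in placeholder style

paper_id = 'W1234'  # hypothetical id
conn = sqlite3.connect('papers.db')  # hypothetical database file
cur = conn.execute(
    """SELECT r.paper_id, cg.start, cg.end
       FROM refs r
       JOIN citations c ON r.id = c.ref_id
       JOIN citation_groups cg ON c.group_id = cg.id
       WHERE cited_paper_id = ?""",
    (paper_id,),  # the driver handles quoting and escaping
)
rows = cur.fetchall()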
def __getitem__(self, key):
for mapping in self.maps:
try:
return mapping[key] # can't use 'key in mapping' with defaultdict
except KeyError:
pass
return self.__missing__(key) # support subclasses that define __missing__
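# The inline comment above deserves unpacking: for a defaultdict, a plain
# mapping[key] lookup never raises KeyError -- it calls default_factory and
# inserts the result -- while 'key in mapping' only reports keys actually
# present. A small demonstration:
from collections import defaultdict

d = defaultdict(int)
print('x' in d)  # False -- membership test does not create the key
print(d['x'])    # 0     -- __getitem__ creates and returns the default
print('x' in d)  # True  -- the lookup above inserted the key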
def find_vm_addr(trace):
    """
    Find the virtual machine address.
    :param trace: instruction trace
    :return: virtual function start address
    """
    push_dict = defaultdict(int)
    vm_func_dict = defaultdict(int)
    # try to find the VM segment via a series of push instructions, which also identify the vm_addr
    for line in trace:
        try:
            if line.disasm[0] == 'push':
                push_dict[GetFunctionAttr(line.addr, FUNCATTR_START)] += 1
        except Exception:
            pass  # address not inside a known function
    vm_func = max(push_dict, key=push_dict.get)
    vm_seg_start = SegStart(vm_func)
    vm_seg_end = SegEnd(vm_func)
    # check whether vm_func is the largest function in the segment
    vm_funcs = Functions(vm_seg_start, vm_seg_end)
    for f in vm_funcs:
        vm_func_dict[f] = GetFunctionAttr(f, FUNCATTR_END) - GetFunctionAttr(f, FUNCATTR_START)
    if max(vm_func_dict, key=vm_func_dict.get) != vm_func:
        return AskAddr(vm_func,
                       "Found two possible addresses for the VM function start address: %s and %s. Choose one!" %
                       (vm_func, max(vm_func_dict, key=vm_func_dict.get)))
    else:
        return vm_func
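# The heuristic itself is plain counting: tally push instructions per
# containing function and take the argmax. A standalone sketch without the
# IDA API (the addresses and ownership below are made up):
from collections import defaultdict

trace = [  # (function_start, mnemonic) pairs, as if resolved from a trace
    (0x401000, 'push'), (0x401000, 'push'), (0x401000, 'mov'),
    (0x402000, 'push'),
    (0x401000, 'push'),
]
push_dict = defaultdict(int)  # missing functions start at 0
for func_start, mnemonic in trace:
    if mnemonic == 'push':
        push_dict[func_start] += 1
vm_func = max(push_dict, key=push_dict.get)  # function with the most pushes
print(hex(vm_func))  # 0x401000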
def find_virtual_regs(trace, manual=False, update=None):
    """
    Maps the virtual registers on the stack to the actual registers after the VM exit.
    :param trace: instruction trace
    :return: virtual registers dict which maps the real regs onto virtual ones via stack addresses
    """
    vmr = get_vmr()
    assert isinstance(trace, Trace)
    virt_regs = defaultdict(lambda: False)
    # trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)
    while trace:
        try:
            elem = trace.pop(len(trace) - 1)
            if len(elem.disasm) > 0 and elem.disasm[0] == 'pop':
                opnd = elem.disasm[1]
                if get_reg_class(opnd) is None:  # if not a register it is a mem_loc
                    pass
                elif virt_regs[opnd]:
                    pass
                else:
                    # the context always shows the registers after execution, so we need the SP from the instruction before
                    stack_addr = trace[len(trace) - 1].ctx[get_reg('rsp', trace.ctx_reg_size)]
                    virt_regs[opnd] = stack_addr
        except Exception:
            pass
    if update is not None:
        update.pbar_update(60)
    vmr.vm_stack_reg_mapping = virt_regs
    if manual:
        print ''.join('%s:%s\n' % (c, virt_regs[c]) for c in virt_regs.keys())
    return virt_regs
def city_dialect_words(model, vocab, filename='./city_ranking.txt'):
    # load named entities
    ne_file = './dumps/ne_' + dataset_name + '.json'
    with codecs.open(ne_file, 'r', encoding='utf-8') as fin:
        NEs = json.load(fin)
    NEs = set(NEs['nes'])
    k = 200
    with open('./data/cities.json', 'r') as fin:
        cities = json.load(fin)
    all_locs = np.array([[city['latitude'], city['longitude']] for city in cities]).astype('float32')
    all_probs = model.predict(all_locs)
    all_logprobs = np.log(all_probs)
    all_logprobs_mean = np.mean(all_logprobs, axis=0)
    city_dialectwords = defaultdict(list)
    cities = cities[0:k]
    for city in cities:
        name = city['city']
        lat, lon = city['latitude'], city['longitude']
        loc = np.array([[lat, lon]]).astype('float32')
        city_probs = model.predict(loc)
        city_logprobs = np.log(city_probs)
        # how much more likely each vocab word is here than on average
        normalized_city_logprobs = city_logprobs - all_logprobs_mean
        sorted_vocab_indices = np.argsort(normalized_city_logprobs)
        topwords = list(reversed(np.array(vocab)[sorted_vocab_indices][0].tolist()))[0:k]
        # mark topwords that are named entities with an NE_ prefix
        dialect_words = []
        for topword in topwords:
            if topword in NEs:
                topword = "NE_" + topword
            dialect_words.append(topword)
        city_dialectwords[name] = dialect_words
    # write the city_dialectwords to file
    with codecs.open(filename, 'w', encoding='utf-8') as fout:
        json.dump(city_dialectwords, fout, indent=4, sort_keys=True)
def read_files(self, annotations, channels):
    '''
    Read all files in the datapath and create the features_windows dictionary.
    @param annotations: Annotations as read from the annotation file
    @param channels: 1D numpy array of channel indices to use.
    @return: A dictionary containing a feature matrix [windows x features] with the classnames as keys
    '''
    if isinstance(channels, (int, np.int64)):
        channels = np.array([channels])
    elif isinstance(channels, list):
        channels = np.array(channels)
    features_frames = {classname: [] for classname in self.classes}
    features_windows = {classname: [] for classname in self.classes}
    # group annotations by filename, as in the other readers
    annotation_dict = defaultdict(list)
    for anno in annotations:
        annotation_dict[anno[3]].append(anno)
    for filename, annos in annotation_dict.items():
        path = self.basepath + '/audio/' + filename
        self._read_windows(path, annos, features_windows, features_frames, channels)
    return features_windows, features_frames
def __init__(self):
    # note: defaultdict() without a default_factory behaves like a plain dict
    self.root = defaultdict()

def insertUtil(self, minHeap, word, duplicate):
    if self.root is None:
        self.root = defaultdict()
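# If the intent is a trie of nested default dicts, the idiomatic recipe is a
# factory that returns itself; a minimal sketch, independent of the class above:
from collections import defaultdict

def trie():
    # each node is a defaultdict whose missing children are fresh nodes
    return defaultdict(trie)

root = trie()
for word in ['cat', 'car']:
    node = root
    for ch in word:
        node = node[ch]  # creates intermediate nodes on demand
    node['$'] = True     # end-of-word marker
print('a' in root['c'])  # True: both words share the 'c' -> 'a' prefix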
def read(self, fn):
    d = defaultdict(list)
    with open(fn) as fin:
        for line in fin:
            data = line.strip().split('\t')
            text, base_rel, rel = data[:3]
            args = data[3:]
            confidence = 1  # the input format carries no confidence, so use a constant
            curExtraction = Extraction(pred=rel, sent=text, confidence=float(confidence))
            for arg in args:
                curExtraction.addArg(arg)
            d[text].append(curExtraction)
    self.oie = d
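# The expected input appears to be one extraction per line -- sentence, base
# relation, relation, then any number of arguments, all tab-separated
# (inferred from the split above). A toy line for illustration:
line = 'Paris is in France\tbe in\tis in\tParis\tFrance'
fields = line.strip().split('\t')
text, base_rel, rel = fields[:3]
args = fields[3:]
print(rel)   # is in
print(args)  # ['Paris', 'France']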
def gen_trace(self, trace_start=BeginEA(), trace_end=BADADDR):
    """
    Generate a trace for the loaded binary.
    :param trace_start:
    :param trace_end:
    :return:
    """
    vmr = get_vmr()
    self.trace_init()
    # reset color
    heads = Heads(SegStart(ScreenEA()), SegEnd(ScreenEA()))
    for i in heads:
        SetColor(i, CIC_ITEM, 0xFFFFFF)
    # start execution
    RunTo(BeginEA())
    event = GetDebuggerEvent(WFNE_SUSP, -1)
    # enable tracing
    EnableTracing(TRACE_STEP, 1)
    if vmr.sys_libs:
        pass
    event = GetDebuggerEvent(WFNE_ANY | WFNE_CONT, -1)
    while True:
        event = GetDebuggerEvent(WFNE_ANY, -1)
        addr = GetEventEa()
        # change color of executed line
        current_color = GetColor(addr, CIC_ITEM)
        new_color = self.get_new_color(current_color)
        SetColor(addr, CIC_ITEM, new_color)
        # break on exception
        if event <= 1:
            break
    # standardize the difference between ida_trace.txt files and traces generated by the debugger hook:
    # dbg_trace returns the CPU context *before* an instruction executes, trace files the context *after*,
    # so shift each line's context to that of the following instruction
    for i, line in enumerate(self.trace):
        try:
            line.ctx = self.trace[i + 1].ctx
        except IndexError:
            line.ctx = defaultdict(lambda: '0')
    # return the trace; for population see dbg_trace() below
    msg('[*] Trace generated!\n')
    if vmr.extract_param:
        vmr.func_args = self.func_args
        for key in self.func_args.keys():
            print 'Function %s call args:' % key, ''.join('%s, ' % arg for arg in self.func_args[key]).rstrip(', ')
    return self.trace
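# The context-shifting step works on plain sequences too. A minimal sketch of
# the same alignment, with defaultdict(lambda: '0') standing in for the
# unknown context after the final instruction (toy register values):
from collections import defaultdict

ctxs = [{'eax': '1'}, {'eax': '2'}, {'eax': '3'}]  # contexts *before* each instruction
for i in range(len(ctxs)):
    try:
        ctxs[i] = ctxs[i + 1]  # context *after* instruction i
    except IndexError:
        ctxs[i] = defaultdict(lambda: '0')  # last context unknown; reads yield '0'
print(ctxs[0])         # {'eax': '2'}
print(ctxs[2]['eip'])  # 0 -- any register reads back as '0'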