def __call__(self, binary):
    """Build the section-information feature vector for a parsed PE binary.

    Args:
        binary: Parsed binary object. This method reads its ``sections``
            and ``entrypoint`` attributes and calls its
            ``section_from_offset`` method (presumably a LIEF PE binary --
            confirm against the caller).

    Returns:
        A 1-D numpy array of ``self.dtype``: five general section counts
        followed by five 50-bin hashed blocks, in this order: raw section
        sizes, section entropies, section virtual sizes, entry section
        name, entry section characteristics.
    """
    characteristics = lief.PE.SECTION_CHARACTERISTICS
    # Materialize once so every statistic below walks the same sequence.
    sections = list(binary.sections)

    # general statistics about sections
    general = [
        len(sections),  # total number of sections
        # number of sections with zero size
        sum(1 for s in sections if s.size == 0),
        # number of sections with an empty name
        sum(1 for s in sections if s.name == ""),
        # number of sections that are both readable and executable (RX)
        sum(1 for s in sections
            if s.has_characteristic(characteristics.MEM_READ)
            and s.has_characteristic(characteristics.MEM_EXECUTE)),
        # number of writable sections (W)
        sum(1 for s in sections
            if s.has_characteristic(characteristics.MEM_WRITE)),
    ]

    # gross characteristics of each section, as (name, value) pairs
    section_sizes = [(s.name, len(s.content)) for s in sections]
    section_entropy = [(s.name, s.entropy) for s in sections]
    section_vsize = [(s.name, s.virtual_size) for s in sections]

    # properties of the entry point's section or, if the entry point cannot
    # be resolved, of the first executable section
    try:
        entry = binary.section_from_offset(binary.entrypoint)
    except lief.not_found:
        entry = None
    if entry is None:
        # A lookup that returns None instead of raising is treated as a
        # miss too, so the documented fallback is applied in both cases.
        entry = next(
            (s for s in sections
             if s.has_characteristic(characteristics.MEM_EXECUTE)),
            None,
        )

    if entry is not None:
        entry_name = [entry.name]
        # e.g. ['SECTION_CHARACTERISTICS.CNT_CODE',
        #       'SECTION_CHARACTERISTICS.MEM_EXECUTE',
        #       'SECTION_CHARACTERISTICS.MEM_READ']
        entry_characteristics = [str(c) for c in entry.characteristics_lists]
    else:
        # no usable section at all: hash empty token lists
        entry_name = []
        entry_characteristics = []

    # One hasher per input type; each call below uses the same settings,
    # so there is no need to build a fresh object per feature group.
    pair_hasher = FeatureHasher(50, input_type="pair", dtype=self.dtype)
    string_hasher = FeatureHasher(50, input_type="string", dtype=self.dtype)

    # let's dump all this info into a single vector
    return np.concatenate([
        np.atleast_2d(np.asarray(general, dtype=self.dtype)),
        pair_hasher.transform([section_sizes]).toarray(),
        pair_hasher.transform([section_entropy]).toarray(),
        pair_hasher.transform([section_vsize]).toarray(),
        string_hasher.transform([entry_name]).toarray(),
        string_hasher.transform([entry_characteristics]).toarray(),
    ], axis=-1).flatten().astype(self.dtype)
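# Comment list          (appears to be scraped page navigation text, not code)
# Table of contents     (appears to be scraped page navigation text, not code)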