ocyara.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:OCyara 作者: bandrel 项目源码 文件源码
def _process_image(self, yara_rule: str, save_context: bool) -> None:
        """
        Perform OCR and yara rule matching as a worker.

        process_image() is used by the run() method to create multiple worker processes for
        parallel execution.  process_image normally will not be called directly.

        Arguments:
            yara_rule -- File path pointing to a Yara rule file
        """
        context = None
        handler = colorlog.StreamHandler()
        handler.setFormatter(colorlog.ColoredFormatter(
                '%(log_color)s%(levelname)s:%(name)s:%(message)s'))
        # Creates a logger object for the individual workers that contains the PID as part of the message header
        worker_logger = colorlog.getLogger('worker_'+str(os.getpid()))
        worker_logger.addHandler(handler)
        worker_logger.setLevel(self.logger.level)
        worker_logger.info('PID {0} created to process queue'.format(str(os.getpid())))
        while True:
            try:
                image, filepath = self.q.get(timeout=.25)
            except Empty:
                if self.total_added_to_queue[0] == self.total_items_to_queue[0]:
                    worker_logger.debug('Queue Empty PID %d exiting' % os.getpid())
                    return
                else:
                    worker_logger.debug('Queue still loading')
                    continue
            ocrtext = tesserocr.image_to_text(image)
            try:
                rules = yara.compile(yara_rule)
            except yara.Error:
                worker_logger.error('Invalid rule file/rule file not found: {0}'.format(yara_rule))
            matches = rules.match(data=ocrtext)
            # If there is a match, store the filename and the rule in a dictionary local to this process.
            # Create an editable copy of the master manager dict.
            with self.lock:
                local_results_dict = self.matchedfiles[0]
                if matches:
                    if save_context:
                        context = ocrtext
                    for x in matches:
                        try:
                            local_results_dict[filepath].append((x.rule, context))
                        except KeyError:
                            local_results_dict[filepath] = [(x.rule, context)]
                    self.matchedfiles[0] = local_results_dict
            self.queue_items_completed[0] += 1
            self.q.task_done()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号