pandas_mongo_bridge.py 文件源码-python代码片段

def export_to(self, file_path, batchsize=1000):
        self.xls_writer = ExcelWriter(file_path)

        # get record count
        record_count = self._query_mongo(count=True)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object
        start = 0
        header = True
        while start < record_count:
            cursor = self._query_mongo(self.filter_query, start=start,
                                       limit=batchsize)

            data = self._format_for_dataframe(cursor)

            # write all cursor's data to their respective sheets
            for section_name, section in self.sections.iteritems():
                records = data[section_name]
                # TODO: currently ignoring nested repeats
                # so ignore sections that have 0 records
                if len(records) > 0:
                    # use a different group delimiter if needed
                    columns = section["columns"]
                    if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                        columns = [self.group_delimiter.join(col.split("/"))
                                   for col in columns]
                    columns = columns + self.EXTRA_COLUMNS
                    writer = XLSDataFrameWriter(records, columns)
                    writer.write_to_excel(self.xls_writer, section_name,
                                          header=header, index=False)
            header = False
            # increment counter(s)
            start += batchsize
            time.sleep(0.1)
        self.xls_writer.save()