mergerawfiles.py 文件源码-python代码片段

def handle(self, *args, **options):
        """Merge the yearly raw salary CSVs into a single ``merged.csv``.

        Reads ``salary/salary_<year>.csv`` under ``settings.DATA_DIR`` for
        every year from 2006 through 2015 and writes each kept row to
        ``merged.csv`` in ``settings.DATA_DIR``, using the columns returned
        by ``self.get_fieldnames()``.

        Per-row processing:

        * Rows whose name contains ``*`` (or ``---`` in the files up to
          2012) are skipped.
        * Files up to 2012 have a single comma-separated ``name`` column;
          it is split into ``last`` and ``first``.
        * ``year`` is added and the row is passed through
          ``self.clean_row()``.
        * When ``first`` holds more than one word, the first word is kept
          in ``first`` and the remaining words go to ``middle``, with
          periods removed.
        * ``name`` is rebuilt as ``"<last>, <first>"``.

        ``args`` and ``options`` are accepted but not used here.
        """
        # NOTE(review): on Python 3 the csv docs recommend opening CSV files
        # with newline=''; not applied here because the target Python
        # version is not visible from this block -- confirm and add.
        merged_file_path = os.path.join(settings.DATA_DIR, 'merged.csv')
        with open(merged_file_path, 'w') as merged_file:
            writer = csv.DictWriter(merged_file,
                                    fieldnames=self.get_fieldnames())
            writer.writeheader()

            for year in range(2006, 2016):
                print('Processing {}'.format(year))
                raw_file_path = os.path.join(settings.DATA_DIR,
                                             'salary',
                                             'salary_{}.csv'.format(year))
                with open(raw_file_path, 'r') as raw_file:
                    reader = csv.DictReader(raw_file)
                    for row in reader:
                        # First name and last name are distinct fields for
                        # 2013 onwards
                        if year > 2012:
                            # Don't want starred names
                            if '*' in row['last']:
                                continue
                        else:
                            # Don't want starred names
                            if '*' in row['name'] or '---' in row['name']:
                                continue
                            names = row['name'].split(',')
                            row['last'] = names[0]
                            # Splitting "Last, First" on ',' leaves a
                            # leading space on the given-name part; strip it
                            # so it cannot be mistaken for an empty first
                            # name further down.
                            row['first'] = ' '.join(names[1:]).strip()

                        row['year'] = str(year)
                        row = self.clean_row(row)

                        # Attempt to deal with middle names.  split() with
                        # no argument ignores leading/trailing whitespace
                        # and collapses runs of spaces, so no empty tokens
                        # end up in 'first' or 'middle'.
                        first_names = [name.replace('.', '')
                                       for name in row['first'].split()]
                        if len(first_names) > 1:
                            row['first'] = first_names[0]
                            row['middle'] = ' '.join(first_names[1:])

                        row['name'] = '{}, {}'.format(
                            row['last'],
                            row['first'])

                        writer.writerow(row)