def get_dataframe_list(args, data_fields=('gene', 'raw_counts')):
    """Read the selected columns of every input table into a DataFrame.

    The file names come from two optional sources that are merged:
    ``args['files']`` (an iterable of paths, or a falsy value) and
    ``args['file_index']`` (path to a text file listing one path per line,
    or a falsy value).  The index file avoids an overlong command line when
    there are hundreds or thousands of inputs.

    Names are stripped of surrounding whitespace, blank entries are dropped,
    duplicates are removed and the result is sorted.

    Args:
        args: Mapping-like object supporting ``args['files']`` and
            ``args['file_index']``.
        data_fields: Columns to load from each table; passed unchanged to
            ``pd.read_table`` as ``usecols``.

    Returns:
        A ``(dfs, files)`` tuple: the list of DataFrames and the sorted list
        of file names, in matching order.
    """
    # Copy the caller's sequence: extending it in place would leak the raw
    # index lines back into ``args['files']``.
    candidates = list(args['files'] or [])

    if args['file_index']:
        with open(args['file_index']) as fp:
            candidates.extend(fp)

    # Strip whitespace/newlines, drop blanks, de-duplicate, then sort so the
    # returned order is deterministic.
    files = sorted({name.strip() for name in candidates} - {''})

    # Load only the requested columns from each table.
    dfs = [pd.read_table(f, usecols=data_fields) for f in files]
    return dfs, files  # a list of dataframes and the files index
# Comment list
# Table of contents