# source.py -- file source, Python code snippet

def __init__(self, filepath, columns=None, skipheader=0,
                 fmtfunc=lambda x: x, **kwargs):
        """
        ReadCSV(filepath, columns, skipheader, fmtfunc, **kwargs)

        Read data in Comma Separated Format (CSV) from file.
        See also CSVWriter.
        Can also read Tab Separated Format (TSV) by providing the
        corresponding delimiter. Note that in the docstring below
        delimiter is '\\t' but in code it should be '\t'. See unit tests.

        The file is opened when the object is constructed and the first
        `skipheader` lines are consumed immediately. If the file has fewer
        lines than `skipheader`, no rows are produced (no error is raised).
        If construction fails after the file was opened, the file is closed
        again before the exception propagates.

        >>> from nutsflow import Collect
        >>> filepath = 'tests/data/data.csv'
        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=int) as reader:
        ...     reader >> Collect()
        [(1, 2, 3), (4, 5, 6)]

        >>> with ReadCSV(filepath, (2, 1), 1, int) as reader:
        ...     reader >> Collect()
        [(3, 2), (6, 5)]

        >>> with ReadCSV(filepath, 2, 1, int) as reader:
        ...     reader >> Collect()
        [3, 6]

        >>> filepath = 'tests/data/data.tsv'
        >>> with ReadCSV(filepath, skipheader=1, fmtfunc=int,
        ...                delimiter='\\t') as reader:
        ...     reader >> Collect()
        [(1, 2, 3), (4, 5, 6)]

        :param string filepath: Path to file in CSV format.
        :param tuple columns: Indices of the columns to read.
                              If None all columns are read.
        :param int skipheader: Number of header lines to skip.
        :param function fmtfunc: Function to apply to the elements of each row.
        :param kwargs kwargs: Keyword arguments for Python's CSV reader.
                              See https://docs.python.org/2/library/csv.html
        """
        # Normalise the cheap arguments first so that a failure here cannot
        # leave an open file handle behind.
        self.columns = columns if columns is None else as_tuple(columns)
        self.fmtfunc = fmtfunc
        self.csvfile = open(filepath, 'r')
        try:
            # Skip the header lines exactly once. The default passed to
            # next() keeps a file shorter than the header from leaking a
            # StopIteration out of the constructor.
            for _ in range(skipheader):
                next(self.csvfile, None)
            # Strip surrounding whitespace (including the line terminator)
            # from every line before it is handed to the CSV parser.
            stripped = (line.strip() for line in self.csvfile)
            self.reader = csv.reader(stripped, **kwargs)
        except Exception:
            # Do not leak the file handle if setup fails (e.g. invalid
            # keyword arguments for csv.reader).
            self.csvfile.close()
            raise