Examples of pandas to_numeric() from open-source projects

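pd.to_numeric() converts the values of a scalar, list, or Series to a numeric dtype. The errors parameter controls what happens to values that cannot be parsed: 'raise' (the default) throws a ValueError, 'coerce' replaces them with NaN, and 'ignore' returns the input unchanged (this last mode is deprecated in recent pandas releases). A minimal sketch of the patterns that recur in the snippets below:

import pandas as pd

s = pd.Series(['1', '2.5', 'apple'])

# errors='coerce': unparseable values become NaN
pd.to_numeric(s, errors='coerce')        # 1.0, 2.5, NaN

# errors='raise' (the default) fails loudly on 'apple'
try:
    pd.to_numeric(s)
except ValueError as err:
    print('failed to parse:', err)

# The usual DataFrame pattern: convert one column and assign it back
df = pd.DataFrame({'a': ['1', '2'], 'b': ['x', 'y']})
df['a'] = pd.to_numeric(df['a'], errors='coerce')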
itasserprop.py (project: ssbio, author: SBRG)
def df_coach_bsites(self):
        df_cols = ['site_num', 'c_score', 'cluster_size', 'algorithm',
                   'pdb_template_id', 'pdb_template_chain', 'pdb_ligand',
                   'binding_location_coords', 'c_score_method', 'binding_residues',
                   'ligand_cluster_counts']

        bsites_inf_df = pd.DataFrame.from_records(self.coach_bsites, columns=df_cols).drop_duplicates().reset_index(drop=True)

        if bsites_inf_df.empty:
            log.warning('Empty dataframe')
            return bsites_inf_df
        else:
            bsites_inf_df['c_score'] = pd.to_numeric(bsites_inf_df.c_score, errors='coerce')
            bsites_inf_df['cluster_size'] = pd.to_numeric(bsites_inf_df.cluster_size, errors='coerce')
            return ssbio.utils.clean_df(bsites_inf_df)
itasserprop.py (project: ssbio, author: SBRG)
def df_coach_go(self):
        cols = ['go_id', 'go_term', 'c_score']

        go_all_df = pd.DataFrame()

        for go_list in [self.coach_go_mf, self.coach_go_cc, self.coach_go_bp]:
            go_df = pd.DataFrame.from_records(go_list, columns=cols).drop_duplicates().reset_index(drop=True)
            go_df['c_score'] = pd.to_numeric(go_df.c_score, errors='coerce')

            if go_all_df.empty:
                go_all_df = go_df
            else:
                # DataFrame.append returns a new frame (it was removed in
                # pandas 2.0); assign the result back, or every GO table
                # after the first is silently discarded.
                go_all_df = go_all_df.append(go_df)

        return go_all_df
itasserprop.py (project: ssbio, author: SBRG)
def parse_coach_ec_df(infile):
    """Parse the EC.dat output file of COACH and return a dataframe of results

    EC.dat contains the predicted EC number and active residues.
        The columns are: PDB_ID, TM-score, RMSD, Sequence identity,
        Coverage, Confidence score, EC number, and Active site residues

    Args:
        infile (str): Path to EC.dat

    Returns:
        DataFrame: Pandas DataFrame summarizing EC number predictions

    """

    ec_df = pd.read_table(infile, delim_whitespace=True,
                          names=['pdb_template', 'tm_score', 'rmsd', 'seq_ident', 'seq_coverage',
                                 'c_score', 'ec_number', 'binding_residues'])

    ec_df['pdb_template_id'] = ec_df['pdb_template'].apply(lambda x: x[:4])
    ec_df['pdb_template_chain'] = ec_df['pdb_template'].apply(lambda x: x[4])

    ec_df = ec_df[['pdb_template_id', 'pdb_template_chain', 'tm_score', 'rmsd',
                   'seq_ident', 'seq_coverage', 'c_score', 'ec_number', 'binding_residues']]
    ec_df['c_score'] = pd.to_numeric(ec_df.c_score, errors='coerce')

    return ec_df
dovseries.py (project: pydov, author: DOV-Vlaanderen)
def _get_peilmetingen_df(self):
        """"""
        doc_df = pd.DataFrame(list(self.get_peilmetingen()),
                              columns=["grondwaterlocatie",
                                       "filternummer",
                                       "datum",
                                       "diepte",
                                       "methode",
                                       "betrouwbaarheid"])
        doc_df["datum"] = pd.to_datetime(doc_df["datum"])
        doc_df["diepte"] = pd.to_numeric(doc_df["diepte"])
        doc_df = doc_df.set_index("datum")
        return doc_df
dovseries.py (project: pydov, author: DOV-Vlaanderen)
def _get_observaties_df(self):
        """"""
        doc_df = pd.DataFrame(list(self.get_observaties()),
                              columns=["grondwaterlocatie",
                                       "filternummer",
                                       "monsternummer",
                                       "datum",
                                       "parameter",
                                       "waarde",
                                       "eenheid",
                                       "betrouwbaarheid"])
        doc_df["datum"] = pd.to_datetime(doc_df["datum"])
        doc_df["waarde"] = pd.to_numeric(doc_df["waarde"])
        return doc_df
functions.py (project: pauvre, author: conchoecia)
def filter_fastq_length_meanqual(df, min_len, max_len,
                                 min_mqual, max_mqual):
    querystring = "length >= {0} and meanQual >= {1}".format(min_len, min_mqual)
    if max_len is not None:
        querystring += " and length <= {}".format(max_len)
    if max_mqual is not None:
        querystring += " and meanQual <= {}".format(max_mqual)
    print("Keeping reads that satisfy: {}".format(querystring), file=stderr)
    filtdf = df.query(querystring)
    #filtdf["length"] = pd.to_numeric(filtdf["length"], errors='coerce')
    #filtdf["meanQual"] = pd.to_numeric(filtdf["meanQual"], errors='coerce')
    return filtdf
_visualizer.py (project: q2-diversity, author: qiime2)
def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    # convert metadata to numeric values where applicable, drop the non-numeric
    # values, and then drop samples that contain NaNs
    df = metadata.to_dataframe()
    df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

    # filter categorical columns
    pre_filtered_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    filtered_categorical_cols = pre_filtered_cols - set(df.columns)

    # filter 0 variance numerical columns
    pre_filtered_cols = set(df.columns)
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = pre_filtered_cols - set(df.columns)

    # filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the filtering
    # so that information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = skbio.stats.distance.bioenv(distance_matrix, df)
    result = q2templates.df_to_html(result)

    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'filtered_categorical_cols': ', '.join(filtered_categorical_cols),
        'filtered_zero_variance_cols': ', '.join(filtered_zero_variance_cols),
        'result': result})
zipline.py (project: quantrocket-client, author: quantrocket-llc)
def from_csv(cls, filepath_or_buffer):

        # Import pandas lazily since it can take a moment to import
        try:
            import pandas as pd
        except ImportError:
            raise ImportError("pandas must be installed to use ZiplineBacktestResult")

        zipline_result = cls()

        results = pd.read_csv(
            filepath_or_buffer,
            parse_dates=["date"],
            index_col=["dataframe", "index", "date", "column"])["value"]

        # Extract returns
        returns = results.loc["returns"].unstack()
        returns.index = returns.index.droplevel(0).tz_localize("UTC")
        zipline_result.returns = returns["returns"].astype(float)

        # Extract positions
        positions = results.loc["positions"].unstack()
        positions.index = positions.index.droplevel(0).tz_localize("UTC")
        zipline_result.positions = positions.astype(float)

        # Extract transactions
        transactions = results.loc["transactions"].unstack()
        transactions.index = transactions.index.droplevel(0).tz_localize("UTC")
        zipline_result.transactions = transactions.apply(pd.to_numeric, errors='ignore')

        # Extract benchmark returns
        benchmark_returns = results.loc["benchmark"].unstack()
        benchmark_returns.index = benchmark_returns.index.droplevel(0).tz_localize("UTC")
        zipline_result.benchmark_returns = benchmark_returns["benchmark"].astype(float)

        # Extract performance dataframe
        perf = results.loc["perf"].unstack()
        perf.index = perf.index.droplevel(0).tz_localize("UTC")
        zipline_result.perf = perf.apply(pd.to_numeric, errors='ignore')

        return zipline_result
DataClean_GS_Analysis5.py (project: Python-Scripts-Repo-on-Data-Science, author: qalhata)
def check_null_or_valid(row_data):
    """Function that takes a row of data,
    drops all missing values,
    and checks if all remaining values are greater than or equal to 0
    """
    no_na = row_data.dropna()[1:-1]
    numeric = pd.to_numeric(no_na)
    ge0 = numeric >= 0
    return ge0

# Check whether the first column is 'Life expectancy'
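The dangling comment above comes from the original exercise script. A hypothetical usage sketch (the DataFrame g and its column names are invented for illustration, not taken from that script); note that the [1:-1] slice in check_null_or_valid also trims the first and last retained values of each row:

import pandas as pd

# Wide-format table: a label column followed by yearly values as strings.
g = pd.DataFrame({
    'Life expectancy': ['Afghanistan', 'Albania'],
    '1960': ['32.3', '62.2'],
    '1961': ['32.7', '63.1'],
    '1962': ['33.0', '63.9'],
    '1963': ['33.4', '64.6'],
})

# Check whether the first column is 'Life expectancy'
assert g.columns[0] == 'Life expectancy'

# Row-wise check over the value columns; .all().all() is True only if
# every retained value in every row is >= 0.
assert g.iloc[:, 1:].apply(check_null_or_valid, axis=1).all().all()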
desert_mirage_lib.py (project: desert-mirage, author: valentour)
def eliminate_invalids(df, cols):
    """Eliminate invalid data in ``cols`` of ``df``."""
    numdf = df.drop(cols, axis=1).join(df[cols].apply(pd.to_numeric,
                                                      errors='coerce'))
    numdf = numdf[~numdf[cols].isnull().apply(np.any, axis=1)]
    return numdf
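A hedged usage sketch (the DataFrame and column names are invented for illustration): rows whose target columns fail numeric coercion are dropped.

import pandas as pd

df = pd.DataFrame({'site': ['A', 'B', 'C'],
                   'depth': ['1.5', 'n/a', '3.0']})

clean = eliminate_invalids(df, ['depth'])
# 'n/a' coerces to NaN, so row B is dropped; A and C survive with
# 'depth' now a float column.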
desert_mirage_lib.py (project: desert-mirage, author: valentour)
def partial_convert_only_numerics(df):
    """Convert ``df`` numeric cols and try to coerce any errors encountered."""
    col_dict = df_cols_by_type(df)
    partial_convert = partial(pd.to_numeric, errors='coerce')
    # apply() returns a new frame; assign the result back, otherwise the
    # coerced values are silently discarded.
    df[col_dict['numeric']] = df[col_dict['numeric']].apply(partial_convert)
    return df

# Useful one-liners.
# df.select_dtypes(include=['bool'])
# list(df.select_dtypes(include=['bool']).columns)
onsset.py (project: PyOnSSET, author: KTH-dESA)
def condition_df(self):
        """
        Do any initial data conditioning that may be required.
        """

        logging.info('Ensure that columns that are supposed to be numeric are numeric')
        self.df[SET_GHI] = pd.to_numeric(self.df[SET_GHI], errors='coerce')
        self.df[SET_WINDVEL] = pd.to_numeric(self.df[SET_WINDVEL], errors='coerce')
        self.df[SET_NIGHT_LIGHTS] = pd.to_numeric(self.df[SET_NIGHT_LIGHTS], errors='coerce')
        self.df[SET_ELEVATION] = pd.to_numeric(self.df[SET_ELEVATION], errors='coerce')
        self.df[SET_SLOPE] = pd.to_numeric(self.df[SET_SLOPE], errors='coerce')
        self.df[SET_LAND_COVER] = pd.to_numeric(self.df[SET_LAND_COVER], errors='coerce')
        self.df[SET_GRID_DIST_CURRENT] = pd.to_numeric(self.df[SET_GRID_DIST_CURRENT], errors='coerce')
        self.df[SET_GRID_DIST_PLANNED] = pd.to_numeric(self.df[SET_GRID_DIST_PLANNED], errors='coerce')
        self.df[SET_SUBSTATION_DIST] = pd.to_numeric(self.df[SET_SUBSTATION_DIST], errors='coerce')
        self.df[SET_ROAD_DIST] = pd.to_numeric(self.df[SET_ROAD_DIST], errors='coerce')
        self.df[SET_HYDRO_DIST] = pd.to_numeric(self.df[SET_HYDRO_DIST], errors='coerce')
        self.df[SET_HYDRO] = pd.to_numeric(self.df[SET_HYDRO], errors='coerce')
        self.df[SET_SOLAR_RESTRICTION] = pd.to_numeric(self.df[SET_SOLAR_RESTRICTION], errors='coerce')

        logging.info('Replace null values with zero')
        self.df.fillna(0, inplace=True)

        logging.info('Sort by country, Y and X')
        self.df.sort_values(by=[SET_COUNTRY, SET_Y, SET_X], inplace=True)

        logging.info('Add columns with location in degrees')
        project = Proj('+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs')

        def get_x(row):
            x, y = project(row[SET_X] * 1000, row[SET_Y] * 1000, inverse=True)
            return x

        def get_y(row):
            x, y = project(row[SET_X] * 1000, row[SET_Y] * 1000, inverse=True)
            return y

        self.df[SET_X_DEG] = self.df.apply(get_x, axis=1)
        self.df[SET_Y_DEG] = self.df.apply(get_y, axis=1)
addViscous_Bowcutt.py (project: VC3D, author: AlexanderWard1)
def saveSlice_CSV(self, outputFilename=outputFilename, xSlice=[], ySlice=[], zSlice=[]):
        """ Take a slice and save it to csv """
        outputFilename += '_slice.csv'

        # TODO: honour the xSlice/ySlice/zSlice arguments (slice at each
        # requested coordinate within a small tolerance); for now a fixed
        # slab around z = 0 is extracted below.
        flowData = self.flowData.apply(pd.to_numeric, errors='ignore')

        slicedData_indices = (flowData["z"] > -0.01) & (flowData["z"] < 0.01)

        slicedData = flowData.loc[slicedData_indices]

        slicedData.to_csv(outputFilename, sep=',', index=0, index_label=0)

        print "Slices saved in", outputFilename
addViscous.py (project: VC3D, author: AlexanderWard1)
def saveSlice_CSV(self, outputFilename=outputFilename, xSlice=[], ySlice=[], zSlice=[]):
        """ Take a slice and save it to csv """
        outputFilename += '_slice.csv'

        # TODO: honour the xSlice/ySlice/zSlice arguments (slice at each
        # requested coordinate within a small tolerance); for now a fixed
        # band around y = 0.6 on the z = 0 plane is extracted below.

        flowData = self.flowData.apply(pd.to_numeric, errors='ignore')

        slicedData_indices = (flowData["y"] > 0.598) & (flowData["y"] < 0.602) & (flowData["z"] == 0)

        slicedData = flowData.loc[slicedData_indices]

        slicedData.to_csv(outputFilename, sep=',', index=0, index_label=0)

        print "Slices saved in", outputFilename
common.py (project: singlecell-dash, author: czbiohub)
def maybe_to_numeric(series):
    try:
        return pd.to_numeric(series)
    except ValueError:
        return series
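The try/except wrapper makes a handy building block for column-wise conversion when only some columns are numeric; a small usage sketch:

import pandas as pd

df = pd.DataFrame({'count': ['1', '2', '3'], 'label': ['a', 'b', 'c']})

# Convert every column that parses cleanly; leave the rest untouched.
df = df.apply(maybe_to_numeric)
df.dtypes   # count becomes int64, label stays object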
core.py (project: SSieve, author: davidimprovz)
def createPriceHistoryReport(self, stock):
        """
        Calls get10YrPriceHistory() to package a price history report into a pandas DataFrame, then cleans and returns the data.

        This function will acquire a price history for the provided symbol, which must be a string and a valid stock symbol
        along with the symbol's exchange, e.g., ('MMM', 'NYSE'). The get10YrPriceHistory() function requires the exchange.

        After the data is loaded, the function adds a Symbol field to the price history for tracking in the database, reindexes 
        and renames some fields, properly formats the dates into datetime fields, and converts prices from strings to floats.

        Returns the report as a pandas DataFrame if successful, otherwise a tuple (False, error message).

        Example Usage: createPriceHistoryReport(('MMM', 'NYSE'))
        """
        try:
            # get the raw data from morningstar    
            price_history = self.get10YrPriceHistory(stock)

            if isinstance(price_history, pd.DataFrame): # the price_history has to exist, or else return the err msg of the function called

                price_history['Symbol'] = stock[0]
                # reorganize header order
                price_history = price_history.reindex(columns=['Symbol','Date','Open','High','Low','Close','Volume'])
                # rename the Date column for easier processing through SQLite's Date functionality
                price_history.rename(columns={'Date':'Reference'}, inplace=True)
                # convert all dates to ISO formatted yyyy-mm-dd strings
                price_history['Reference'] = price_history['Reference'].apply(lambda x: time.strftime("%Y-%m-%d", time.strptime(x, "%m/%d/%Y")))

                # Convert volumes to numeric: strip thousands separators and
                # coerce unparseable entries (the '???' placeholders that
                # raised unicode errors) to NaN.
                price_history['Volume'] = pd.to_numeric(price_history['Volume'].str.replace(',',''), errors='coerce')
                # set index b/f db commit so no duplicate numeric index columns
                price_history.set_index(['Symbol'], inplace=True)

            return price_history

        except Exception as e:
            return (False, e)

    # get10YrPriceHistory
    # ******************* #
charades.py (project: actions-for-actions, author: gsig)
def load_groundtruth(self):
        gt_labels = pd.read_csv(self.data_path)
        if self.subset is not None:
            mask = [x in self.subset for x in gt_labels['id'].values]
            gt_labels = gt_labels[mask]
            assert np.any(np.array(mask))
        gt_labels['length'] = pd.to_numeric(gt_labels['length'])
        gt_labels['actions'].fillna('', inplace=True)
        self.gt_labels = gt_labels
tracking.py (project: georges, author: chernals)
def read_madx_tracking(file):
    """Read a MAD-X Tracking onetable=true file to a dataframe."""
    column_names = ['ID', 'TURN', 'X', 'PX', 'Y', 'PY', 'T', 'PT', 'S', 'E']
    data = pd.read_csv(file, skiprows=MADX_TRACKING_SKIP_ROWS, delim_whitespace=True, names=column_names)
    return data.apply(pd.to_numeric, errors="ignore").dropna()
metadata.py (project: qiime2, author: qiime2)
def to_dataframe(self, cast_numeric=False):
        df = self._dataframe.copy()

        if cast_numeric:
            df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

        return df
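Many of the snippets above lean on errors='ignore' to convert only the columns that happen to parse. That mode is deprecated as of pandas 2.2, so the try/except pattern of maybe_to_numeric above is the forward-compatible replacement; a minimal sketch, assuming plain pandas:

import pandas as pd

def to_numeric_where_possible(df):
    """Convert each column that parses cleanly; keep the others as-is."""
    out = df.copy()
    for col in out.columns:
        try:
            out[col] = pd.to_numeric(out[col])
        except (ValueError, TypeError):
            pass  # non-numeric column: leave unchanged
    return out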

