def checkFSXvalsAgainstADNIMERGE(tadpoleDF, mriADNI1FileFSX, otherSSvisCodeStr, ssNameTag,
                                 ignoreMissingCols=False):
    nrRows, nrCols = tadpoleDF.shape
    ssDF = pd.read_csv(mriADNI1FileFSX)  # assumed: the original snippet used ssDF without defining it
    colListOtherSS = list(ssDF.columns.values)
    colListTadpoleDF = list(tadpoleDF.columns.values)
    hippoCols = ['Hippocampus', 'ST29SV%s' % ssNameTag, 'ST88SV%s' % ssNameTag]
    tadpoleDF[hippoCols] = tadpoleDF[hippoCols].apply(pd.to_numeric, errors='coerce')
    tadpoleDF['HIPPOSUM'] = tadpoleDF['ST29SV%s' % ssNameTag] + tadpoleDF['ST88SV%s' % ssNameTag]
    for r in range(nrRows):
        valsNan = np.isnan(tadpoleDF['Hippocampus'][r]) or (np.isnan(tadpoleDF['ST29SV%s' % ssNameTag][r]) and
                                                            np.isnan(tadpoleDF['ST88SV%s' % ssNameTag][r]))
        if valsNan:
            continue
        valsNotEq = tadpoleDF['Hippocampus'][r] != (tadpoleDF['ST29SV%s' % ssNameTag][r] + tadpoleDF['ST88SV%s' % ssNameTag][r])
        if valsNotEq:
            print("entries don't match\n ", tadpoleDF[['RID', 'VISCODE', 'Hippocampus', 'ST29SV%s' % ssNameTag,
                                                       'ST88SV%s' % ssNameTag, 'HIPPOSUM']].iloc[r])
    # Conclusion: the entries above don't match because UCSFFSX has duplicate
    # entries for the same subject and viscode.
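# A minimal sketch (not part of the original source) of how those duplicate
# rows could be collapsed before comparing; it assumes the same RID/VISCODE
# column names and that pandas is imported as pd, as elsewhere in these
# snippets. Keeping the first scan per visit is an arbitrary choice.
def dropDuplicateScans(df):
    # One row per (RID, VISCODE) pair; which duplicate to keep is a choice.
    return df.drop_duplicates(subset=['RID', 'VISCODE'], keep='first')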
def load_submission(self, submission_file):
    loc_submission = pd.read_csv(submission_file, header=None)
    build_proc_sub = loc_submission[0].str.split(' ').values.tolist()
    assert len(build_proc_sub[0]) == self.n_classes + len(self.submission_columns)
    proc_sub = pd.DataFrame.from_records(build_proc_sub,
                                         columns=self.submission_columns + list(range(self.n_classes)))
    if self.subset is not None:
        if type(proc_sub['frame_id'].values[0]) is np.ndarray:
            mask = [x[0] in self.subset for x in proc_sub['frame_id'].values]
        else:
            # old pandas version
            mask = [x in self.subset for x in proc_sub['frame_id'].values]
        proc_sub = proc_sub[mask]
        assert np.any(np.array(mask))
    num_proc_sub = proc_sub.apply(pd.to_numeric, errors='ignore')
    grouped_by_vid = num_proc_sub
    self.submission = grouped_by_vid
def build_dataframe(self):
    if not self.values.exists():
        return pd.DataFrame()
    # Am I really a programmer or just a lego assembler?
    # Pandas makes my life at least 20 times easier.
    df = pd.DataFrame.from_records(self.values, index=self.index_column)
    # make the columns and labels prettier
    if self.rename_columns:
        df = df.rename(columns=self.column_mapping)
    df.index.name = TIME_COLUMN_NAME
    try:
        df.index = df.index.tz_convert(self.user.pytz_timezone)
    except AttributeError:
        # an AttributeError means the index is a plain Index, i.e. only
        # dates (and not times) were passed
        df.index = pd.DatetimeIndex(df.index, tz=self.user.pytz_timezone)
    # cast to numerics where possible; string columns are left untouched
    df = df.apply(pd.to_numeric, errors='ignore')
    return df
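# For context (illustration, not from the original source): with
# errors='ignore', pd.to_numeric returns its input unchanged whenever any
# element fails to parse, so a column-wise apply converts clean columns and
# silently leaves string columns alone.
example = pd.DataFrame({'a': ['1', '2'], 'b': ['x', 'y']})
example = example.apply(pd.to_numeric, errors='ignore')
# example.dtypes -> a: int64, b: object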
def _cond_ind_effects_wrapper(self):
    """
    A wrapper for the conditional indirect effects.
    :return: pd.DataFrame
        A DataFrame of effects, se, llci, and ulci, for the conditional indirect effects.
    """
    symb_to_var = self._symb_to_var
    results = self.estimation_results
    rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
    cols_stats = ["Effect", "Boot SE", "BootLLCI", "BootULCI"]
    mod_values = self._moderators_values
    med_values = [[symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]]
    values = med_values + mod_values
    rows_levels = np.array([i for i in product(*values)])
    cols_levels = ["Mediator"] + [symb_to_var.get(x, x) for x in self._moderators_symb]
    rows = np.concatenate([rows_levels, rows_stats], axis=1)
    cols = cols_levels + cols_stats
    df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
    return df.apply(pd.to_numeric, args=["ignore"])
def _simple_ind_effects_wrapper(self):
    """
    A wrapper for the indirect effects (and for total/contrast effects if specified).
    :return: pd.DataFrame
        A DataFrame of effects, se, llci, and ulci, for the simple/total/contrasts of indirect effects.
    """
    symb_to_var = self._symb_to_var
    results = self.estimation_results
    rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
    med_names = [symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]
    rows_levels = []
    if self._options["total"]:
        rows_levels += ["TOTAL"]
    rows_levels += med_names
    if self._options["contrast"]:
        contrasts = ["Contrast: {} vs. {}".format(a, b) for a, b in combinations(med_names, 2)]
        rows_levels += contrasts
    rows_levels = np.array(rows_levels).reshape(-1, 1)
    rows = np.concatenate([rows_levels, rows_stats], axis=1)
    cols = ["", "Effect", "Boot SE", "BootLLCI", "BootULCI"]
    df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
    return df.apply(pd.to_numeric, args=["ignore"])
def _PMM_index_wrapper(self):
    """
    A wrapper for the Partial Moderated Mediation index.
    :return: pd.DataFrame
        A DataFrame of effects, se, llci, and ulci, for the PMM index.
    """
    symb_to_var = self._symb_to_var
    results = self._PMM_index()
    rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
    cols_stats = ["Index", "Boot SE", "LLCI", "ULCI"]
    mod_names = [[symb_to_var.get(i, i) for i in self._moderators_symb]]
    med_names = [[symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]]
    values = mod_names + med_names
    rows_levels = np.array([i for i in product(*values)])
    cols_levels = ["Moderator", "Mediator"]
    rows = np.concatenate([rows_levels, rows_stats], axis=1)
    cols = cols_levels + cols_stats
    df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
    return df.apply(pd.to_numeric, args=["ignore"])
def _MMM_index_wrapper(self):
    """
    A wrapper for the Moderated Moderated Mediation index.
    :return: pd.DataFrame
        A DataFrame of effects, se, llci, and ulci, for the MMM index.
    """
    symb_to_var = self._symb_to_var
    results = self._MMM_index()
    rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
    cols_stats = ["Index", "Boot SE", "BootLLCI", "BootULCI"]
    med_names = [[symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]]
    rows_levels = np.array([i for i in product(*med_names)])
    cols_levels = ["Mediator"]
    rows = np.concatenate([rows_levels, rows_stats], axis=1)
    cols = cols_levels + cols_stats
    df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
    return df.apply(pd.to_numeric, args=["ignore"])
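# Note on the wrappers above (illustration, not from the original source):
# args=["ignore"] fills pd.to_numeric's second positional parameter, errors,
# so in the pandas versions these snippets target the call is equivalent to
# the keyword form.
demo = pd.DataFrame({'Effect': ['0.12', '0.30'], 'Mediator': ['m1', 'm2']})
assert demo.apply(pd.to_numeric, args=["ignore"]).equals(
    demo.apply(pd.to_numeric, errors="ignore"))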
def clean_data(DT_df, attributes):
    """Data preprocessing."""
    # DT_df = DT_df.drop(drop_cols, axis=1)
    DT_df["fs_scan_amt_pre"] = DT_df["fs_scan_amt_pre"].astype(float)
    DT_df["fs_scan_amt_pos"] = DT_df["fs_scan_amt_pos"].astype(float)
    DT_df["fs_scan_amt_pos_PF"] = DT_df["fs_scan_amt_pos_PF"].astype(float)
    DT_df["dyn_margin_amt_pre"] = DT_df["dyn_margin_amt_pre"].astype(float)
    DT_df["dyn_margin_amt_pos"] = DT_df["dyn_margin_amt_pos"].astype(float)
    DT_df["dyn_margin_amt_pos_PF"] = DT_df["dyn_margin_amt_pos_PF"].astype(float)
    DT_df["ctl_grp_ind"] = DT_df["ctl_grp_ind"].astype(int)
    DT_df["mailer_version_id"] = DT_df["mailer_version_id"].astype(int)
    DT_df["tcm_redeem_md"] = pd.to_numeric(DT_df["tcm_redeem_md"])
    for attr in attributes:
        DT_df[attr] = DT_df[attr].astype(int)
    fields = attributes + ["fs_scan_amt_pre", "fs_scan_amt_pos", "fs_scan_amt_pos_PF",
                           "dyn_margin_amt_pre", "dyn_margin_amt_pos", "dyn_margin_amt_pos_PF",
                           "ctl_grp_ind", "mailer_version_id", "tcm_redeem_md", "xtra_card_nbr"]
    DT_df = DT_df[fields]
    return DT_df
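# The astype(float) calls above raise on unparseable values, while
# pd.to_numeric can degrade gracefully; a small illustration (not from the
# original source):
s = pd.Series(['1.5', 'oops'])
# s.astype(float)                    # raises ValueError
pd.to_numeric(s, errors='coerce')    # [1.5, NaN]: bad cells become NaN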
def _get_table(self, column, is_size=True):
    cols = list(range(5))
    cols.append(self.header.index(column))
    header = [self.header[c] for c in cols]
    rows = [
        [row[c] for c in cols]
        for row in self.rows
    ]
    if is_size:
        for row in rows:
            row[5] = parse_size(row[5])
    table = pd.DataFrame.from_records(rows, columns=header)
    table = table.rename(columns={
        'prog': 'Program',
        'prog2': 'Program2',
        'threads': 'Threads',
        'dataset': 'Dataset',
        'qcut': 'Quality',
    })
    table['Threads'] = pd.to_numeric(table['Threads'])
    table['Dataset'] = pd.Categorical(table['Dataset'])
    table['Program'] = pd.Categorical(table['Program'])
    table['Program2'] = pd.Categorical(table['Program2'])
    return table
def __init__(self, filename=TABLE_FILENAME):
    MS = SpectralTypeRelations.MainSequence()
    # Read in the table.
    colspecs = [[0, 7], [7, 14], [14, 21], [21, 28], [28, 34], [34, 40], [40, 47], [47, 55],
                [55, 63], [63, 70], [70, 78], [78, 86], [86, 94], [94, 103], [103, 110],
                [110, 116], [116, 122], [122, 130], [130, 137], [137, 144], [144, 151],
                [151, 158]]
    mam_df = pd.read_fwf(filename, header=20, colspecs=colspecs, na_values=['...'])[:92]
    # Strip the * from the logAge column. Probably shouldn't, but...
    mam_df['logAge'] = mam_df['logAge'].map(lambda s: s.strip('*') if isinstance(s, basestring) else s)
    # Convert everything to floats
    for col in mam_df.columns:
        mam_df[col] = pd.to_numeric(mam_df[col], errors='ignore')
    # Add the spectral type number for interpolation
    mam_df['SpTNum'] = mam_df['SpT'].map(MS.SpT_To_Number)
    self.mam_df = mam_df
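# The strip-then-convert pattern above, reduced to a standalone illustration
# (Python 3 spelling; `basestring` in the snippet is Python 2):
ages = pd.Series(['9.85*', '9.72', None])
ages = ages.map(lambda v: v.strip('*') if isinstance(v, str) else v)
ages = pd.to_numeric(ages, errors='ignore')   # float64 once the '*' flags are gone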
# consensus.py, from the Comparative-Annotation-Toolkit project (author: ComparativeGenomicsToolkit)
def load_metrics_from_db(db_path, tx_mode, aln_mode):
    """
    Loads the alignment metrics for the mRNA/CDS alignments of transMap/AugustusTM/TMR.
    """
    session = tools.sqlInterface.start_session(db_path)
    metrics_table = tools.sqlInterface.tables[aln_mode][tx_mode]['metrics']
    metrics_df = tools.sqlInterface.load_metrics(metrics_table, session)
    # unstack flattens the long-form data structure
    metrics_df = metrics_df.set_index(['AlignmentId', 'classifier']).unstack('classifier')
    metrics_df.columns = [col[1] for col in metrics_df.columns]
    metrics_df = metrics_df.reset_index()
    cols = ['AlnCoverage', 'AlnGoodness', 'AlnIdentity', 'PercentUnknownBases']
    metrics_df[cols] = metrics_df[cols].apply(pd.to_numeric)
    metrics_df['OriginalIntrons'] = metrics_df['OriginalIntrons'].fillna('')
    metrics_df['OriginalIntrons'] = [list(map(int, x)) if len(x[0]) > 0 else [] for x in
                                     metrics_df['OriginalIntrons'].str.split(',').tolist()]
    metrics_df['OriginalIntronsPercent'] = metrics_df['OriginalIntrons'].apply(calculate_vector_support, resolve_nan=1)
    session.close()
    return metrics_df
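# The set_index/unstack reshape above, as a self-contained sketch (column and
# id values are made up for illustration):
long_df = pd.DataFrame({'AlignmentId': ['a1', 'a1', 'a2', 'a2'],
                        'classifier': ['AlnCoverage', 'AlnIdentity'] * 2,
                        'value': ['99.1', '98.0', '88.5', '90.2']})
wide = long_df.set_index(['AlignmentId', 'classifier']).unstack('classifier')
wide.columns = [col[1] for col in wide.columns]   # drop the 'value' level
wide = wide.reset_index()
wide[['AlnCoverage', 'AlnIdentity']] = wide[['AlnCoverage', 'AlnIdentity']].apply(pd.to_numeric)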
def create_routing_table(bgp=None, ixp_prefixes=None, ixp_asns=None, bgp_compression='infer'):
    log.info('Creating IP2AS tool.')
    # guard added: bgp may be a DataFrame rather than a URL/path string
    if bgp_compression == 'infer' and isinstance(bgp, str) and bgp.startswith('http'):
        bgp_compression = infer_compression(bgp, 'infer')
    if not isinstance(ixp_prefixes, pd.DataFrame):
        ixp_prefixes = set(pd.read_csv(ixp_prefixes, comment='#', index_col=0).index.unique()) if ixp_prefixes is not None else set()
    if not isinstance(ixp_asns, pd.DataFrame):
        ixp_asns = set(pd.read_csv(ixp_asns, comment='#', index_col=0).index.unique()) if ixp_asns is not None else set()
    if not isinstance(bgp, pd.DataFrame):
        bgp_original = pd.read_table(bgp, comment='#', names=['Address', 'Prefixlen', 'ASN'], compression=bgp_compression)
        bgp = bgp_original[~bgp_original.ASN.str.contains(',|_')].copy()
        bgp['ASN'] = pd.to_numeric(bgp.ASN)
    rt = RoutingTable()
    for address, prefixlen, asn in bgp[~bgp.ASN.isin(ixp_asns)].itertuples(index=False):
        rt.add_prefix(asn.item(), address, prefixlen)
    for address, prefixlen, asn in bgp[bgp.ASN.isin(ixp_asns)].itertuples(index=False):
        rt.add_ixp(address, prefixlen)
    for prefix in ixp_prefixes:
        rt.add_ixp(prefix)
    rt.add_private()
    rt.add_multicast()
    rt.add_default()
    return rt
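# The ASN cleaning step above, in isolation (illustrative values): rows whose
# origin contains an AS-set (',') or AS-path ('_') are dropped before the
# numeric cast.
asns = pd.Series(['3356', '174_3356', '7018,2914', '2914'])
pd.to_numeric(asns[~asns.str.contains(',|_')])   # int64: [3356, 2914]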
def assemble_row_metadata(full_df, num_col_metadata, num_data_rows, num_row_metadata):
    # Extract values
    row_metadata_row_inds = range(num_col_metadata + 1, num_col_metadata + num_data_rows + 1)
    row_metadata_col_inds = range(1, num_row_metadata + 1)
    row_metadata = full_df.iloc[row_metadata_row_inds, row_metadata_col_inds]
    # Create index from the first column of full_df (after the filler block)
    row_metadata.index = full_df.iloc[row_metadata_row_inds, 0]
    # Create columns from the top row of full_df (before cids start)
    row_metadata.columns = full_df.iloc[0, row_metadata_col_inds]
    # Rename the index name and columns name
    row_metadata.index.name = row_index_name
    row_metadata.columns.name = row_header_name
    # Convert metadata to numeric if possible
    row_metadata = row_metadata.apply(lambda x: pd.to_numeric(x, errors="ignore"))
    return row_metadata
def assemble_col_metadata(full_df, num_col_metadata, num_row_metadata, num_data_cols):
    # Extract values
    col_metadata_row_inds = range(1, num_col_metadata + 1)
    col_metadata_col_inds = range(num_row_metadata + 1, num_row_metadata + num_data_cols + 1)
    col_metadata = full_df.iloc[col_metadata_row_inds, col_metadata_col_inds]
    # Transpose so that samples are the rows and headers are the columns
    col_metadata = col_metadata.T
    # Create index from the top row of full_df (after the filler block)
    col_metadata.index = full_df.iloc[0, col_metadata_col_inds]
    # Create columns from the first column of full_df (before rids start)
    col_metadata.columns = full_df.iloc[col_metadata_row_inds, 0]
    # Rename the index name and columns name
    col_metadata.index.name = column_index_name
    col_metadata.columns.name = column_header_name
    # Convert metadata to numeric if possible
    col_metadata = col_metadata.apply(lambda x: pd.to_numeric(x, errors="ignore"))
    return col_metadata
def _orderbook_tag_frame(text):
    # This function can be removed if this pandas feature request is implemented:
    # https://github.com/pandas-dev/pandas/issues/14608
    table_str = _table_text(text)
    root = etree.fromstring(table_str)
    table_body = root.find('tbody')
    index = []
    data = defaultdict(list)
    # Iterator of tr objects
    qty_path = "td[@class='change-cell quantity']"
    tr_iter = table_body.iter(tag='tr')
    for tr in tr_iter:
        index.append(tr.find(path='td').text.strip())
        # Quantity Held
        pos = pd.to_numeric(tr.find(path=qty_path).attrib['value'])
        data[iem.QUANTITY_HELD].append(pos)
        # Your Bids
        data[iem.YOUR_BIDS].append(_num_open_orders(tr, 'yourBidsCell'))
        # Your Asks
        data[iem.YOUR_ASKS].append(_num_open_orders(tr, 'yourAsksCell'))
    return pd.DataFrame(data=data, index=index)
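# pd.to_numeric also accepts scalars, which is how it is used on the
# 'value' attribute above; a one-line illustration:
pd.to_numeric('42')   # -> 42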
def apply_ht_scores(dataframe):
    # Load the ht score dataframe
    ht_scores = pandas.read_csv('{0}ht_scores.csv'.format(config['result_data']), index_col=0)
    dataframe['phone'] = dataframe['phone'].map(lambda x: re.sub('[^0-9]', '', str(x)))
    # Make the column a numeric column for merging
    dataframe['phone'] = pandas.to_numeric(dataframe['phone'])
    final = dataframe.merge(ht_scores, how='left', left_on='phone', right_index=True)
    # Drop the content column
    final.drop('content', axis=1, inplace=True)
    if os.path.isfile('{0}ad_chars_final.csv'.format(config['result_data'])):
        lock.acquire()
        print 'lock has been set for file {0}'.format(file)
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), mode='a', header=False, encoding='utf-8')
        lock.release()
        print 'lock has been released for file {0}'.format(file)
    else:
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), header=True, encoding='utf-8')
def apply_ht_scores(dataframe):
    # Load the ht score dataframe
    ht_scores = pandas.read_csv('{0}ht_scores.csv'.format(config['result_data']), index_col=0)
    dataframe['phone'] = dataframe['phone'].map(lambda x: re.sub('[^0-9]', '', str(x)))
    # Make the column a numeric column for merging
    # dataframe['phone'] = pandas.to_numeric(dataframe['phone'])
    final = dataframe.merge(ht_scores, how='left', left_on='phone', right_index=True)
    # Drop the content column and drop the index column
    final.drop('content', axis=1, inplace=True)
    if os.path.isfile('{0}ad_chars_final.csv'.format(config['result_data'])):
        lock.acquire()
        print 'lock has been set for file {0}'.format(file)
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), mode='a', header=False, encoding='utf-8', index=False)
        lock.release()
    else:
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), header=True, encoding='utf-8', index=False)
def makeDataFrame(phases):
    """Return a pandas DataFrame, with CIF files as index and ellipsoid parameters as columns (hierarchical by centre atom)."""
    import pandas as pd
    from pieface.readcoords import Crystal
    if isinstance(phases, dict):
        if isinstance(phases[phases.keys()[0]], Crystal):
            # We are reading a dict of Crystals: convert to a nested dict first
            alldata = makenesteddict(phases)
        elif isinstance(phases[phases.keys()[0]], dict):
            # Looking at a dict of dicts: assume it is already correct for pandas
            alldata = phases
        d = dict([(i, pd.DataFrame(alldata[i]).set_index('files')) for i in alldata.keys()])  # Make a dict of DataFrames
        frame = pd.concat(d, axis=1)
        if len(frame.index) == 1:
            # We're looking at a single CIF file: unstack the DataFrame with atoms as index
            return frame.ix[frame.index[0]].unstack().apply(pd.to_numeric, errors='ignore')  # Convert back to float/int when unstacking
        else:
            return frame
    else:
        raise TypeError("Unknown data format for conversion to DataFrame (expected dict)")
def _return_appropiate_type(self, selected):
    if isinstance(selected, pd.Series):
        frame = pd.DataFrame(selected).T
        if self._required_cols <= set(frame.columns):
            selected = frame.apply(pd.to_numeric, errors='ignore')
        else:
            return selected
    if (isinstance(selected, pd.DataFrame)
            and self._required_cols <= set(selected.columns)):
        molecule = self.__class__(selected)
        molecule.metadata = self.metadata.copy()
        molecule._metadata = copy.deepcopy(self._metadata)
        return molecule
    else:
        return selected
def _augment_lmfit_modelresult(result):
    """Tidy data values and fitted model from `lmfit.model.ModelResult`."""
    columns = ['x', 'data', 'best_fit', 'residual']
    d = pd.DataFrame(index=range(result.ndata), columns=columns)
    for col in columns[1:]:
        d.loc[:, col] = getattr(result, col)
    independent_vars = result.model.independent_vars
    if len(independent_vars) == 1:
        independent_var = independent_vars[0]
    else:
        msg = ('Only 1 independent variable is currently supported.\n'
               'Found independent variables: %s' % str(independent_vars))
        raise NotImplementedError(msg)
    x_array = result.userkws[independent_var]
    d.loc[:, 'x'] = x_array
    if len(result.components) > 1:
        comp_names = [c.name for c in result.components]
        for cname, comp in zip(comp_names, result.components):
            d.loc[:, cname] = comp.eval(x=d.x, **result.values)
    return d.apply(pd.to_numeric, errors='ignore')
def __init__(self, symbol, *args):
    super().__init__()
    self.data = pd.read_csv(open(r"Stock_Data/{}.csv".format(symbol)))
    self.data = self.data.apply(pd.to_numeric, errors="ignore")
    self.data.index = self.data["Quarter end"]
    self.name = symbol
    if self.data["Price"].dtype in (int, float) and self.data["Cumulative dividends per share"].dtype in (int, float):
        self.data["Value"] = self.data["Price"] + self.data["Cumulative dividends per share"]
        # Calculation of the estimated return
        self.data["Estimated Return"] = self.data["Value"].pct_change()
        # Calculation of the standard deviation
        self.data["Standard Deviation"] = self.data["Value"].std()
    else:
        self.complete_pricelist = False
def __call__(self, fields, geo_for, geo_in=None, cache=NopCache()):
    """Special method to make the API object invocable.

    Arguments:
      * fields: list of variables to return.
      * geo_* fields must be given as dictionaries, e.g.
        `{'county': '*'}`
      * cache: cache in which to store results. Not cached by default.
    """
    params = {
        'get': ','.join(fields),
        'key': self.key,
        'for': self._geo2str(geo_for),
    }
    if geo_in:
        params['in'] = self._geo2str(geo_in)
    j = fetchjson(self.endpoint, cache, self.session, params=params)
    ret = pd.DataFrame(data=j[1:], columns=j[0])
    for field in fields:
        if self.variables[field].get('predicateType') == 'int':
            ret[field] = pd.to_numeric(ret[field])
    return ret
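# The Census API returns every cell as a string, header row first, which is
# why integer-typed variables need the explicit cast; a reduced illustration
# with made-up values:
resp = [['NAME', 'P001001', 'state'], ['Alabama', '4779736', '01']]
frame = pd.DataFrame(data=resp[1:], columns=resp[0])
frame['P001001'] = pd.to_numeric(frame['P001001'])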
# LoadAndMatchDates.py, from the Test-stock-prediction-algorithms project (author: timestocome)
def read_data(file_name):
    stock = pd.read_csv(file_name, parse_dates=True, index_col=0)
    n_samples = len(stock)
    # ditch samples with NaN values
    stock = stock.dropna(axis=0)
    # flip order from newest-to-oldest to oldest-to-newest
    # stock = stock.iloc[::-1]
    # trim data
    stock = stock[['Open']]
    # convert object to floats
    stock['Open'] = pd.to_numeric(stock['Open'], errors='coerce')
    # all stock is needed to walk back dates for testing hold-out data
    return stock
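# errors='coerce' maps anything unparseable to NaN; a quick illustration (not
# from the original source) of what the cast above does to dirty cells:
prices = pd.Series(['101.3', 'null', '99.8'])
pd.to_numeric(prices, errors='coerce')   # [101.3, NaN, 99.8]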
#############################################################################################
# load and combine stock indexes, matching the dates
def get_qstat_as_df():
    """Get the current user's qstat output as a DataFrame."""
    user = os.environ.get("USER")
    try:
        ret = subprocess.Popen(
            ["qstat", "-u", str(user)],
            stdout=subprocess.PIPE,
        )
        df = pd.read_csv(ret.stdout, delimiter=r"\s+")
        # drop the first line since it is just one long separator line
        df = df.drop(df.index[0]).copy()
        # convert objects to numeric, otherwise the numbers are strings
        df["JOBID"] = pd.to_numeric(df["job-ID"], errors='coerce')
        # df.set_index("JOBID")
        df = df.drop('job-ID', 1)
    except ValueError:
        logger.exception("No jobs in queues for user {}".format(user))
        df = pd.DataFrame()
    return df
def get_data_from_google(ticker_sym, start, end):
    """Return a DataFrame of data for a given stock between two dates."""
    url = "https://www.google.com/finance/historical?q=%s&startdate=%s&enddate=%s&output=csv" % (ticker_sym, start, end)
    s = requests.get(url).content
    df = pd.read_csv(io.StringIO(s.decode('utf-8')))
    df['Date'] = pd.to_datetime(df['Date'])
    df['epoch'] = (df['Date'] - datetime(1970, 1, 1)).dt.total_seconds() * 1000
    df = df.set_index('Date')  # set_index returns a copy; the original call discarded the result
    df['Adj_Close'] = df['Close']  # Google's API doesn't provide it, so just assume it's the same
    cols = ['High', 'Low', 'Volume', 'Open', 'Close', 'Adj_Close']
    for c in cols:  # cast columns to numeric
        df[c] = pd.to_numeric(df[c])
    return df.iloc[::-1]  # reverse the DataFrame so index 0 is the earliest date

# @memoize
# def get_data_for_sym(ticker_sym, start, end):
#     return list(reversed(get_data_for_sym_from_yahoo(ticker_sym, start, end)))
#     # res = StockFeature.select().where(Relationship.from_user == self))
def calc_AB(vcf):
    '''Calculate the allele balance for all samples in a given
    pdVCF. Also converts DP & GQ to a numeric type.

    Args:
        vcf: pdVCF with genotype information extracted

    Notes:
        ONLY WORKS FOR BIALLELIC VARIANTS
    '''
    sam = vcf.columns.levels[0][0]
    # bcftools places '.' in empty fields; regex=False so only literal dots are
    # replaced (the bare pattern would otherwise be treated as a regex)
    vcf[sam, 'DP'] = pd.to_numeric(vcf[sam, 'DP'].str.replace('.', '0', regex=False))
    vcf[sam, 'GQ'] = pd.to_numeric(vcf[sam, 'GQ'].str.replace('.', '0', regex=False))
    AD = vcf.xs('AD', level=1, axis=1).unstack().str.split(",", n=2)
    DP = vcf.xs('DP', level=1, axis=1).unstack()
    AB = round(pd.to_numeric(AD.str[1]) / pd.to_numeric(DP), 2)
    vcf[sam, 'AB'] = AB.tolist()
    return vcf
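# The allele-balance arithmetic above, flattened into a standalone sketch
# (made-up read counts; 'ref,alt' strings as bcftools emits them):
AD = pd.Series(['12,8', '30,0'])
DP = pd.Series(['20', '30'])
alt = pd.to_numeric(AD.str.split(',', n=2).str[1])
round(alt / pd.to_numeric(DP), 2)   # [0.40, 0.00]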
def update_distances(self):
    """
    Calculate the distances between the observed series and the stresses.

    Returns
    -------
    distances: pandas.DataFrame
        pandas DataFrame with the distances between the oseries (index)
        and the stresses (columns).
    """
    # Make sure these are values, even when actually objects.
    xo = pd.to_numeric(self.oseries.x)
    xt = pd.to_numeric(self.stresses.x)
    yo = pd.to_numeric(self.oseries.y)
    yt = pd.to_numeric(self.stresses.y)
    xh, xi = np.meshgrid(xt, xo)
    yh, yi = np.meshgrid(yt, yo)
    self.distances = pd.DataFrame(np.sqrt((xh - xi) ** 2 + (yh - yi) ** 2),
                                  index=self.oseries.index,
                                  columns=self.stresses.index)
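# The coercion matters because x/y coordinates read from a mixed table can
# arrive with object dtype; a 1-D version of the same meshgrid pattern, with
# made-up points:
xo = pd.to_numeric(pd.Series([0, 3], index=['obs1', 'obs2'], dtype=object))
xt = pd.to_numeric(pd.Series([4, 0], index=['s1', 's2'], dtype=object))
xh, xi = np.meshgrid(xt, xo)
pd.DataFrame(np.abs(xh - xi), index=xo.index, columns=xt.index)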
def parse(self, entry):
    data = pd.read_csv(str(entry),
                       engine="c",
                       sep="\t",
                       parse_dates=False,
                       index_col=[0, 1])
    data.index.names = ["date", "srcid"]
    # Check for AMT bug that adds row of ('nvsplDate', 'Total_All') with all 0s, drop if exists
    if data.index[-1][0] == 'nvsplDate':
        data = data.iloc[:-1, :]
    ## Pandas cannot seem to handle a MultiIndex with dates;
    ## slicing syntax becomes even crazier, and often doesn't even work.
    ## So date conversion is disabled for now.
    # # Convert dates
    # datetimes = data.index.get_level_values('date').to_datetime()
    # data.index.set_levels(datetimes, level='date', inplace=True)
    # Ensure MultiIndex sortedness
    data.sortlevel(inplace=True)
    return data.apply(pd.to_numeric, raw=True, errors="coerce")
def to_numeric(self, columns):
    '''
    Args:
        columns (string or list):
            column names that need to be converted
    Returns:
        -
    '''
    if isinstance(columns, str):
        self.data_df[columns] = pd.to_numeric(self.data_df[columns], errors='coerce')
    elif isinstance(columns, list):
        for column in columns:
            self.data_df[column] = pd.to_numeric(self.data_df[column], errors='coerce')

# rename certain columns