Python pd.isnull() usage examples
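pd.isnull() (also exposed as pd.isna()) detects missing values (NaN, None, and NaT) and works on scalars, ndarrays, Index objects, Series, and DataFrames alike. The snippets below are collected from open-source projects; as a quick orientation, here is a minimal sketch of the basic behavior:

import numpy as np
import pandas as pd

# Scalars: NaN, None and NaT all count as missing
assert pd.isnull(np.nan)
assert pd.isnull(None)
assert pd.isnull(pd.NaT)
assert not pd.isnull(0)

# On a Series it returns an element-wise boolean mask
s = pd.Series([1.0, np.nan, 3.0])
print(pd.isnull(s).tolist())  # [False, True, False]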

test_arbitrary_end_serializer.py (project: eemeter, author: openeemeter)
def test_to_records(serializer):

    data = {"value": [1, np.nan], "estimated": [True, False]}
    columns = ["value", "estimated"]
    index = pd.date_range('2000-01-01', periods=2, freq='D')
    df = pd.DataFrame(data, index=index, columns=columns)

    records = serializer.to_records(df)
    assert len(records) == 2
    assert records[0]["end"] == datetime(2000, 1, 1, tzinfo=pytz.UTC)
    assert pd.isnull(records[0]["value"])
    assert not records[0]["estimated"]

    assert records[1]["end"] == datetime(2000, 1, 2, tzinfo=pytz.UTC)
    assert records[1]["value"] == 1
    assert records[1]["estimated"]
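The test above uses pd.isnull rather than np.isnan because a missing record value may be None or sit in an object-dtype container, where np.isnan raises a TypeError. A minimal comparison sketch:

import numpy as np
import pandas as pd

print(pd.isnull(None))   # True
# np.isnan(None)         # raises TypeError

arr = np.array(['a', None, 1.5], dtype=object)
print(pd.isnull(arr))    # [False  True False]
# np.isnan(arr)          # raises TypeError on object dtype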
test_data_portal.py (project: catalyst, author: enigmampc)
def test_get_last_traded_equity_minute(self):
        trading_calendar = self.trading_calendars[Equity]
        # Case: Missing data at front of data set, and request dt is before
        # first value.
        dts = trading_calendar.minutes_for_session(self.trading_days[0])
        asset = self.asset_finder.retrieve_asset(1)
        self.assertTrue(pd.isnull(
            self.data_portal.get_last_traded_dt(
                asset, dts[0], 'minute')))

        # Case: Data on requested dt.
        dts = trading_calendar.minutes_for_session(self.trading_days[2])

        self.assertEqual(dts[1],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[1], 'minute'))

        # Case: No data on dt, but data occurring before dt.
        self.assertEqual(dts[4],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[5], 'minute'))
test_data_portal.py (project: catalyst, author: enigmampc)
def test_get_last_traded_future_minute(self):
        asset = self.asset_finder.retrieve_asset(10000)
        trading_calendar = self.trading_calendars[Future]
        # Case: Missing data at front of data set, and request dt is before
        # first value.
        dts = trading_calendar.minutes_for_session(self.trading_days[0])
        self.assertTrue(pd.isnull(
            self.data_portal.get_last_traded_dt(
                asset, dts[0], 'minute')))

        # Case: Data on requested dt.
        dts = trading_calendar.minutes_for_session(self.trading_days[3])

        self.assertEqual(dts[1],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[1], 'minute'))

        # Case: No data on dt, but data occurring before dt.
        self.assertEqual(dts[4],
                         self.data_portal.get_last_traded_dt(
                             asset, dts[5], 'minute'))
rdkit_db.py (project: smiles-neural-network, author: PMitura)
def sendData(con, df):

    cursor = con.cursor()
    cols = df.columns.tolist()

    values = df.values

    for vals in values:
        for i,val in enumerate(vals):
            if pd.isnull(val):
                vals[i]=None

        query = 'INSERT INTO {} ({}) VALUES ({})'.format(
            SEND_TABLE,
            ','.join(['"{}"'.format(x) for x in cols]),
            ','.join(['%s']*len(cols)))

        cursor.execute(query, tuple(vals))

    con.commit()
    cursor.close()
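A common vectorized alternative to the per-value loop above is to blank out every null cell in one step before building the INSERT statements; a minimal sketch (the frame here is hypothetical):

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, np.nan], 'b': ['x', None]})

# Cast to object first so None survives, then replace every null cell
# (NaN/None/NaT) with None in a single vectorized pass.
df_clean = df.astype(object).where(pd.notnull(df), None)
print(df_clean.values.tolist())  # [[1.0, 'x'], [None, None]]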
beamline.py (project: georges, author: chernals)
def __convert_survey_to_sequence(self):
        s = self.__beamline
        if 'LENGTH' not in s:
            s['LENGTH'] = np.nan
        offset = s['ORBIT_LENGTH'][0] / 2.0
        if pd.isnull(offset):
            offset = 0
        self.__beamline['AT_CENTER'] = pd.DataFrame(
            npl.norm(
                [
                    s['X'].diff().fillna(0.0),
                    s['Y'].diff().fillna(0.0)
                ],
                axis=0
            ) - (
                s['LENGTH'].fillna(0.0) / 2.0 - s['ORBIT_LENGTH'].fillna(0.0) / 2.0
            ) + (
                s['LENGTH'].shift(1).fillna(0.0) / 2.0 - s['ORBIT_LENGTH'].shift(1).fillna(0.0) / 2.0
            )).cumsum() / 1000.0 + offset
        self.__converted_from_survey = True
bdsim.py (project: georges, author: chernals)
def split_rbends(line, n=20):
    split_line = pd.DataFrame()
    for index, row in line.iterrows():
        if row['CLASS'] == 'RBEND' and pd.isnull(row.get('SPLIT')):
            angle = row['ANGLE'] / n
            length = row['L'] / n
            for i in range(0,n):
                row = row.copy()
                row.name = index + "_{}".format(i)
                row['SPLIT'] = True
                row['ANGLE'] = angle
                row['L'] = length
                split_line = split_line.append(row)
        else:
            split_line = split_line.append(row)
    split_line[['THICK']] = split_line[['THICK']].applymap(bool)
    return split_line
madx.py (project: georges, author: chernals)
def element_to_mad(e):
    """Convert a pandas.Series representation onto a MAD-X sequence element."""
    if e.CLASS not in SUPPORTED_CLASSES:
        return ""
    mad = "{}: {}, ".format(e.name, e.CLASS)
    if e.get('BENDING_ANGLE') is not None and not np.isnan(e['BENDING_ANGLE']):
        mad += f"ANGLE={e['BENDING_ANGLE']},"
    elif e.get('ANGLE') is not None and not np.isnan(e['ANGLE']):
        mad += f"ANGLE={e.get('ANGLE', 0)},"
    else:
        # Angle property not supported by the element or absent
        mad += ""
    mad += ', '.join(["{}={}".format(p, e[p]) for p in SUPPORTED_PROPERTIES if pd.notnull(e.get(p, None))])
    if pd.notnull(e['LENGTH']) and e['LENGTH'] != 0.0:
        mad += ", L={}".format(e['LENGTH'])
    if pd.notnull(e.get('APERTYPE', None)):
        mad += ", APERTURE={}".format(str(e['APERTURE']).strip('[]'))
    if pd.notnull(e.get('PLUG')) and pd.notnull(e.get('CIRCUIT')) and pd.isnull(e.get('VALUE')):
        mad += ", {}:={}".format(e['PLUG'], e['CIRCUIT'])
    if pd.notnull(e.get('PLUG')) and pd.notnull(e.get('VALUE')):
        mad += ", {}={}".format(e['PLUG'], e['VALUE'])
    mad += ", AT={}".format(e['AT_CENTER'])
    mad += ";"
    return mad
metadata.py (project: qiime2, author: qiime2)
def _validate_pandas_index(index, label):
    # `/` and `\0` aren't permitted because they are invalid filename
    # characters on *nix filesystems. The remaining values aren't permitted
    # because they *could* be misinterpreted by a shell (e.g. `*`, `|`).
    illegal_chars = ['/', '\0', '\\', '*', '<', '>', '?', '|', '$']
    chars_for_msg = ", ".join("%r" % i for i in illegal_chars)
    illegal_chars = set(illegal_chars)

    # First check the index dtype and ensure there are no null values
    if index.dtype_str not in ['object', 'str'] or pd.isnull(index).any():
        msg = "Non-string Metadata %s values detected" % label
        raise ValueError(invalid_metadata_template % msg)

    # Then check for invalid characters along index
    for value in index:
        if not value or illegal_chars & set(value):
            msg = "Invalid characters (e.g. %s) or empty ID detected in " \
                  "metadata %s: %r" % (chars_for_msg, label, value)
            raise ValueError(invalid_metadata_template % msg)

    # Finally, ensure unique values along index
    if len(index) != len(set(index)):
        msg = "Duplicate Metadata %s values detected" % label
        raise ValueError(invalid_metadata_template % msg)
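Note that pd.isnull accepts a pandas Index directly and returns a boolean array, which is what the dtype-and-null check above relies on; a minimal sketch:

import pandas as pd

index = pd.Index(['sample1', None, 'sample3'])
print(pd.isnull(index))        # [False  True False]
print(pd.isnull(index).any())  # True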
reader.py (project: nuts-ml, author: maet3608)
def isnull(value):
        """
        Return True if value is NaN or None.

        >>> import numpy as np
        >>> ReadPandas.isnull(np.NaN)
        True

        >>> ReadPandas.isnull(None)
        True

        >>> ReadPandas.isnull(0)
        False

        :param value: Value to test
        :return: True if the value is NaN or None.
        :rtype: bool
        """
        return pd.isnull(value)
f1_data.py (project: f1_2017, author: aflaisler)
def clean_data(self):
        # load qualif and race data
        df_qual = self.load_qualif_data()
        df_races = self.load_results_data()
        # remove Japan as no data for 2015 race
        df_qual = self.del_japan15(df_qual)
        df_races = self.del_japan15(df_races)
        # create unique id
        df_qual = self.unique_id(df_qual)
        df_races = self.unique_id(df_races)
        # merge the results
        df_out = df_races.merge(
            df_qual, on='id_', how='inner', suffixes=('', '_qual'))
        df_out = df_out[pd.isnull(df_out.q_min) == False]
        print(df_out.shape)
        return df_out.reset_index(drop=1), df_races.reset_index(drop=1), df_qual.reset_index(drop=1)

    # load the data
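As a side note, filters written as pd.isnull(col) == False, like the one above, are more idiomatically expressed with pd.notnull; a minimal sketch with a hypothetical column:

import numpy as np
import pandas as pd

df_out = pd.DataFrame({'q_min': [71.2, np.nan, 69.8]})

# Equivalent to df_out[pd.isnull(df_out.q_min) == False]
df_out = df_out[pd.notnull(df_out.q_min)]
print(len(df_out))  # 2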
f1_regression.py (project: f1_2017, author: aflaisler)
def Xy_matrix(df_qual_and_race, columns, df_wet):
    df_q_r_out = df_qual_and_race.loc[:, columns].reset_index(drop=1)
    df_q_r_out = df_q_r_out[(pd.isnull(
        df_q_r_out[y_label]) == False) & (pd.isnull(df_q_r_out.q_min) == False)].reset_index(drop=1)
    X = df_q_r_out.loc[:, ['q_min', 'position_qual', 'raceId', 'circuitId',
                           'driverId', 'year', 'round', 'dob', y_label]]
    # birth year / mo
    X['birth_year'] = [int(d.year) for d in df_q_r_out['dob']]
    X['birth_mo'] = [int(d.month) for d in df_q_r_out['dob']]
    X.drop('dob', axis=1, inplace=1)
    # adding wet as a feature
    # weather data
    df_races = d['races'].copy()
    # df_races.head()
    X = X.merge(df_wet.drop(['circuitId'], 1),
                how='left', on=['year', 'round'])
    # pit stop
    df_pits = d['pitStops'].groupby(['raceId', 'driverId'], as_index=0)[
        'milliseconds'].sum()
    df_pits.reset_index(drop=1, inplace=1)
    X_y = X.merge(df_pits, how='left', on=['raceId', 'driverId'])
    X_y.fillna(0, inplace=1)
    return X_y
checkpandas.py (project: tdda, author: tdda)
def differences(self, name, values, ref_values, precision):
        """
        Returns a short summary of where values differ, for two columns.
        """
        for i, val in enumerate(values):
            refval = ref_values[i]
            if val != refval and not (pd.isnull(val) and pd.isnull(refval)):
                stop = self.ndifferences(values, ref_values, i)
                summary_vals = self.sample_format(values, i, stop, precision)
                summary_ref_vals = self.sample_format(ref_values, i, stop,
                                                      precision)
                return 'From row %d: [%s] != [%s]' % (i+1,
                                                      summary_vals,
                                                      summary_ref_vals)
        if values.dtype != ref_values.dtype:
            return 'Different types'
        else:
            return 'But mysteriously appear to be identical!'
constraints.py (project: tdda, author: tdda)
def pandas_tdda_type(x):
    dt = getattr(x, 'dtype', None)
    if type(x) == str or dt == np.dtype('O'):
        return 'string'
    dts = str(dt)
    if type(x) == bool or 'bool' in dts:
        return 'bool'
    if type(x) in (int, long) or 'int' in dts:
        return 'int'
    if type(x) == float or 'float' in dts:
        return 'real'
    if (type(x) == datetime.datetime or 'datetime' in dts
            or type(x) == pandas_Timestamp):
        return 'date'
    if x is None or (not isinstance(x, pd.core.series.Series)
                     and pd.isnull(x)):
        return 'null'
    # Everything else is other, for now, including compound types,
    # unicode in Python2, bytes in Python3 etc.
    return 'other'
tree.py (project: tgboost, author: wepe)
def _predict(self, treenode, X):
        """
        predict a single sample
        note that X is a tuple (index, pandas.core.series.Series) from df.iterrows()
        """
        if treenode.is_leaf:
            return treenode.leaf_score
        elif pd.isnull(X[1][treenode.feature]):
            if treenode.nan_direction == 0:
                return self._predict(treenode.left_child, X)
            else:
                return self._predict(treenode.right_child, X)
        elif X[1][treenode.feature] < treenode.threshold:
            return self._predict(treenode.left_child, X)
        else:
            return self._predict(treenode.right_child, X)
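The pd.isnull branch above mirrors the default-direction idea used by gradient-boosting libraries: a sample whose split feature is missing is routed down a learned nan_direction instead of being compared against the threshold. The key test is just pd.isnull on one cell of the row Series; a minimal sketch:

import numpy as np
import pandas as pd

for _, row in pd.DataFrame({'f0': [0.3, np.nan]}).iterrows():
    if pd.isnull(row['f0']):
        print('missing -> follow nan_direction')
    else:
        print('compare against threshold:', row['f0'] < 0.5)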
history_container.py (project: zipline-chinese, author: zhanghan1990)
def ffill_buffer_from_prior_values(freq,
                                   field,
                                   buffer_frame,
                                   digest_frame,
                                   pv_frame,
                                   raw=False):
    """
    Forward-fill a buffer frame, falling back to the end-of-period values of a
    digest frame if the buffer frame has leading NaNs.
    """
    # convert to ndarray if necessary
    digest_values = digest_frame
    if raw and isinstance(digest_frame, pd.DataFrame):
        digest_values = digest_frame.values

    buffer_values = buffer_frame
    if raw and isinstance(buffer_frame, pd.DataFrame):
        buffer_values = buffer_frame.values

    nan_sids = pd.isnull(buffer_values[0])
    if np.any(nan_sids) and len(digest_values):
        # If we have any leading nans in the buffer and we have a non-empty
        # digest frame, use the oldest digest values as the initial buffer
        # values.
        buffer_values[0, nan_sids] = digest_values[-1, nan_sids]

    nan_sids = pd.isnull(buffer_values[0])
    if np.any(nan_sids):
        # If we still have leading nans, fall back to the last known values
        # from before the digest.
        key_loc = pv_frame.index.get_loc((freq.freq_str, field))
        filler = pv_frame.values[key_loc, nan_sids]
        buffer_values[0, nan_sids] = filler

    if raw:
        filled = ffill(buffer_values)
        return filled

    return buffer_frame.ffill()
history_container.py (project: zipline-chinese, author: zhanghan1990)
def ffill_digest_frame_from_prior_values(freq,
                                         field,
                                         digest_frame,
                                         pv_frame,
                                         raw=False):
    """
    Forward-fill a digest frame, falling back to the last known prior values if
    necessary.
    """
    # convert to ndarray if necessary
    values = digest_frame
    if raw and isinstance(digest_frame, pd.DataFrame):
        values = digest_frame.values

    nan_sids = pd.isnull(values[0])
    if np.any(nan_sids):
        # If we have any leading nans in the frame, use values from pv_frame to
        # seed values for those sids.
        key_loc = pv_frame.index.get_loc((freq.freq_str, field))
        filler = pv_frame.values[key_loc, nan_sids]
        values[0, nan_sids] = filler

    if raw:
        filled = ffill(values)
        return filled

    return digest_frame.ffill()
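Both helpers above rely on pd.isnull working element-wise on a raw 2D ndarray slice, producing a boolean mask that can drive fancy-index assignment; a minimal sketch:

import numpy as np
import pandas as pd

buffer_values = np.array([[np.nan, 2.0, np.nan],
                          [4.0, 5.0, 6.0]])

nan_sids = pd.isnull(buffer_values[0])     # [ True False  True]
buffer_values[0, nan_sids] = [-1.0, -3.0]  # seed the leading NaNs
print(buffer_values[0])                    # [-1.  2. -3.]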
gw_util.py (project: scikit-dataaccess, author: MITHaystack)
def combine_water_heights(in_data):
    '''
    Combine median and average water heights

    Create a column of water heights in the input data frame using Median
    Water Depth by default, filling in missing data with average values

    @param in_data: Input water heights data
    '''

    if 'Mean Water Depth' in in_data.columns and 'Median Water Depth' in in_data.columns:
        # replacing all null median data with mean data
        median_null_index = pd.isnull(in_data.loc[:,'Median Water Depth'])


        in_data.loc[:,'Combined Water Depth'] = in_data.loc[:,'Median Water Depth']

        # Check if there is any replacement data available
        if (~pd.isnull(in_data.loc[median_null_index, 'Mean Water Depth'])).sum() > 0:
            in_data.loc[median_null_index, 'Combined Water Depth'] = in_data.loc[median_null_index, 'Mean Water Depth']

    elif 'Mean Water Depth' in in_data.columns and 'Median Water Depth' not in in_data.columns:
        in_data.loc[:,'Combined Water Depth'] = in_data.loc[:,'Mean Water Depth']

    elif 'Mean Water Depth' not in in_data.columns and 'Median Water Depth' in in_data.columns:
        in_data.loc[:,'Combined Water Depth'] = in_data.loc[:,'Median Water Depth']

    else:
        raise ValueError("in_data needs either 'Mean Water Depth' or 'Median Water Depth' or both")
tdx_formula.py (project: tdx_formula, author: woodylee1974)
def CONV(self, param):
        df = pd.DataFrame(index = param[0].index)
        df['X'] = param[0]
        df['W'] = param[1]
        class Convolution:
            def __init__(self, N):
                self.N = N
                self.q = deque([], self.N)
                self.tq = deque([], self.N)
                self.s = 0
                self.t = 0

            def handleInput(self, row):
                if len(self.q) < self.N:
                    if pd.isnull(row['W']) or pd.isnull(row['X']):
                        return np.NaN
                    self.q.append(row['W'] * row['X'])
                    self.tq.append(row['W'])
                    self.s += row['W'] * row['X']
                    self.t += row['W']
                    return np.NaN
                ret = self.s / self.t
                self.s -= self.q[0]
                self.t -= self.tq[0]
                delta_s = row['W'] * row['X']
                delta_t = row['W']
                self.s += delta_s
                self.t += delta_t
                self.q.append(delta_s)
                self.tq.append(delta_t)
                return ret
        conv = Convolution(param[2])
        result = df.apply(conv.handleInput, axis = 1, reduce = True)
        return result
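The hand-rolled window above computes a trailing weighted average sum(W*X)/sum(W) over N rows, returning NaN while the window fills. A rough pandas equivalent, up to window alignment and exact NaN handling, uses rolling sums; a minimal sketch:

import numpy as np
import pandas as pd

x = pd.Series([1.0, 2.0, np.nan, 4.0, 5.0, 6.0])
w = pd.Series([1.0, 1.0, 1.0, 2.0, 2.0, 2.0])
N = 3

# Trailing weighted moving average; windows containing NaN stay NaN.
wma = (w * x).rolling(N).sum() / w.rolling(N).sum()
print(wma.tolist())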


atlas.py (project: ssbio, author: SBRG)
def build_strain_specific_models(self, save_models=False):
        """Using the orthologous genes matrix, create and modify the strain specific models based on if orthologous
            genes exist.

        Also store the sequences directly in the reference GEM-PRO protein sequence attribute for the strains.
        """

        if len(self.df_orthology_matrix) == 0:
            raise RuntimeError('Empty orthology matrix')

        # Create an emptied copy of the reference GEM-PRO
        for strain_gempro in tqdm(self.strains):
            log.debug('{}: building strain specific model'.format(strain_gempro.id))

            # For each genome, load the metabolic model or genes from the reference GEM-PRO
            logging.disable(logging.WARNING)
            if self._empty_reference_gempro.model:
                strain_gempro.load_cobra_model(self._empty_reference_gempro.model)
            elif self._empty_reference_gempro.genes:
                strain_gempro.genes = [x.id for x in self._empty_reference_gempro.genes]
            logging.disable(logging.NOTSET)

            # Get a list of genes which do not have orthology in the strain
            not_in_strain = self.df_orthology_matrix[pd.isnull(self.df_orthology_matrix[strain_gempro.id])][strain_gempro.id].index.tolist()

            # Mark genes non-functional
            self._pare_down_model(strain_gempro=strain_gempro, genes_to_remove=not_in_strain)

            # Load sequences into the base and strain models
            self._load_strain_sequences(strain_gempro=strain_gempro)

            if save_models:
                cobra.io.save_json_model(model=strain_gempro.model,
                                         filename=op.join(self.model_dir, '{}.json'.format(strain_gempro.id)))
                strain_gempro.save_pickle(op.join(self.model_dir, '{}_gp.pckl'.format(strain_gempro.id)))


        log.info('Created {} new strain-specific models and loaded in sequences'.format(len(self.strains)))
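The orthology-matrix lookup above reduces to a common pattern: take the index labels of the rows where a given column is null; a minimal sketch with hypothetical gene and strain labels:

import numpy as np
import pandas as pd

ortho = pd.DataFrame({'strainA': ['geneA1', np.nan, 'geneA3']},
                     index=['g1', 'g2', 'g3'])

# Reference genes with no ortholog in strainA
not_in_strain = ortho.index[pd.isnull(ortho['strainA'])].tolist()
print(not_in_strain)  # ['g2']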
FeatureEncoding.py (project: kaggle, author: RankingAI)
def __ApplyOHE(cls, data, d_feat):
        """"""
        n = len(data)
        result = np.zeros((n, len(d_feat)), dtype='int8')
        ##
        d_stat = {}
        for i in range(n):
            for col in cls.CategoryCols:
                v = data.ix[i, col]
                if(col not in d_stat):
                    d_stat[col] = {}
                if(pd.isnull(v)):
                    result[i, d_feat['%s:missing' % col]] = 1
                    if('missing' in d_stat[col]):
                        d_stat[col]['missing'] += 1
                    else:
                        d_stat[col]['missing'] = 1
                elif('%s:%s' % (col, v) in d_feat):
                    result[i, d_feat['%s:%s' % (col, v)]] = 1
                    if('hit' in d_stat[col]):
                        d_stat[col]['hit'] += 1
                    else:
                        d_stat[col]['hit'] = 1
                else:
                    result[i, d_feat['%s:less' % col]] = 1
                    if('less' in d_stat[col]):
                        d_stat[col]['less'] += 1
                    else:
                        d_stat[col]['less'] = 1

        ## check
        for col in d_stat:
            if(np.sum(list(d_stat[col].values())) != n):
                print('Encoding for column %s error, %d : %d. ' % (col, np.sum(list(d_stat[col].values())),n))

        return result
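For the common case where each observed category simply gets its own indicator column, pandas ships a vectorized one-hot encoder that also handles nulls; a minimal sketch (the column is hypothetical, and this does not reproduce the 'less' bucket for rare values used above):

import numpy as np
import pandas as pd

df = pd.DataFrame({'color': ['red', np.nan, 'blue', 'red']})

# dummy_na=True adds an explicit indicator for missing values,
# playing the role of the '<col>:missing' feature above.
encoded = pd.get_dummies(df, columns=['color'], dummy_na=True)
print(encoded.columns.tolist())  # ['color_blue', 'color_red', 'color_nan']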

