python类isnull()的实例源码

soccerstan.py 文件源码 项目:soccerstan 作者: Torvaney 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def read_data(fname):
    """Read a football-data.co.uk style CSV into a model-ready DataFrame.

    Renames the standard column headers, drops fixtures that have not
    been played yet (missing goal counts), and attaches integer team ids
    derived from ``stan_map`` over the union of home and away teams.

    Returns a ``(data, team_map)`` tuple.
    """
    renames = {
        'HomeTeam': 'home_team',
        'AwayTeam': 'away_team',
        'FTHG': 'home_goals',
        'FTAG': 'away_goals',
    }
    data = pd.read_csv(fname).rename(columns=renames)
    # Rows without a recorded home-goal count are games not yet played.
    data = data.loc[~pd.isnull(data['home_goals'])]

    all_teams = pd.concat([data['home_team'], data['away_team']])
    team_map = stan_map(all_teams)
    data['home_team_id'] = data['home_team'].replace(team_map)
    data['away_team_id'] = data['away_team'].replace(team_map)

    # Goal counts arrive as NaN-capable floats; Stan wants plain ints.
    for col in ('home_goals', 'away_goals'):
        data[col] = [int(c) for c in data[col]]

    return data, team_map
pdb.py 文件源码 项目:ssbio 作者: SBRG 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def get_resolution(pdb_id):
    """Quick way to get the resolution of a PDB ID using the table of results from the REST service

    Returns infinity if the resolution is not available.

    Args:
        pdb_id (str): PDB ID, case-insensitive (upper-cased internally)

    Returns:
        float: resolution of a PDB ID in Angstroms

    Raises:
        ValueError: if the PDB ID is not present in the property table

    TODO:
        - Unit test

    """

    pdb_id = pdb_id.upper()
    if pdb_id not in _property_table().index:
        raise ValueError('PDB ID not in property table')
    else:
        resolution = _property_table().ix[pdb_id, 'resolution']
        if pd.isnull(resolution):
            # Bug fix: the message contained a '{}' placeholder but .format()
            # was never called, so literal braces were logged instead of the ID.
            log.debug('{}: no resolution available, probably not an X-ray crystal structure'.format(pdb_id))
            resolution = float('inf')

    return resolution
pdb.py 文件源码 项目:ssbio 作者: SBRG 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def get_release_date(pdb_id):
    """Quick way to get the release date of a PDB ID using the table of results from the REST service

    Returns None if the release date is not available.

    Args:
        pdb_id (str): PDB ID, case-insensitive (upper-cased internally)

    Returns:
        str: release date of a PDB ID, or None when unavailable

    Raises:
        ValueError: if the PDB ID is not present in the property table

    """

    pdb_id = pdb_id.upper()
    if pdb_id not in _property_table().index:
        raise ValueError('PDB ID not in property table')
    else:
        release_date = _property_table().ix[pdb_id, 'releaseDate']
        if pd.isnull(release_date):
            # Bug fix: the message was copy-pasted from a taxonomy lookup and
            # never formatted; log the correct field and the actual PDB ID.
            log.debug('{}: no release date available'.format(pdb_id))
            release_date = None

    return release_date
create_pharma_products.py 文件源码 项目:scheduled-bots 作者: SuLab 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def do_pharm_prod(drug_qid, brand_rxnorm, emea, url, brand_name):
    """Create/update the Wikidata item for a brand-name pharmaceutical product.

    Writes active-substance, instance-of and EMA statements (plus an RxNorm
    xref when available) and returns the written item's QID.
    """
    ref = create_ref_statement(emea, url)
    statements = [
        # P3781: has active substance
        wdi_core.WDItemID(drug_qid, 'P3781', references=[ref]),
        # P31: instance of pharmaceutical product, then chemical mixture
        wdi_core.WDItemID('Q28885102', 'P31', references=[ref]),
        wdi_core.WDItemID('Q169336', 'P31', references=[ref]),
        # P3637: EMA (EMEA) identifier
        wdi_core.WDExternalID(emea, 'P3637', references=[ref]),
    ]

    # RxNorm brand id is optional; it arrives as a NaN-capable float.
    if not pd.isnull(brand_rxnorm):
        statements.append(wdi_core.WDExternalID(str(int(brand_rxnorm)), "P3345"))

    item = wdi_core.WDItemEngine(item_name=brand_name, data=statements,
                                 domain="drugs", append_value=['P3781'])
    item.set_label(brand_name)
    if item.get_description() == '':
        item.set_description("pharmaceutical product")
    wdi_helpers.try_write(item, emea, 'P3637', login, edit_summary="add 'active ingredient'")

    return item.wd_item_id
mesh_changes.py 文件源码 项目:scheduled-bots 作者: SuLab 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def get_wikidata_do_mesh():
    """Build a mapping from DOID to the set of MeSH xref strings in Wikidata.

    Example value: {'DOID:0050856': {'skos:broadMatch_D019958'}}
    """
    query = """
    select ?item ?doid ?mesh ?mesh_rt where {
      ?item wdt:P699 ?doid .
      ?item p:P486 ?mesh_s .
      ?mesh_s ps:P486 ?mesh .
      optional { ?mesh_s pq:P4390 ?mesh_rt }
    }"""
    bindings = WDItemEngine.execute_sparql_query(query)['results']['bindings']
    # Strip the entity-URI prefix so bare QIDs/values remain.
    rows = []
    for binding in bindings:
        rows.append({key: val['value'].replace("http://www.wikidata.org/entity/", "")
                     for key, val in binding.items()})

    df = pd.DataFrame(rows)
    # Encode each xref as "<relation curie>_MESH:<mesh id>".
    df['mesh_rt'] = df.apply(lambda row: QID_MAP_REL_TYPE_CURIE[row.mesh_rt] + "_MESH:" + row.mesh, axis=1)

    # Group all rows of one item, collecting each column's non-null values.
    df['_item'] = df['item']
    records = df.groupby("_item").aggregate(
        lambda col: set(v for v in col if not pd.isnull(v))).to_dict("records")
    by_doid = {list(rec['doid'])[0]: rec for rec in records}
    by_doid = {doid: rec['mesh_rt'] for doid, rec in by_doid.items()}
    # Drop items with no xrefs at all.
    return {doid: xrefs for doid, xrefs in by_doid.items() if xrefs}
rinex_new.py 文件源码 项目:pyrsss 作者: butala 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def correct_p1c1(rinex_dump, replace_p1_with_c1=True):
    """Apply per-satellite P1-C1 bias corrections in place.

    Receiver type 1 adds the bias to both C1 and P2, type 2 to C1 only,
    and type 3 needs no correction. Optionally fills missing P1
    observations from the (corrected) C1 column. Returns *rinex_dump*.
    """
    recv_type = rinex_dump.recv_p1c1
    if recv_type not in [1, 2, 3]:
        raise ValueError('unknown receiver type {} (must be 1, 2, or 3)'.format(recv_type))
    for sat in sorted(set(rinex_dump.sat)):
        bias = rinex_dump.p1c1_table[sat]
        sat_rows = rinex_dump.sat == sat
        if recv_type == 1:
            rinex_dump.loc[sat_rows, 'C1'] += bias
            rinex_dump.loc[sat_rows, 'P2'] += bias
        elif recv_type == 2:
            rinex_dump.loc[sat_rows, 'C1'] += bias
    if replace_p1_with_c1:
        missing_p1 = PD.isnull(rinex_dump['P1'])
        rinex_dump.loc[missing_p1, 'P1'] = rinex_dump.loc[missing_p1, 'C1']
    return rinex_dump
histogram_filling.py 文件源码 项目:Eskapade 作者: KaveIO 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def to_ns(x):
    """Convert input timestamps to nanoseconds (integers)

    :param x: value to be converted
    :returns: converted value; 0 when the input is null or unparseable
    :rtype: int
    """

    if pd.isnull(x):
        return 0
    try:
        return pd.to_datetime(x).value
    except Exception:
        # Bug fix: the bare ``except:`` here also swallowed SystemExit and
        # KeyboardInterrupt. Catch only ordinary exceptions and fall back
        # to parsing the string representation.
        if hasattr(x, '__str__'):
            return pd.to_datetime(str(x)).value
    return 0
dq_helper.py 文件源码 项目:Eskapade 作者: KaveIO 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def check_nan(val):
    """Check input value for not a number

    :param val: value to be checked for nan
    :returns: true if nan
    :rtype: bool
    """

    if pd.isnull(val):
        return True
    if isinstance(val, str):
        # Whitespace-only strings and the literal words none/nan count as missing.
        stripped = val.strip().lower()
        return (not stripped) or stripped in ('none', 'nan')
    return False
dq_helper.py 文件源码 项目:Eskapade 作者: KaveIO 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def to_str(val, **kwargs):
    """Convert input to string

    :param val: value to be converted
    :returns: converted value, or ``kwargs['nan']`` when conversion fails
    :rtype: str
    """

    # Null-like input maps straight to the caller-supplied nan token.
    # pd.isnull can itself raise on exotic inputs; treat that as "not null".
    try:
        if pd.isnull(val):
            return kwargs['nan']
    except BaseException:
        pass

    if isinstance(val, str):
        return val

    convert = kwargs.get('convert_inconsistent_dtypes', True)
    if convert and hasattr(val, '__str__'):
        return str(val)
    return kwargs['nan']
dq_helper.py 文件源码 项目:Eskapade 作者: KaveIO 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def to_int(val, **kwargs):
    """Convert input to int

    :param val: value to be evaluated
    :returns: evaluated value, or ``kwargs['nan']`` when conversion fails
    :rtype: np.int64
    """

    # Null-like input maps straight to the caller-supplied nan token.
    # pd.isnull can itself raise on exotic inputs; treat that as "not null".
    try:
        if pd.isnull(val):
            return kwargs['nan']
    except BaseException:
        pass

    if isinstance(val, (int, np.int64)):
        return np.int64(val)

    if kwargs.get('convert_inconsistent_dtypes', True):
        try:
            return np.int64(val)
        except BaseException:
            pass

    return kwargs['nan']
dq_helper.py 文件源码 项目:Eskapade 作者: KaveIO 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def bool_to_str(val, **kwargs):
    """Convert input boolean to str

    :param val: value to be evaluated
    :returns: evaluated value, or ``kwargs['nan']`` for null/unconvertible input
    :rtype: str
    """

    # Null-like input maps straight to the caller-supplied nan token.
    # pd.isnull can itself raise on exotic inputs; treat that as "not null".
    try:
        if pd.isnull(val):
            return kwargs['nan']
    except BaseException:
        pass

    if isinstance(val, (bool, np.bool_)):
        return str(val)

    if kwargs.get('convert_inconsistent_dtypes', True) and hasattr(val, '__str__'):
        return str(val)
    return kwargs['nan']
dq_helper.py 文件源码 项目:Eskapade 作者: KaveIO 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def bool_to_int(val, **kwargs):
    """Convert input boolean to int

    :param val: value to be evaluated
    :returns: evaluated value, or ``kwargs['nan']`` when conversion fails
    :rtype: np.int64
    """

    # Bug fix: the original signature took no **kwargs, so every reference
    # to ``kwargs`` below raised NameError. Accept **kwargs like the other
    # converters in this module (to_int, to_str, bool_to_str); adding it is
    # backward-compatible for existing positional callers.
    try:
        if pd.isnull(val):
            return kwargs['nan']
    except BaseException:
        pass
    if isinstance(val, np.bool_) or isinstance(val, bool):
        return np.int64(val)
    if kwargs.get('convert_inconsistent_dtypes', False):
        try:
            return np.int64(val)
        except BaseException:
            pass
    return kwargs['nan']
test_imputer_iterative_regress.py 文件源码 项目:dsbox-cleaning 作者: usc-isi-i2 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def helper_impute_result_check(self, data, result):
    """Validate an imputation result against the original data.

    Checks that (1) ``result`` contains no missing values and (2) every
    originally non-missing cell is unchanged. Comparison is element-wise
    because the imputer may recast dtypes (e.g. 1 -> 1.0), which rules
    out DataFrame.equals.
    """
    # (1) nothing left unimputed
    self.assertEqual(pd.isnull(result).sum().sum(), 0)

    # (2) originally present values survive untouched
    null_mask = pd.isnull(data)
    for column in data:
        keep = ~null_mask[column]
        before = data[keep][column]
        after = result[keep][column]
        for idx in before.index:
            self.assertEqual(before[idx] == after[idx], True,
                             msg="not equals in column: {}".format(column))
test_imputer_knn.py 文件源码 项目:dsbox-cleaning 作者: usc-isi-i2 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def helper_impute_result_check(self, data, result):
    """Validate an imputation result against the original data.

    Checks that (1) ``result`` contains no missing values and (2) every
    originally non-missing cell is unchanged. Comparison is element-wise
    because the imputer may recast dtypes (e.g. 1 -> 1.0), which rules
    out DataFrame.equals.
    """
    # (1) nothing left unimputed
    self.assertEqual(pd.isnull(result).sum().sum(), 0)

    # (2) originally present values survive untouched
    null_mask = pd.isnull(data)
    for column in data:
        keep = ~null_mask[column]
        before = data[keep][column]
        after = result[keep][column]
        for idx in before.index:
            self.assertEqual(before[idx] == after[idx], True,
                             msg="not equals in column: {}".format(column))
test_imputer_mean.py 文件源码 项目:dsbox-cleaning 作者: usc-isi-i2 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def helper_impute_result_check(self, data, result):
    """Validate an imputation result against the original data.

    Checks that (1) ``result`` contains no missing values and (2) every
    originally non-missing cell is unchanged. Comparison is element-wise
    because the imputer may recast dtypes (e.g. 1 -> 1.0), which rules
    out DataFrame.equals.
    """
    # (1) nothing left unimputed
    self.assertEqual(pd.isnull(result).sum().sum(), 0)

    # (2) originally present values survive untouched
    null_mask = pd.isnull(data)
    for column in data:
        keep = ~null_mask[column]
        before = data[keep][column]
        after = result[keep][column]
        for idx in before.index:
            self.assertEqual(before[idx] == after[idx], True,
                             msg="not equals in column: {}".format(column))
test_imputer_mice.py 文件源码 项目:dsbox-cleaning 作者: usc-isi-i2 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def helper_impute_result_check(self, data, result):
    """Validate an imputation result against the original data.

    Checks that (1) ``result`` contains no missing values and (2) every
    originally non-missing cell is unchanged. Comparison is element-wise
    because the imputer may recast dtypes (e.g. 1 -> 1.0), which rules
    out DataFrame.equals.
    """
    # (1) nothing left unimputed
    self.assertEqual(pd.isnull(result).sum().sum(), 0)

    # (2) originally present values survive untouched
    null_mask = pd.isnull(data)
    for column in data:
        keep = ~null_mask[column]
        before = data[keep][column]
        after = result[keep][column]
        for idx in before.index:
            self.assertEqual(before[idx] == after[idx], True,
                             msg="not equals in column: {}".format(column))
scale.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def limits(self):
    """Return the scale limits.

    Empty scales get the default (0, 1). When explicit limits are set,
    any None/NaN entry falls back to the corresponding end of the
    trained range; otherwise the trained range itself is returned.
    """
    if self.is_empty():
        return (0, 1)

    # No explicit limits (or no trained range yet): use the trained range.
    if self._limits is None or self.range.range is None:
        return self.range.range

    explicit = self._limits
    trained = self.range.range
    if len(explicit) != len(trained):
        # Length mismatch: trust the explicit limits as-is.
        return tuple(explicit)
    # Substitute the trained value wherever an explicit limit is missing.
    return tuple(t if pd.isnull(e) else e for e, t in zip(explicit, trained))
scale.py 文件源码 项目:plotnine 作者: has2k1 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def map(self, x, limits=None):
    """
    Return an array-like of x mapped to values
    from the scales palette
    """
    if limits is None:
        limits = self.limits

    # Palette size = number of non-null limit entries.
    n_valid = sum(~pd.isnull(list(limits)))
    pal = self.palette(n_valid)

    if isinstance(pal, dict):
        # Manual palette with specific assignments: direct lookup per value.
        return [pal[val] for val in x]

    pal = np.asarray(pal)
    mapped = pal[match(x, limits)]
    # Anything that failed to match becomes the scale's NA value.
    mapped[pd.isnull(mapped)] = self.na_value
    return mapped
transform.py 文件源码 项目:skutil 作者: tgsmith61591 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def _mode(x, def_fill=ImputerMixin._def_fill):
    """Get the most common value in a 1d
    H2OFrame. Ties will be handled in a non-specified
    manner.

    Parameters
    ----------

    x : ``H2OFrame``, shape=(n_samples, 1)
        The 1d frame from which to derive the mode

    def_fill : fill value returned when the column is entirely null
    """
    counts_index = x.as_data_frame(use_pandas=True)[x.columns[0]].value_counts().index

    # Most common value, unless it is null -- then take the runner-up,
    # or the default fill when the column is 100% null.
    if not pd.isnull(counts_index[0]):
        return counts_index[0]
    if counts_index.shape[0] > 1:
        return counts_index[1]
    return def_fill
map_camps_timehistory.py 文件源码 项目:Visualflee 作者: cspgdds 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def get_loctype(location, date_index):
    """Returns a pandas Series of the location type for each day.

    Locations with a changetime have type *city* before that day, and
    *conflict* after it.
    """
    n_days = len(date_index)
    changetime = location.time
    if pd.isnull(changetime):
        # No change recorded: the location's static type applies to every day.
        types = location.location_type
    else:
        # Days [0, changetime) are "city", the remainder "conflict".
        types = ['city'] * int(changetime) + ['conflict'] * int(n_days - changetime)
    return pd.Series(types, index=date_index)
rl_compare.py 文件源码 项目:labutils 作者: networks-lab 项目源码 文件源码 阅读 38 收藏 0 点赞 0 评论 0
def compare_except(s1, s2, exceptions=None):
    """Jaro-compare two aligned string Series, ignoring *exceptions*.

    Each substring in *exceptions* is stripped before comparison and the
    Jaro distance of the cleaned strings is returned per row. Rows where
    either side is null yield NaN.

    NOTE(review): exception substrings are stripped from the first string
    only -- presumably they should also be removed from the second; confirm
    intent before changing that behavior.
    """
    # Bug fix: a mutable default argument ([]) is shared across calls; use
    # None as the default and materialize a fresh list per call.
    if exceptions is None:
        exceptions = []
    conc = pd.concat([s1, s2], axis=1, ignore_index=True)

    def except_apply(x):
        try:
            str1 = x[0]
            str2 = x[1]

            for ex in exceptions:
                str1 = str1.replace(ex, "")

            return jellyfish.jaro_distance(str1, str2)

        except Exception as err:
            # Nulls are expected and map to NaN; anything else is a real error.
            if pd.isnull(x[0]) or pd.isnull(x[1]):
                return np.nan
            else:
                raise err

    return conc.apply(except_apply, axis=1)
data_funcs.py 文件源码 项目:MultimodalAutoencoder 作者: natashamjaques 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def find_null_columns(df, features):
    """Locates columns in a pandas dataframe that have no values.

    Args:
        df: A pandas dataframe containing data.
        features: A list of string names of columns storing the actual data.

    Returns: A list of string names of the null columns.
    """
    # Idiom/perf fix: ``isnull().all()`` answers "every value missing?"
    # directly instead of materializing a filtered copy of the whole frame
    # per column just to compare row counts. (Docstring also corrected:
    # the parameter is ``features``, not ``wanted_feats``.)
    return [feat for feat in features if df[feat].isnull().all()]
factor_loader.py 文件源码 项目:WindAdapter 作者: iLampard 项目源码 文件源码 阅读 115 收藏 0 点赞 0 评论 0
def _merge_query_params(self, params, date=None):
    """Serialize one row of query parameters into a Wind 'key=value;' string.

    :param params: pandas Series of parameter values; ``params.name`` is
        presumably the factor name used for the industry suffix -- confirm.
    :param date: trade date string; required whenever a tenor is present
    :returns: semicolon-joined query string (no trailing ';') plus any
        industry-specific parameters
    """
    ret = ''
    for key, value in params.iteritems():
        # NOTE(review): this branch matches the literal key 'tenor' while the
        # branch below matches Header.TENOR -- confirm both spellings occur.
        # A null tenor degenerates to a single tradeDate query.
        if key == 'tenor' and pd.isnull(value):
            ret += 'tradeDate=' + date + ';'
        elif not pd.isnull(value):
            if key == Header.TENOR:
                py_assert(date is not None, ValueError, 'date must be given if tenor is not None')
                # unit = ''.join(re.findall('[0-9]+', params[Header.TENOR]))
                # freq = FreqType(params[Header.TENOR][len(unit):])
                # A tenor becomes an explicit startDate/endDate window ending at ``date``.
                ret += 'startDate=' + WIND_DATA_PROVIDER.forward_date(date, value,
                                                                      self.date_format) + ';endDate=' + date + ';'
            elif key == Header.FREQ and value[:3] == 'min':
                # Minute frequencies are expressed via the BarSize parameter.
                ret += ('BarSize=' + value[3:] + ';')
            else:
                ret += (key + '=' + str(value) + ';')
    # Drop the trailing ';' and append industry-specific parameters.
    ret = ret[:-1] + FactorLoader._check_industry_params(params.name)
    return ret
individual.py 文件源码 项目:lineage 作者: apriha 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def _complement_bases(self, genotype):
        if pd.isnull(genotype):
            return np.nan

        complement = ''

        for base in list(genotype):
            if base == 'A':
                complement += 'T'
            elif base == 'G':
                complement += 'C'
            elif base == 'C':
                complement += 'G'
            elif base == 'T':
                complement += 'A'

        return complement
core.py 文件源码 项目:SSieve 作者: davidimprovz 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def cleanNullColumns(sheet):
    """
    Helper function to discard columns in sheets where each value in column is null.

    Accepts a DataFrame as the sheet argument; drops all-null columns in place.

    Returns the cleaned dataframe or an error Tuple of (False, error)
    """
    try:
        # Collect the all-null column names first, then drop them in place.
        all_null = [col for col in sheet.columns if pd.isnull(sheet[col]).all()]
        for col in all_null:
            sheet.drop(col, axis=1, inplace=True)
        return sheet

    except Exception as e:
        return False, e
clients.py 文件源码 项目:eemeter 作者: openeemeter 项目源码 文件源码 阅读 41 收藏 0 点赞 0 评论 0
def get_isd_data(self, station, year):
    """Fetch hourly ISD air temperatures (deg C) for a station/year from NOAA.

    :param station: ISD station identifier used in the FTP file name
    :param year: year used to select /pub/data/noaa/{year}/...
    :returns: pandas Series of hourly UTC temperatures; hours with no
        reading remain NaN
    """

    filename_format = '/pub/data/noaa/{year}/{station}-{year}.gz'
    lines = self._retreive_file_lines(filename_format, station, year)

    # NOTE(review): the index runs through Dec 31 of year+1, i.e. spans two
    # calendar years -- looks like it was meant to stop at the end of
    # ``year``; confirm before relying on the series length.
    dates = pd.date_range("{}-01-01 00:00".format(year),
                          "{}-12-31 23:00".format(int(year) + 1),
                          freq='H', tz=pytz.UTC)
    series = pd.Series(None, index=dates, dtype=float)

    for line in lines:
        # Fixed-width ISD record: chars 87-92 hold the air temperature in
        # tenths of a degree C; "+9999" marks a missing observation.
        if line[87:92].decode('utf-8') == "+9999":
            temp_C = float("nan")
        else:
            temp_C = float(line[87:92]) / 10.
        date_str = line[15:27].decode('utf-8')

        # there can be multiple readings per hour, so set all to minute 0
        dt = pytz.UTC.localize(datetime.strptime(date_str, "%Y%m%d%H%M")).replace(minute=0)

        # only set the temp if it's the first encountered in the hour.
        # NOTE(review): Series.ix is removed in modern pandas; .loc is the
        # replacement if this code is ever upgraded.
        if pd.isnull(series.ix[dt]):
            series[dt] = temp_C

    return series
formatters.py 文件源码 项目:eemeter 作者: openeemeter 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def get_input_data_mask(self, input_data):
    ''' Boolean list of missing/not missing values:
        True  => missing
        False => not missing

    ``input_data`` is a ``(trace_data, temp_data)`` pair: an energy series
    indexed by period start, and a temperature series whose MultiIndex has
    an outer "period" level. Returns a Series indexed by day, True where
    the period's energy or that day's mean temperature is null.
    '''
    trace_data, temp_data = input_data
    dts = []
    mask = []
    if trace_data.empty or temp_data.empty:
        # Nothing to mask; return an empty Series.
        return pd.Series(mask)
    # NOTE(review): zip pairs each trace entry with a temperature group
    # positionally -- assumes both iterate in the same period order; confirm.
    for (start, energy), (p, group) in zip(
            trace_data.iteritems(),
            temp_data.groupby(level="period")):
        temps = group.copy()
        # Drop the "period" level so the index is plain timestamps.
        temps.index = temps.index.droplevel()
        # Daily mean temperatures; [0] selects the first (presumably only)
        # column of the resampled result -- TODO confirm.
        daily_temps = temps.resample('D').apply(np.mean)[0]
        for i, tempF in daily_temps.iteritems():
            dts.append(i)
            mask.append(pd.isnull(energy) or pd.isnull(tempF))
    return pd.Series(mask, index=dts)
test_arbitrary_serializer.py 文件源码 项目:eemeter 作者: openeemeter 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def test_multiple_records_with_gap(serializer):
    """Two disjoint records: covered days keep their values, the gap day
    and the final endpoint are NaN, and nothing is marked estimated."""
    UTC = pytz.UTC
    records = [
        {"start": datetime(2000, 1, 1, tzinfo=UTC),
         "end": datetime(2000, 1, 2, tzinfo=UTC),
         "value": 1},
        {"start": datetime(2000, 1, 3, tzinfo=UTC),
         "end": datetime(2000, 1, 4, tzinfo=UTC),
         "value": 2},
    ]
    df = serializer.to_dataframe(records)

    def day(d):
        return datetime(2000, 1, d, tzinfo=UTC)

    assert df.value[day(1)] == 1
    assert df.value[day(3)] == 2
    assert pd.isnull(df.value[day(2)])
    assert pd.isnull(df.value[day(4)])
    for d in (1, 2, 3, 4):
        assert not df.estimated[day(d)]
test_arbitrary_start_serializer.py 文件源码 项目:eemeter 作者: openeemeter 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def test_multiple_records(serializer):
    """Start-anchored records: the first period takes its value, the last
    start only terminates the series (NaN), nothing is estimated."""
    UTC = pytz.UTC
    records = [
        {"start": datetime(2000, 1, 1, tzinfo=UTC), "value": 1},
        {"start": datetime(2000, 1, 2, tzinfo=UTC), "value": 2},
    ]
    df = serializer.to_dataframe(records)

    first = datetime(2000, 1, 1, tzinfo=UTC)
    second = datetime(2000, 1, 2, tzinfo=UTC)
    assert df.value[first] == 1
    assert pd.isnull(df.value[second])
    assert not df.estimated[first]
    assert not df.estimated[second]
test_arbitrary_end_serializer.py 文件源码 项目:eemeter 作者: openeemeter 项目源码 文件源码 阅读 45 收藏 0 点赞 0 评论 0
def test_multiple_records(serializer):
    """End-anchored records: the first timestamp carries the *second*
    record's value; the final end point itself is NaN; nothing estimated."""
    UTC = pytz.UTC
    records = [
        {"end": datetime(2000, 1, 1, tzinfo=UTC), "value": 1},
        {"end": datetime(2000, 1, 2, tzinfo=UTC), "value": 2},
    ]
    df = serializer.to_dataframe(records)

    jan1 = datetime(2000, 1, 1, tzinfo=UTC)
    jan2 = datetime(2000, 1, 2, tzinfo=UTC)
    assert df.value[jan1] == 2
    assert pd.isnull(df.value[jan2])
    assert not df.estimated[jan1]
    assert not df.estimated[jan2]


问题


面经


文章

微信
公众号

扫码关注公众号