Python notnull() usage examples
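
pd.notnull (an alias of pd.notna since pandas 0.21) takes a scalar or array-like and returns element-wise booleans marking the non-missing values; np.nan, None and NaT all count as missing. A minimal sketch of the behaviour, independent of the project snippets collected below:

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, "x", None])
print(pd.notnull(s))  # element-wise: True, False, True, False

# The recurring pattern in the snippets below: swap missing values for Python None
print(s.where(pd.notnull(s), None).tolist())  # [1.0, None, 'x', None]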

trace.py (project: eemeter, author: openeemeter)
def yield_records(self, sorted_records):

        n = len(sorted_records)
        for i, record in enumerate(sorted_records):

            self.validate_record(record)

            start = record["start"]
            value = record["value"]
            estimated = record.get("estimated", False)

            if i < n - 1:  # all except last record
                yield (start, value, estimated)
            else:  # last record
                end = record.get("end", None)
                if end is None:
                    # can't use the value of this record, no end date
                    yield (start, np.nan, False)
                else:

                    self._validate_record_start_end(record, start, end)

                    # provide an end date cap
                    if pd.notnull(value):
                        yield (start, value, estimated)
                        yield (end, np.nan, False)
                    else:
                        yield (start, np.nan, False)

formatters.py (project: eemeter, author: openeemeter)
def serialize_input(self, input_data):
        ''' Serialize input data
        '''
        return OrderedDict([
            (start.isoformat(), OrderedDict([
                ("energy", row.energy if pd.notnull(row.energy) else None),
                ("tempF", row.tempF if pd.notnull(row.tempF) else None),
            ]))
            for start, row in input_data.iterrows()
        ])

dataframe_utils.py (project: fileflow, author: industrydive)
def read_and_clean_csv_to_dataframe(filename_or_stream, encoding='utf-8'):
    """
    Reads a UTF-8 encoded CSV directly into a pandas dataframe as string values and scrubs np.NaN values to Python None.

    :param str filename_or_stream: path to the CSV file, or a readable stream
    :param str encoding: file encoding, defaults to 'utf-8'
    :return: pd.DataFrame of string values, with nulls as Python None
    """
    # pulls data in as utf8, all as strings, and without pre whitespace padding
    try:
        data = pd.read_csv(
            filepath_or_buffer=filename_or_stream,
            encoding=encoding,
            dtype=str,
            skipinitialspace=True
        )
    except AttributeError:
        # this is an empty dataframe and pandas crashed because it can't coerce the columns to strings
        # issue and PR to fix is open on pandas core at https://github.com/pydata/pandas/issues/12048
        # slated for 1.8 release
        # so for now just try loading the dataframe without specifying dtype
        data = pd.read_csv(
            filepath_or_buffer=filename_or_stream,
            encoding=encoding,
            skipinitialspace=True
        )
    logging.info('File read via the pandas read_csv methodology.')

    # coerces pandas nulls (of np.NaN type) into python None
    data = data.where((pd.notnull(data)), None)

    # coerces string representations of Python None to a real Python None
    data[data == 'None'] = None
    data[data == ''] = None
    logging.info("Dataframe of shape %s has been retrieved." % str(data.shape))

    return data

earnings_estimates.py (project: catalyst, author: enigmampc)
def __init__(self,
                 estimates,
                 name_map):
        validate_column_specs(
            estimates,
            name_map
        )

        self.estimates = estimates[
            estimates[EVENT_DATE_FIELD_NAME].notnull() &
            estimates[FISCAL_QUARTER_FIELD_NAME].notnull() &
            estimates[FISCAL_YEAR_FIELD_NAME].notnull()
        ]
        self.estimates[NORMALIZED_QUARTERS] = normalize_quarters(
            self.estimates[FISCAL_YEAR_FIELD_NAME],
            self.estimates[FISCAL_QUARTER_FIELD_NAME],
        )

        self.array_overwrites_dict = {
            datetime64ns_dtype: Datetime641DArrayOverwrite,
            float64_dtype: Float641DArrayOverwrite,
        }
        self.scalar_overwrites_dict = {
            datetime64ns_dtype: Datetime64Overwrite,
            float64_dtype: Float64Overwrite,
        }

        self.name_map = name_map
        self._columns = set(name_map.keys())

pandas_utils.py (project: betterself, author: jeffshek)
def update_dataframe_to_be_none_instead_of_nan_for_api_responses(df):
    df = df.where((pd.notnull(df)), None)
    return df
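
Why swap NaN for None before returning an API response: the standard json encoder serializes None as valid JSON null, while np.nan is emitted as the bare token NaN, which is not valid JSON. A minimal sketch of the effect; the astype(object) step is an addition of mine, since recent pandas versions coerce None straight back to NaN in float columns without it:

import json
import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, np.nan]})
# astype(object) keeps None from being re-coerced to NaN (needed on newer pandas)
records = df.astype(object).where(pd.notnull(df), None).to_dict(orient="records")
print(json.dumps(records))  # [{"a": 1.0}, {"a": null}]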

views.py (project: betterself, author: jeffshek)
def get_sorted_response(series):
    if series.dropna().empty:
        return NO_DATA_RESPONSE

    # Build an odd sorted-tuple response because JavaScript sorting is an oddly difficult problem
    # sorted_response = [item for item in series.iteritems()]
    sorted_response = []
    for index, value in series.iteritems():
        if not pd.notnull(value):
            value = None

        data_point = (index, value)
        sorted_response.append(data_point)

    return Response(sorted_response)

test_categorization.py (project: FreeDiscovery, author: FreeDiscovery)
def test_api_categorization_sort(app, sort_by):
    n_categories = 2
    dsid, lsi_id, _, ds_input = get_features_lsi_cached(app, n_categories=n_categories)
    method = V01 + "/feature-extraction/{}".format(dsid)
    data = app.get_check(method)

    training_set = ds_input['training_set']

    pars = {
          'parent_id': lsi_id,
          'data': training_set,
          'method': 'NearestNeighbor'}

    method = V01 + "/categorization/"
    data = app.post_check(method, json=pars)
    mid = data['id']

    method = V01 + "/categorization/{}/predict".format(mid)

    data = app.get_check(method, json={'batch_id': -1, "sort_by": sort_by})

    res = []
    for row in data['data']:
        res_el = {'document_id': row['document_id']}
        for scores in row['scores']:
            res_el[scores['category']] = scores['score']
        res.append(res_el)

    df = pd.DataFrame(res)
    df = df.set_index('document_id')

    if sort_by in df.columns:
        mask = pd.notnull(df[sort_by])
        assert_array_equal(df[mask].index.values,
                           df[mask].sort_values(sort_by, ascending=False).index.values)

process.py (project: pyprocessmacro, author: QuentinAndre)
def _prepare_data(self):
        """
        Subset the dataframe to the columns needed for estimation purposes, and add a constant.
        :return: pd.DataFrame
        """
        # Subset the data to the columns used in the model
        data = self.data[self.varlist].copy()
        data = data[pd.notnull(data)].reset_index(drop=True)

        # Map each variable name to a unique variable code, and rename the columns in the data.
        data.rename(columns=self._var_to_symb, inplace=True)

        # Adding a constant to the data.
        data["Cons"] = 1

        if self.options["logit"]:
            endog = data["y"]
            uniques = np.unique(endog)
            if len(uniques) != 2:
                raise ValueError(
                    "The dependent variable does not have exactly two distinct outcomes."
                    "Please provide another dataset or change the 'logit' option to 0")
            else:
                endog_logit = [0 if i == uniques[0] else 1 for i in endog]
            data["y"] = endog_logit
        return data

stockstats.py (project: stockstats, author: jealous)
def remove_random_nan(pd_obj):
        return pd_obj.where((pd.notnull(pd_obj)), None)

Sensitivity.py (project: gullikson-scripts, author: kgullikson88)
def split_by_component(df):
    df['prim_comp'] = df.Comp.map(lambda s: s[0])
    df['sec_comp'] = df.Comp.map(lambda s: s[-1])
    comps = pd.concat((df[['prim_comp', 'Sp1']], df[['sec_comp', 'Sp2']]))
    prim = comps.loc[comps.prim_comp.notnull()].rename(columns={'Sp1': 'SpT', 'prim_comp': 'comp'})
    sec = comps.loc[comps.sec_comp.notnull()].rename(columns={'Sp2': 'SpT', 'sec_comp': 'comp'})
    return pd.concat((prim, sec))[['comp', 'SpT']].drop_duplicates(subset='comp')

add-memberships.py (project: popit-scripts, author: open-hluttaw)
def add_committee():

    # pandas.DataFrame.from_csv is deprecated; pandas.read_csv is its documented replacement
    df = pandas.read_csv('data/mp-en.csv', header=0, index_col=False)
    df = df.where((pandas.notnull(df)), None)
    MPs = df.to_dict(orient='records')

    for mp in MPs:
        if mp['committee_memberships']:
            committees = [committee.strip() for committee in mp['committee_memberships'].split(',')]

            person_id = utils.hluttaw_to_popitid(mp['identifier__hluttaw'],
                                                 base_url)
            on_behalf_of_id = utils.org_name_to_popitid(mp['group'],base_url)


            for org in committees:
                payload = {}

                payload['person_id'] = person_id
                payload['organization_id'] = utils.org_name_to_popitid(org,base_url)
                payload['on_behalf_of_id'] = on_behalf_of_id
                payload['role'] = 'Committee Member'
                payload['start_date'] = mp['start_date']

                url = base_url + '/en/memberships'
                r = requests.post(url, headers=headers, json=payload)
                print(r.content)

update-persons.py (project: popit-scripts, author: open-hluttaw)
def update_my():

    lang = 'my'

    # pandas.DataFrame.from_csv is deprecated; pandas.read_csv is its documented replacement
    df = pandas.read_csv('data/mp-my.csv', header=1, index_col=False)
    df = df.where((pandas.notnull(df)), None)

    MPs = df.to_dict(orient='records')

    for mp in MPs:
        hluttaw_id = mp['identifier__hluttaw']

        popit_id = utils.hluttaw_to_popitid(hluttaw_id, base_url)

        print(hluttaw_id)
        print(popit_id)

        if popit_id:
            url = base_url + "/" + lang + "/persons/" + popit_id

            honorific_prefix = mp['honorific_prefix']
            name = mp['name']
            gender = mp['gender']
            national_identity = mp['national_identity']

            payload = { 
                        'honorific_prefix': honorific_prefix,
                        'name': name,
                        'gender': gender,
                        'national_identity': national_identity,
                        }

            r = requests.put(url, headers=headers, json=payload)
            print(r.content)

operators.py (project: zeex, author: zbarge)
def not_null(x):
    return notnull(x) and str(x).lower() not in NULL_VALUES

pandatools.py (project: zeex, author: zbarge)
def nan_coerce(x):
    v = str(x)
    # test the original value for nullness: str(x) is never null, so pd.notnull(v) is always True
    if pd.isnull(x) or v in NAN_LIST:
        return np.nan
    return x

pandatools.py (project: zeex, author: zbarge)
def remove_line_breaks(x):
    x = (str(x) if pd.notnull(x) else '')
    for b in LINE_BREAKS_LIST_RX:
        x = b.sub(" ", x)
    return string_blank_na(x.lstrip().rstrip())

test_ols.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_longpanel_series_combo(self):
        wp = tm.makePanel()
        lp = wp.to_frame()

        y = lp.pop('ItemA')
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=y, x=lp, entity_effects=True, window=20)
        self.assertTrue(notnull(model.beta.values).all())
        tm.assertIsInstance(model, PanelOLS)
        model.summary

test_panel.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_count(self):
        f = lambda s: notnull(s).sum()
        self._check_stat_op('count', f, obj=self.panel, has_skipna=False)

test_panel.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_transpose_copy(self):
        panel = self.panel.copy()
        result = panel.transpose(2, 0, 1, copy=True)
        expected = panel.swapaxes('items', 'minor')
        expected = expected.swapaxes('major', 'minor')
        assert_panel_equal(result, expected)

        panel.values[0, 1, 1] = np.nan
        self.assertTrue(notnull(result.values[1, 0, 1]))

test_panel4d.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_count(self):
        f = lambda s: notnull(s).sum()
        self._check_stat_op('count', f, obj=self.panel4d, has_skipna=False)

test_indexing.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_setitem_always_copy(self):
        s = self.frame['A'].copy()
        self.frame['E'] = s

        self.frame['E'][5:10] = nan
        self.assertTrue(notnull(s[5:10]).all())

