python类nan()的实例源码-第3页-面圈网

tests.py 文件源码项目：smoomapy 作者: mthh 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_input_with_missing_values(self):
        gdf = GeoDataFrame.from_file("misc/nuts3_data.geojson")
        gdf.loc[12:18, "gdppps2008"] = np.NaN
        StePot = SmoothStewart(gdf, "gdppps2008",
                               span=65000, beta=2, resolution=100000,
                               mask=gdf)
        result = StePot.render(9, "equal_interval", output="Geodataframe")
        self.assertIsInstance(result, GeoDataFrame)
        self.assertEqual(len(result), 9)

        gdf2 = GeoDataFrame.from_file('misc/nuts3_data.geojson').to_crs({"init": "epsg:3035"})
        gdf2.loc[:, 'gdppps2008'] = gdf2['gdppps2008'].astype(object)
        gdf2.loc[15:20, 'gdppps2008'] = ""
        gdf2.loc[75:78, 'gdppps2008'] = ""
        StePot = SmoothStewart(gdf2, 'gdppps2008', span=65000, beta=2, resolution=48000, mask=gdf2)
        result = StePot.render(9, 'equal_interval', output="GeoDataFrame")
        self.assertIsInstance(result, GeoDataFrame)
        self.assertEqual(len(result), 9)

tests.py 文件源码项目：smoomapy 作者: mthh 项目源码文件源码阅读 34 收藏 0 点赞 0 评论 0

def test_wrong_dtype_missing_values(self):
        gdf = GeoDataFrame.from_file("misc/nuts3_data.geojson")
        gdf.loc[12:18, "gdppps2008"] = np.NaN
        gdf.loc[25:35, "pop2008"] = np.NaN
        gdf.loc[0:len(gdf)-1, "pop2008"] = gdf["pop2008"].astype(str)
        StePot = SmoothStewart(gdf, "gdppps2008",
                               span=65000, beta=2, resolution=100000,
                               mask="misc/nuts3_data.geojson")
        result = StePot.render(9, "equal_interval", output="Geodataframe")
        self.assertIsInstance(result, GeoDataFrame)
        self.assertEqual(len(result), 9)

        StePot = SmoothStewart(gdf, "gdppps2008", variable_name2="pop2008",
                               span=65000, beta=2, resolution=100000,
                               mask="misc/nuts3_data.geojson")
        result = StePot.render(9, "equal_interval", output="Geodataframe")
        self.assertIsInstance(result, GeoDataFrame)
        self.assertEqual(len(result), 9)

test_algorithm.py 文件源码项目：catalyst 作者: enigmampc 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_run_twice(self):
        algo1 = TestRegisterTransformAlgorithm(
            sim_params=self.sim_params,
            sids=[0, 1],
            env=self.env,
        )

        res1 = algo1.run(self.data_portal)

        # Create a new trading algorithm, which will
        # use the newly instantiated environment.
        algo2 = TestRegisterTransformAlgorithm(
            sim_params=self.sim_params,
            sids=[0, 1],
            env=self.env,
        )

        res2 = algo2.run(self.data_portal)

        # There are some np.NaN values in the first row because there is not
        # enough data to calculate the metric, e.g. beta.
        res1 = res1.fillna(value=0)
        res2 = res2.fillna(value=0)

        np.testing.assert_array_equal(res1, res2)

test_quarters_estimates.py 文件源码项目：catalyst 作者: enigmampc 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def make_expected_timelines_2q_out(cls):
        return {
            pd.Timestamp('2015-01-06', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] * 3)
            },
            pd.Timestamp('2015-01-07', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] * 3)
            },
            pd.Timestamp('2015-01-08', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] * 3)
            },
            pd.Timestamp('2015-01-09', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] * 3)
            },
            pd.Timestamp('2015-01-12', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] * 2 +
                                      [[2100 * 3., 2110. * 4]])
            }
        }

test_quarters_estimates.py 文件源码项目：catalyst 作者: enigmampc 项目源码文件源码阅读 66 收藏 0 点赞 0 评论 0

def make_expected_timelines_1q_out(cls):
        return {
            pd.Timestamp('2015-01-06', tz='utc'): {
                'estimate1': np.array([[np.NaN, np.NaN]] +
                                      [[1100. * 1/.3, 1110. * 1/.4]] * 2),
                'estimate2': np.array([[np.NaN, np.NaN]] +
                                      [[2100. * 1/.3, 2110. * 1/.4]] * 2),
            },
            pd.Timestamp('2015-01-07', tz='utc'): {
                'estimate1': np.array([[1100., 1110.]] * 3),
                'estimate2': np.array([[2100., 2110.]] * 3)
            },
            pd.Timestamp('2015-01-08', tz='utc'): {
                'estimate1': np.array([[1100., 1110.]] * 3),
                'estimate2': np.array([[2100., 2110.]] * 3)
            },
            pd.Timestamp('2015-01-09', tz='utc'): {
                'estimate1': np.array([[1100 * 3., 1210. * 4]] * 3),
                'estimate2': np.array([[2100 * 3., 2210. * 4]] * 3)
            },
            pd.Timestamp('2015-01-12', tz='utc'): {
                'estimate1': np.array([[1200 * 3., np.NaN]] * 3),
                'estimate2': np.array([[2200 * 3., np.NaN]] * 3)
            }
        }

test_quarters_estimates.py 文件源码项目：catalyst 作者: enigmampc 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def make_expected_timelines_2q_out(cls):
        return {
            pd.Timestamp('2015-01-06', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] +
                                      [[2200 * 1/.3, 2210. * 1/.4]] * 2)
            },
            pd.Timestamp('2015-01-07', tz='utc'): {
                'estimate2': np.array([[2200., 2210.]] * 3)
            },
            pd.Timestamp('2015-01-08', tz='utc'): {
                'estimate2': np.array([[2200, 2210.]] * 3)
            },
            pd.Timestamp('2015-01-09', tz='utc'): {
                'estimate2': np.array([[2200 * 3., np.NaN]] * 3)
            },
            pd.Timestamp('2015-01-12', tz='utc'): {
                'estimate2': np.array([[np.NaN, np.NaN]] * 3)
            }
        }

features.py 文件源码项目：AutoML-Challenge 作者: postech-mlg-exbrain 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _calculate(self, X, y, categorical, metafeatures, helpers):
        import sklearn.lda
        if len(y.shape) == 1 or y.shape[1] == 1:
            kf = cross_validation.StratifiedKFold(y, n_folds=10)
        else:
            kf = cross_validation.KFold(y.shape[0], n_folds=10)

        accuracy = 0.
        try:
            for train, test in kf:
                lda = sklearn.lda.LDA()

                if len(y.shape) == 1 or y.shape[1] == 1:
                    lda.fit(X[train], y[train])
                else:
                    lda = OneVsRestClassifier(lda)
                    lda.fit(X[train], y[train])

                predictions = lda.predict(X[test])
                accuracy += sklearn.metrics.accuracy_score(predictions, y[test])
            return accuracy / 10
        except LinAlgError as e:
            self.logger.warning("LDA failed: %s Returned 0 instead!" % e)
            return np.NaN
        except ValueError as e:
            self.logger.warning("LDA failed: %s Returned 0 instead!" % e)
            return np.NaN

spectral_data.py 文件源码项目：PySAT 作者: USGS-Astrogeology 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def stratified_folds(self, nfolds=5, sortby=None):
        self.df[('meta', 'Folds')] = np.NaN  # Create an entry in the data frame that holds the folds
        self.df.sort_values(by=sortby, inplace=True)  # sort the data frame by the column of interest
        uniqvals = np.unique(self.df[sortby])  # get the unique values from the column of interest

        # assign folds by stepping through the unique values
        fold_num = 1
        for i in uniqvals:
            ind = self.df[sortby] == i  # find where the data frame matches the unique value
            self.df.set_value(self.df.index[ind], ('meta', 'Folds'), fold_num)
            # Inrement the fold number, reset to 1 if it is greater than the desired number of folds
            fold_num = fold_num + 1
            if fold_num > nfolds:
                fold_num = 1

        # sort by index to return the df to its original order
        self.df.sort_index(inplace=True)
        self.folds_hist(sortby,50)

test_graynet.py 文件源码项目：graynet 作者: raamana 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def test_invalid_edge_range():
    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=-1)

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=[])

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=[1, ])

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=[1, 2, 3])

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=(1, np.NaN))

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, edge_range=(2, 1))

test_graynet.py 文件源码项目：graynet 作者: raamana 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def test_invalid_nbins():
    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, num_bins=np.NaN)

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, num_bins=np.Inf)

    with raises(ValueError):
        ew = graynet.extract(subject_id_list, fs_dir, num_bins=2)


# test_multi_edge()
# test_multi_edge_CLI()
# test_empty_subject_list()
# test_run_no_IO()
# test_run_roi_stats_via_API()
# test_run_roi_stats_via_CLI()
# test_CLI_only_weight_or_stats()

cluster.py 文件源码项目：xlearn 作者: wy2136 项目源码文件源码阅读 53 收藏 0 点赞 0 评论 0

def predict(self, da):
        '''xarray.DataArray version of sklearn.cluster.KMeans.fit.'''

        # compatible with the sklean.cluster.KMeans predict method when the input data is not DataArray
        if not isinstance(da, xr.DataArray):
            return super().predict(da)

        # retrieve parameters
        n_samples = da.shape[0]
        features_shape = da.shape[1:]
        n_features = np.prod(features_shape)

        X = da.data.reshape(n_samples, n_features)# 'data' might be replaced with 'values'.
        # remove NaN values if exists in X
        try:
            X_valid = X[:, self.valid_features_index_]
        except:
            X_valid = X
        samples_dim = da.dims[0]
        samples_coord = {samples_dim: da.coords[samples_dim]}
        labels = xr.DataArray(super().predict(X_valid),
            dims=samples_dim, coords=samples_coord)

        return labels

Experiment.py 文件源码项目：FLASH 作者: yuyuz 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def create_trial(self):
        trial = dict()
        # Status of the trial object
        trial['status'] = 0
        trial['params'] = dict()
        # Stores the validation error
        trial['result'] = np.NaN
        trial['test_error'] = np.NaN
        # Validation error for every instance
        trial['instance_results'] = np.ones((self.folds)) * np.NaN
        # Status for every instance
        trial['instance_status'] = np.zeros((self.folds), dtype=int)
        # Contains the standard deviation in case of cross validation
        trial['std'] = np.NaN
        # Accumulated duration over all instances
        trial['duration'] = np.NaN
        # Stores the duration for every instance
        trial['instance_durations'] = np.ones((self.folds)) * np.NaN
        return trial

Experiment.py 文件源码项目：FLASH 作者: yuyuz 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def get_arg_best(self):
        best_idx = -1
        best_value = sys.maxint
        for i, trial in enumerate(self.trials):
            tmp_res = np.NaN
            if np.isfinite(trial['result']):
                tmp_res = trial['result']
            elif np.isfinite(trial['instance_results']).any():
                tmp_res = wrapping_util.nan_mean(trial['instance_results'])
                # np.nanmean is not available in older numpy versions
                # tmp_res = scipy.nanmean(trial['instance_results'])
            else:
                continue
            if tmp_res < best_value:
                best_idx = i
                best_value = tmp_res
        if best_idx == -1:
            raise ValueError("No best value found.")
        return best_idx

    # Get the best value so far, for more documentation see get_arg_best

benchmark_functions.py 文件源码项目：FLASH 作者: yuyuz 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def save_har6(params, **kwargs):
    if "x" not in params or "y" not in params or "z" not in params \
       or "a" not in params or "b" not in params or "c" not in params:
        sys.stderr.write("No params found ['x', 'y']\n")
        return np.NaN
    x = float(params["x"])
    y = float(params["y"])
    z = float(params["z"])
    a = float(params["a"])
    b = float(params["b"])
    c = float(params["c"])
    if type(x) == np.ndarray:
        x = x[0]
        y = y[0]
        z = z[0]
        a = a[0]
        b = b[0]
        c = c[0]
    return har6(x, y, z, a, b, c)

runsolver_wrapper.py 文件源码项目：FLASH 作者: yuyuz 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def get_trial_index(experiment, fold, params):
    # Check whether we are in a new configuration; This has to check whether
    # the params were already inserted but also whether the fold already run
    # This is checked twice; the instance_result has to be not NaN and the
    # entry in instance_order has to exist
    new = True
    trial_index = np.NaN
    for idx, trial in enumerate(experiment.trials):
        exp = trial['params']
        if exp == params and (idx, fold) not in experiment.instance_order and \
                (experiment.get_trial_from_id(idx)['instance_results'][fold] == np.NaN or
                 experiment.get_trial_from_id(idx)['instance_results'][fold] !=
                 experiment.get_trial_from_id(idx)['instance_results'][fold]):
            new = False
            trial_index = idx
            break
    if new:
        trial_index = experiment.add_job(params)
    return trial_index

test_categorical_imputer.py 文件源码项目：coremltools 作者: apple 项目源码文件源码阅读 81 收藏 0 点赞 0 评论 0

def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Imputer(strategy='most_frequent', axis=0)
        scikit_data['data'][1,8] = np.NaN

        input_data = scikit_data['data'][:,8].reshape(-1, 1)
        scikit_model.fit(input_data, scikit_data['target'])

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model

tools.py 文件源码项目：CElegansBehaviour 作者: ChristophKirst 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def calculateDeltas(data, n = 10):
  """create the distnances travelled since the last n time steps"""

  dx = np.diff(data[:,0]);
  dy = np.diff(data[:,1]);

  dx0 = dx.copy();
  dy0 = dy.copy();

  delta = np.zeros((dx.shape[0], n));
  for i in range(n):
    delta[:,i] = np.sqrt(dx * dx + dy * dy);
    if i < n-1:
      dx0 = np.concatenate([[np.NaN], dx0[:-1]]);
      dy0 = np.concatenate([[np.NaN], dy0[:-1]]);
      dx += dx0;
      dy += dy0;

  return delta;

ols.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def _rolling_rank(self):
        dates = self._index
        window = self._window

        ranks = np.empty(len(dates), dtype=float)
        ranks[:] = np.NaN
        for i, date in enumerate(dates):
            if self._is_rolling and i >= window:
                prior_date = dates[i - window + 1]
            else:
                prior_date = dates[0]

            x_slice = self._x.truncate(before=prior_date, after=date).values

            if len(x_slice) == 0:
                continue

            ranks[i] = math.rank(x_slice)

        return ranks

strings.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def str_endswith(arr, pat, na=np.nan):
    """
    Return boolean Series indicating whether each string in the
    Series/Index ends with passed pattern. Equivalent to
    :meth:`str.endswith`.

    Parameters
    ----------
    pat : string
        Character sequence
    na : bool, default NaN

    Returns
    -------
    endswith : Series/array of boolean values
    """
    f = lambda x: x.endswith(pat)
    return _na_map(f, arr, na, dtype=bool)

algorithms.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def nsmallest(arr, n, keep='first'):
    """
    Find the indices of the n smallest values of a numpy array.

    Note: Fails silently with NaN.
    """
    if keep == 'last':
        arr = arr[::-1]

    narr = len(arr)
    n = min(n, narr)

    sdtype = str(arr.dtype)
    arr = arr.view(_dtype_map.get(sdtype, sdtype))

    kth_val = algos.kth_smallest(arr.copy(), n - 1)
    return _finalize_nsmallest(arr, kth_val, n, keep, narr)

test_resample.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_resample_consistency(self):

        # GH 6418
        # resample with bfill / limit / reindex consistency

        i30 = pd.date_range('2002-02-02', periods=4, freq='30T')
        s = pd.Series(np.arange(4.), index=i30)
        s[2] = np.NaN

        # Upsample by factor 3 with reindex() and resample() methods:
        i10 = pd.date_range(i30[0], i30[-1], freq='10T')

        s10 = s.reindex(index=i10, method='bfill')
        s10_2 = s.reindex(index=i10, method='bfill', limit=2)
        rl = s.reindex_like(s10, method='bfill', limit=2)
        r10_2 = s.resample('10Min').bfill(limit=2)
        r10 = s.resample('10Min').bfill()

        # s10_2, r10, r10_2, rl should all be equal
        assert_series_equal(s10_2, r10)
        assert_series_equal(s10_2, r10_2)
        assert_series_equal(s10_2, rl)

test_analytics.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def test_skew(self):
        tm._skip_if_no_scipy()

        from scipy.stats import skew
        alt = lambda x: skew(x, bias=False)
        self._check_stat_op('skew', alt)

        # test corner cases, skew() returns NaN unless there's at least 3
        # values
        min_N = 3
        for i in range(1, min_N + 1):
            s = Series(np.ones(i))
            df = DataFrame(np.ones((i, i)))
            if i < min_N:
                self.assertTrue(np.isnan(s.skew()))
                self.assertTrue(np.isnan(df.skew()).all())
            else:
                self.assertEqual(0, s.skew())
                self.assertTrue((df.skew() == 0).all())

test_analytics.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def test_kurt(self):
        tm._skip_if_no_scipy()

        from scipy.stats import kurtosis
        alt = lambda x: kurtosis(x, bias=False)
        self._check_stat_op('kurt', alt)

        index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
                           labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2],
                                   [0, 1, 0, 1, 0, 1]])
        s = Series(np.random.randn(6), index=index)
        self.assertAlmostEqual(s.kurt(), s.kurt(level=0)['bar'])

        # test corner cases, kurt() returns NaN unless there's at least 4
        # values
        min_N = 4
        for i in range(1, min_N + 1):
            s = Series(np.ones(i))
            df = DataFrame(np.ones((i, i)))
            if i < min_N:
                self.assertTrue(np.isnan(s.kurt()))
                self.assertTrue(np.isnan(df.kurt()).all())
            else:
                self.assertEqual(0, s.kurt())
                self.assertTrue((df.kurt() == 0).all())

test_analytics.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 36 收藏 0 点赞 0 评论 0

def test_count(self):
        self.assertEqual(self.ts.count(), len(self.ts))

        self.ts[::2] = np.NaN

        self.assertEqual(self.ts.count(), np.isfinite(self.ts).sum())

        mi = MultiIndex.from_arrays([list('aabbcc'), [1, 2, 2, nan, 1, 2]])
        ts = Series(np.arange(len(mi)), index=mi)

        left = ts.count(level=1)
        right = Series([2, 3, 1], index=[1, 2, nan])
        assert_series_equal(left, right)

        ts.iloc[[0, 3, 5]] = nan
        assert_series_equal(ts.count(level=1), right - 1)

test_timeseries.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def test_first_last_valid(self):
        ts = self.ts.copy()
        ts[:5] = np.NaN

        index = ts.first_valid_index()
        self.assertEqual(index, ts.index[5])

        ts[-5:] = np.NaN
        index = ts.last_valid_index()
        self.assertEqual(index, ts.index[-6])

        ts[:] = np.nan
        self.assertIsNone(ts.last_valid_index())
        self.assertIsNone(ts.first_valid_index())

        ser = Series([], index=[])
        self.assertIsNone(ser.last_valid_index())
        self.assertIsNone(ser.first_valid_index())

test_constructors.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def test_constructor_dict_timedelta_index(self):
        # GH #12169 : Resample category data with timedelta index
        # construct Series from dict as data and TimedeltaIndex as index
        # will result NaN in result Series data
        expected = Series(
            data=['A', 'B', 'C'],
            index=pd.to_timedelta([0, 10, 20], unit='s')
        )

        result = Series(
            data={pd.to_timedelta(0, unit='s'): 'A',
                  pd.to_timedelta(10, unit='s'): 'B',
                  pd.to_timedelta(20, unit='s'): 'C'},
            index=pd.to_timedelta([0, 10, 20], unit='s')
        )
        # this should work
        assert_series_equal(result, expected)

test_constructors.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 35 收藏 0 点赞 0 评论 0

def test_fromValue(self):

        nans = Series(np.NaN, index=self.ts.index)
        self.assertEqual(nans.dtype, np.float_)
        self.assertEqual(len(nans), len(self.ts))

        strings = Series('foo', index=self.ts.index)
        self.assertEqual(strings.dtype, np.object_)
        self.assertEqual(len(strings), len(self.ts))

        d = datetime.now()
        dates = Series(d, index=self.ts.index)
        self.assertEqual(dates.dtype, 'M8[ns]')
        self.assertEqual(len(dates), len(self.ts))

        # GH12336
        # Test construction of categorical series from value
        categorical = Series(0, index=self.ts.index, dtype="category")
        expected = Series(0, index=self.ts.index).astype("category")
        self.assertEqual(categorical.dtype, 'category')
        self.assertEqual(len(categorical), len(self.ts))
        tm.assert_series_equal(categorical, expected)

test_format.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 40 收藏 0 点赞 0 评论 0

def test_to_string_format_na(self):
        self.reset_display_options()
        df = DataFrame({'A': [np.nan, -1, -2.1234, 3, 4],
                        'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
        result = df.to_string()

        expected = ('        A       B\n'
                    '0     NaN     NaN\n'
                    '1 -1.0000     foo\n'
                    '2 -2.1234   foooo\n'
                    '3  3.0000  fooooo\n'
                    '4  4.0000     bar')
        self.assertEqual(result, expected)

        df = DataFrame({'A': [np.nan, -1., -2., 3., 4.],
                        'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
        result = df.to_string()

        expected = ('     A       B\n'
                    '0  NaN     NaN\n'
                    '1 -1.0     foo\n'
                    '2 -2.0   foooo\n'
                    '3  3.0  fooooo\n'
                    '4  4.0     bar')
        self.assertEqual(result, expected)

test_format.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def test_to_csv_na_rep(self):
        # testing if NaN values are correctly represented in the index
        # GH 11553
        df = DataFrame({'a': [0, np.NaN], 'b': [0, 1], 'c': [2, 3]})
        expected = "a,b,c\n0.0,0,2\n_,1,3\n"
        self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected)
        self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected)

        # now with an index containing only NaNs
        df = DataFrame({'a': np.NaN, 'b': [0, 1], 'c': [2, 3]})
        expected = "a,b,c\n_,0,2\n_,1,3\n"
        self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected)
        self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected)

        # check if na_rep parameter does not break anything when no NaN
        df = DataFrame({'a': 0, 'b': [0, 1], 'c': [2, 3]})
        expected = "a,b,c\n0,0,2\n0,1,3\n"
        self.assertEqual(df.set_index('a').to_csv(na_rep='_'), expected)
        self.assertEqual(df.set_index(['a', 'b']).to_csv(na_rep='_'), expected)

test_format.py 文件源码项目：PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码文件源码阅读 53 收藏 0 点赞 0 评论 0

def test_to_string_mixed(self):
        s = Series(['foo', np.nan, -1.23, 4.56])
        result = s.to_string()
        expected = (u('0     foo\n') + u('1     NaN\n') + u('2   -1.23\n') +
                    u('3    4.56'))
        self.assertEqual(result, expected)

        # but don't count NAs as floats
        s = Series(['foo', np.nan, 'bar', 'baz'])
        result = s.to_string()
        expected = (u('0    foo\n') + '1    NaN\n' + '2    bar\n' + '3    baz')
        self.assertEqual(result, expected)

        s = Series(['foo', 5, 'bar', 'baz'])
        result = s.to_string()
        expected = (u('0    foo\n') + '1      5\n' + '2    bar\n' + '3    baz')
        self.assertEqual(result, expected)