def get_levels(self, name):
"""
Return a set containing all distinct values in the column 'name'.
The values are returned in alphabetical order.
Parameters
----------
name : string
The column name for which the unique values are requested
Returns
-------
levels : list
A unique list of all values that are contained in the specified
data column.
"""
return pd.unique(self._table[name].values.ravel())
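A quick illustration of the behavior the docstring leans on: pd.unique returns values in order of first appearance, not sorted. The frame below is made up for demonstration.

import pandas as pd

df = pd.DataFrame({'species': ['dog', 'cat', 'dog', 'ant']})
print(pd.unique(df['species'].values.ravel()))
# ['dog' 'cat' 'ant'] -- first-appearance order, not alphabetical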
Python unique() usage examples
def wrapper_nms(proposal_df, overlap=0.65):
"""Apply non-max-suppresion to a video batch.
"""
vds_unique = pd.unique(proposal_df['video-name'])
new_proposal_df = []
    for v in vds_unique:
idx = proposal_df['video-name'] == v
p = proposal_df.loc[idx, ['video-name', 'f-init', 'f-end',
'score', 'video-frames']]
loc = np.stack((p['f-init'], p['f-end']), axis=-1)
loc, score = nms_detections(loc, np.array(p['score']), overlap)
n_proposals = score.shape[0]
n_frames = np.repeat(p['video-frames'].mean(), n_proposals).astype(int)
this_df = pd.DataFrame({'video-name': np.repeat(v, n_proposals),
'f-init': loc[:, 0], 'f-end': loc[:, 1],
'score': score,
'video-frames': n_frames})
new_proposal_df.append(this_df)
return pd.concat(new_proposal_df, axis=0)
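A hedged usage sketch of the per-video split performed above, with a toy proposal_df; nms_detections is project-specific, so the sketch stops short of calling it.

import pandas as pd

proposal_df = pd.DataFrame({'video-name': ['v1', 'v1', 'v2'],
                            'f-init': [0, 10, 5],
                            'f-end': [30, 40, 25],
                            'score': [0.9, 0.8, 0.7],
                            'video-frames': [100, 100, 80]})
for v in pd.unique(proposal_df['video-name']):
    per_video = proposal_df.loc[proposal_df['video-name'] == v]
    print(v, len(per_video))  # v1 2, then v2 1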
def get_detected_objects(df, tol=1.0, debug=False):
"""
Takes a summary dataframe with RV information. Finds the median rv for each star,
and removes objects that are more than 'tol' km/s from the median value
    :param df: A summary dataframe, such as created by get_ccf_summary or find_best_pars
    :param tol: The tolerance, in km/s, to accept an observation as detected
    :param debug: If True, print the median RV of each secondary star
    :return: a dataframe containing only detected companions
"""
secondary_names = pd.unique(df.Secondary)
secondary_to_rv = defaultdict(float)
for secondary in secondary_names:
rv = df.loc[df.Secondary == secondary]['rv'].median()
secondary_to_rv[secondary] = rv
if debug:
for secondary in sorted(secondary_to_rv.keys()):
print ('RV for {}: {:.2f} km/s'.format(secondary, secondary_to_rv[secondary]))
keys = df.Secondary.values
good = df.loc[abs(df.rv.values - np.array(itemgetter(*keys)(secondary_to_rv))) < tol]
return good
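The itemgetter(*keys)(mapping) expression above is a compact vectorized dictionary lookup; a self-contained illustration with invented star names:

from operator import itemgetter

secondary_to_rv = {'HR 1234': 10.0, 'HR 5678': -5.0}
keys = ['HR 1234', 'HR 5678', 'HR 1234']
print(itemgetter(*keys)(secondary_to_rv))  # (10.0, -5.0, 10.0)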
def list_stars(self, print2screen=False):
"""
List all of the stars in all of the CCF interfaces
Parameters:
===========
- print2screen: bool
Should we print the stars and dates to screen?
Returns:
=========
- star_list: list
A list of every star in the file, sorted by name.
"""
stars = []
for inst in self._interfaces.keys():
if print2screen:
print('Stars observed with {}: \n============================\n\n'.format(inst))
stars.extend(self._interfaces[inst].list_stars(print2screen=print2screen))
return list(pd.unique(stars))
# Source: test_algos.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia
def test_datetime64_dtype_array_returned(self):
# GH 9431
expected = np.array(['2015-01-03T00:00:00.000000000+0000',
'2015-01-01T00:00:00.000000000+0000'],
dtype='M8[ns]')
dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000',
'2015-01-01T00:00:00.000000000+0000',
'2015-01-01T00:00:00.000000000+0000'])
result = algos.unique(dt_index)
tm.assert_numpy_array_equal(result, expected)
self.assertEqual(result.dtype, expected.dtype)
s = pd.Series(dt_index)
result = algos.unique(s)
tm.assert_numpy_array_equal(result, expected)
self.assertEqual(result.dtype, expected.dtype)
arr = s.values
result = algos.unique(arr)
tm.assert_numpy_array_equal(result, expected)
self.assertEqual(result.dtype, expected.dtype)
def rename_brands(phone_models):
""" recast all phone brands and model as string integers brand_i and model_j """
brands_table = {}
i = 0
for brand in pd.unique(phone_models['phone_brand']):
brands_table[brand] = 'brand_%s' %i
i += 1
models_table = {}
i = 0
for model in pd.unique(phone_models['device_model']):
models_table[model] = 'model_%s' %i
i += 1
converted = []
for item in zip(phone_models['phone_brand'],phone_models['device_model']):
converted.append((brands_table[item[0]],models_table[item[1]]))
phone_models['phone_brand'] = [x[0] for x in converted]
phone_models['device_model'] = [x[1] for x in converted]
return phone_models
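A usage sketch with a toy phone_models frame; the anonymized labels follow first-appearance order because pd.unique does not sort:

import pandas as pd

phone_models = pd.DataFrame({'phone_brand': ['samsung', 'apple', 'samsung'],
                             'device_model': ['s6', 'iphone6', 's7']})
print(rename_brands(phone_models))
#   phone_brand device_model
# 0     brand_0      model_0
# 1     brand_1      model_1
# 2     brand_0      model_2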
def __init__(self, linksfile, ic=None):
df = pd.read_csv(linksfile)
df['link'] = df.i.map(str) + '_' + df.j.map(str) + '_' + df.k.map(str)
df.set_index('link', inplace=True)
self.df = df
# self.T = len(self.df)
SR_stats = pd.read_csv('calvin/data/SR_stats.csv', index_col=0).to_dict()
self.min_storage = SR_stats['min']
self.max_storage = SR_stats['max']
if ic:
self.apply_ic(ic)
# a few network fixes to make things work
self.add_ag_region_sinks()
self.fix_hydropower_lbs()
self.nodes = pd.unique(df[['i','j']].values.ravel()).tolist()
self.links = list(zip(df.i,df.j,df.k))
self.networkcheck() # make sure things aren't broken
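The node list built above flattens the i and j columns before deduplicating; a minimal standalone sketch of that idiom (link names invented):

import pandas as pd

links = pd.DataFrame({'i': ['SR_1', 'SR_1', 'D5'],
                      'j': ['D5', 'C22', 'SNK']})
print(pd.unique(links[['i', 'j']].values.ravel()).tolist())
# ['SR_1', 'D5', 'C22', 'SNK']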
def make_unique_value_each_column(self, df, node_id):
    """
    Count the distinct values of every categorical (non-numeric) column in the
    given DataFrame and return the counts as JSON.
    Args:
        df: DataFrame to inspect
        node_id: nnid of the node being processed
    Returns:
        dict (parsed JSON) mapping each categorical column name to its
        unique-value count, under the key 'unique_cell_feature'
    Raises:
        Exception: re-raised after logging if the inspection fails
    """
try:
data_conf = dict()
column_cate_unique = dict()
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        for i, v in df.dtypes.items():
            if str(v) not in numerics:  # may also need to treat float columns as categorical
                column_cate_unique[i] = df[i].unique().size
data_conf['unique_cell_feature'] = column_cate_unique
data_conf_json_str = json.dumps(data_conf)
data_conf_json = json.loads(data_conf_json_str)
return data_conf_json
    except Exception as e:
        logging.error("make_unique_value_each_column error : {0}".format(e))
        raise e
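On recent pandas the same per-column counts can be collected with select_dtypes; a roughly equivalent sketch of the dtype loop above:

import pandas as pd

df = pd.DataFrame({'city': ['NY', 'LA', 'NY'], 'count': [1, 2, 3]})
print({col: df[col].unique().size for col in df.select_dtypes(exclude='number')})
# {'city': 2}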
def makeTable(df, rowsCol, colsCol, dataCol):
    uniqRowVals = pd.unique(df[rowsCol])
    uniqColVals = pd.unique(df[colsCol])
    out = pd.DataFrame(index=uniqRowVals, columns=uniqColVals)
    for rowVal in uniqRowVals:
        for colVal in uniqColVals:
            rowsMatch = df[rowsCol] == rowVal
            colsMatch = df[colsCol] == colVal
            # Take the first row matching this (row, column) pair
            thisIdx = np.where(rowsMatch & colsMatch)[0][0]
            # .loc avoids the removed .ix indexer; .iloc matches np.where's positional index
            out.loc[rowVal, colVal] = df[dataCol].iloc[thisIdx]
    return out
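For reference, the double loop above is essentially a pivot; on any recent pandas the same table usually comes from one call. Note that DataFrame.pivot raises on duplicate (row, column) pairs, where the loop silently keeps the first match.

import pandas as pd

df = pd.DataFrame({'algo': ['a', 'a', 'b', 'b'],
                   'dataset': ['d1', 'd2', 'd1', 'd2'],
                   'score': [0.1, 0.2, 0.3, 0.4]})
print(df.pivot(index='algo', columns='dataset', values='score'))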
def label_metadata(label_matrix, label_col):
    # Check whether the column is given as an index (number) or a name (string)
    try:
        label_col = int(label_col)
        # If given as a number, translate it into the column's name
        label_col = label_matrix.columns[label_col]
    except ValueError:
        pass
    # Get the unique classes in the given column
    unique_classes = pd.unique(label_matrix[label_col].ravel())
    # Map the n unique classes to the numbers 0..n-1
    label_map = pd.DataFrame({label_col: unique_classes,
                              label_col + '_id': range(len(unique_classes))})
    # Replace the given column's values with the mapped equivalent
    # (iloc avoids the deprecated positional df[[0]] lookup)
    mapped_labels = label_matrix.replace(label_map.iloc[:, [0]].values.tolist(),
                                         label_map.iloc[:, [1]].values.tolist())
    # Return the mapped labels as a 1-D numpy array, plus the label map
    # (the unique classes and their count can be recovered from the map)
    return np.reshape(mapped_labels[[label_col]].values,
                      (mapped_labels.shape[0],)), np.asarray(label_map)
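On modern pandas, pd.factorize produces the same integer encoding in one step; a hedged equivalent of the map-and-replace above:

import pandas as pd

labels = pd.Series(['cat', 'dog', 'cat', 'bird'])
codes, uniques = pd.factorize(labels)
print(codes)    # [0 1 0 2]
print(uniques)  # Index(['cat', 'dog', 'bird'], dtype='object')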
def label_metadata(label_matrix, label_col):
    # Check whether the column is given as an index (number) or a name (string)
    try:
        label_col = int(label_col)
        # If given as a number, translate it into the column's name
        label_col = label_matrix.columns[label_col]
    except ValueError:
        pass
    # Get the unique classes in the given column
    unique_classes = pd.unique(label_matrix[label_col].ravel())
    # Map the n unique classes to the numbers 0..n-1
    label_map = pd.DataFrame({label_col: unique_classes,
                              label_col + '_id': range(len(unique_classes))})
    # Replace the given column's values with the mapped equivalent
    mapped_labels = label_matrix.replace(label_map.iloc[:, [0]].values.tolist(),
                                         label_map.iloc[:, [1]].values.tolist())
    # Return the mapped labels as a DataFrame, plus the label map; unlike the
    # variant above, this keeps the column structure intact
    return mapped_labels[[label_col]], np.asarray(label_map)
def create_subset(src, dest, n=250):
"Given a csv file `src`, create a subset `dest` with `n` unique entities"
df = pd.read_csv(src)
lics = pd.unique(df["License #"])
sublics = lics[random.sample(range(0,len(lics)), n)]
subset = df[df["License #"].isin(sublics)]
# Make the column names a little more readable
subset.columns = map(clean_column_name, subset.columns)
subset.to_csv(dest, index=False)
def convert_categorical(df):
onecol = df.columns[1]
onecol_name = df.columns.values.tolist()[1]
df[onecol] = df[onecol].str.lower()
categories = pd.unique(df[onecol])
categories = [x for x in categories if x is not None]
    try:
        categories.remove(' ')
    except ValueError:
        pass
    categories = [str(x) for x in categories]
    categories = list(set([str.lower(x).strip() for x in categories]))
    # Replace spaces in the middle of words with underscores
    categories = list(set([x.replace(" ", '_') for x in categories]))
featnames = []
for i in range(len(categories)):
if type(categories[i]) is str:
newfeatstr = onecol_name+'_is_' + categories[i]
featnames.append(newfeatstr)
df[newfeatstr] = (df[onecol] == categories[i])
onecol_null = onecol_name + "_is_null"
df[onecol_null] = pd.isnull(df[onecol])
df[onecol_null] = df[onecol_null].astype(float)
df = df.drop(onecol, axis=1)
df[featnames] = df[featnames].astype(float)
df = df.groupby(config_db['id_column'], sort = False, as_index=False)[featnames].max()
return df, featnames
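Most of this hand-rolled one-hot encoding is what pd.get_dummies provides out of the box; a sketch of the library route, with the null indicator added by hand (the groupby aggregation at the end is specific to the project and omitted):

import pandas as pd

s = pd.Series(['red', 'blue', None, 'Red '])
dummies = pd.get_dummies(s.str.lower().str.strip(), prefix='color', prefix_sep='_is_')
dummies['color_is_null'] = s.isnull().astype(float)
print(dummies.astype(float))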
def _validate_layout(func):
def func_wrapper(self):
if self._col_wrap:
if self._col_wrap > 16:
raise VisualizationInvalidLayout
else:
return func(self)
if self._col_factor and len(pd.unique(self._table[self._col_factor].values.ravel())) > 16:
raise VisualizationInvalidLayout
if self._row_factor and len(pd.unique(self._table[self._row_factor].values.ravel())) > 16:
raise VisualizationInvalidLayout
return func(self)
return func_wrapper
def vectorize(f):
from functools import wraps
try:
from pandas import Series, unique
@wraps(f)
        def vectorized_f(x):
# If we're given a scalar value, then simply return it.
if not hasattr(x,'__len__'):
return f(x)
# Get unique values
inputs = unique(x)
outputs = map(f,inputs)
table = dict(zip(inputs,outputs))
result = Series(x).map(table)
return result.values
except ImportError:
def cached_f(x, cache={}):
if x not in cache:
cache[x] = f(x)
return cache[x]
@wraps(f)
        def vectorized_f(x):
# If we're given a scalar value, then simply return it.
if not hasattr(x,'__len__'):
return cached_f(x)
            return list(map(cached_f, x))
return vectorized_f
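Usage sketch: decorating a scalar function so that, when pandas is available, each unique input is computed only once and the results are broadcast back through Series.map.

import numpy as np

@vectorize
def square(x):
    return x * x

print(square(3))                       # scalar path: 9
print(square(np.array([1, 2, 2, 3])))  # [1 4 4 9], computed once per unique value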
def saveLabel(self):
if not len(self.labelFile):
self.labelFile = QtGui.QFileDialog.getSaveFileName(self, 'Save Label File', os.path.expanduser('~'), 'Txt (*.txt)')
if len(self.labelFile):
self.updateLabelsBuf()
if self.labelsBuf is not None:
if self.labels is None:
self.labels = self.labelsBuf
self.labels = self.labels[~self.labels.image.isin(pd.unique(self.labelsBuf.image.ravel()))]
self.labelsBuf = self.labelsBuf[self.labelsBuf.cateid.notnull()]
self.labels = self.labels.append(self.labelsBuf, ignore_index=True)
self.labels.to_csv(self.labelFile, index=False)
self.labelsBuf = self.labelsBuf[self.labelsBuf.image == os.path.basename(self.imgsList[self.ith])]
def add_actual_temperature(df, method='excel', filename='SecondaryStar_Temperatures.xls'):
"""
Add the actual temperature to a given summary dataframe
:param df: The dataframe to which we will add the actual secondary star temperature
:keyword method: How to get the actual temperature. Options are:
- 'spt': Use main-sequence relationships to go from spectral type --> temperature
- 'excel': Use tabulated data, available in the file 'SecondaryStar_Temperatures.xls'
:keyword filename: The filename of the excel spreadsheet containing the literature temperatures.
Needs to have the right format! Ignored if method='spt'
:return: copy of the original dataframe, with an extra column for the secondary star temperature
"""
# First, get a list of the secondary stars in the data
secondary_names = pd.unique(df.Secondary)
secondary_to_temperature = defaultdict(float)
secondary_to_error = defaultdict(float)
if method.lower() == 'spt':
MS = SpectralTypeRelations.MainSequence()
for secondary in secondary_names:
star_data = StarData.GetData(secondary)
            spt = star_data.spectype[0] + re.search(r'[0-9]\.*[0-9]*', star_data.spectype).group()
T_sec = MS.Interpolate(MS.Temperature, spt)
secondary_to_temperature[secondary] = T_sec
elif method.lower() == 'excel':
table = pd.read_excel(filename, 0)
for secondary in secondary_names:
T_sec = table.loc[table.Star.str.lower().str.contains(secondary.strip().lower())]['Literature_Temp'].item()
T_error = table.loc[table.Star.str.lower().str.contains(secondary.strip().lower())][
'Literature_error'].item()
secondary_to_temperature[secondary] = T_sec
secondary_to_error[secondary] = T_error
df['Tactual'] = df['Secondary'].map(lambda s: secondary_to_temperature[s])
df['Tact_err'] = df['Secondary'].map(lambda s: secondary_to_error[s])
    return df
def fit_sigma(df, i):
"""
Find the largest allowable standard deviation, given the possible values Tactual can take.
"""
Tmeasured, Tactual, _, _ = get_values(df)
Tm = Tmeasured[i]
# Get the possible values, and bin those with this measured value
possible_values = sorted(pd.unique(df.Tactual))
edges = [(possible_values[i] + possible_values[i+1])/2 for i in range(len(possible_values)-1)]
bins = [0] + edges + [9e9]
good = df.loc[df.Temperature == Tm]
    values, _ = np.histogram(good.Tactual.values, bins=bins)
mean = np.mean(good.Tactual.values)
std = np.std(good.Tactual.values, ddof=1)
if std > 0:
return std
    sigma_test = np.arange(500, 10, -10)  # Just test a bunch of values
    idx = np.argmin(abs(np.array(bins) - mean))
x1 = bins[idx-2] if idx > 2 else -1
x2 = bins[idx-1]
x3 = bins[idx]
x4 = bins[idx+1] if idx < len(bins)-2 else np.inf
N = len(good)
probs = [get_probability(x1, x2, x3, x4, N, mean, s) for s in sigma_test]
for s, p in zip(sigma_test, probs):
if p > 0.5:
return s
# If we get here, just return a guess value
return 200.0
#raise ValueError('No probability > 0!')
def read_hdf5(hdf5_file):
"""
Reads the hdf5 file into a dataframe. Assumes a very specific format!
Parameters:
===========
- hdf5_file: string
The full path to the hdf5 file.
    Returns:
    ========
A pandas DataFrame containing summary information
"""
logging.info('Reading HDF5 file {}'.format(hdf5_file))
hdf5_int = HDF5_Interface(hdf5_file)
df = hdf5_int.to_df()
# Get the contrast. Split by group and then merge to limit the amount of calculation needed
logging.info('Estimating the V-band contrast ratio for each trial')
test_vsini = df.vsini.unique()[0]
temp = df.loc[(df.rv == 0) & (df.vsini == test_vsini)].drop_duplicates(subset=['star', 'temperature'])
temp['contrast'] = temp.apply(lambda r: get_contrast(r, band='V'), axis=1)
logging.info('Estimating the luminosity ratio for each trial')
temp['lum_ratio'] = temp.apply(get_luminosity_ratio, axis=1)
logging.info('Re-merging dataframe')
df = pd.merge(df, temp[['star', 'temperature', 'contrast', 'lum_ratio']], on=['star', 'temperature'], how='left')
df['logL'] = np.log10(df.lum_ratio)
return df
def parse_input(inp, sort_output=True, ensure_unique=True):
"""
Parse the user input to get a list of integers.
Parameters:
===========
- inp: string
Can be in the form 'a-b', 'a,b,c', 'a-b,c-d', etc.
'-' means an inclusive list of every number between a and b
',' means the numbers a and b
- sort_output: boolean
Sort the output integers?
- ensure_unique: boolean
Make sure the final list has no repeats?
:return: A list of integers
"""
sublists = inp.split(',')
final_list = []
for l in sublists:
if '-' in l:
first, last = l.split('-')
for i in range(int(first), int(last) + 1):
final_list.append(i)
else:
final_list.append(int(l))
    if ensure_unique:
        final_list = pd.unique(final_list).tolist()
if sort_output:
final_list = sorted(final_list)
return final_list
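Usage example with the default sorting and de-duplication:

print(parse_input('1-3,7,2'))  # [1, 2, 3, 7]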
def get_ccf(self, params, df=None):
"""
Get the ccf with the given parameters.
Parameters:
===========
- params: dictionary:
All the parameters necessary to define a single ccf. This should be
a python dictionary with the keys:
- 'starname': The name of the star. Try self.list_stars() for the options.
- 'date': The UT date of the observations. Try self.list_dates() for the options.
- 'T': temperature of the model
- 'logg': the log(g) of the model
- 'vsini': the vsini by which the model was broadened before correlation
- '[Fe/H]': the metallicity of the model
- 'addmode': The way the order CCFs were added to make a total one. Can be:
- 'simple'
- 'ml'
- 'weighted'
- 'dc'
- df: a pandas DataFrame such as outputted by _compile_data
Returns:
========
    - ccf: pandas DataFrame
Holds columns of velocity and CCF power
"""
if df is None:
try:
df = self._compile_data(params['starname'], params['date'])
except KeyError:
raise KeyError('Must give get_ccf params with starname and date keywords, if df is not given!')
Tvals = df['T'].unique()
T = Tvals[np.argmin(abs(Tvals - params['T']))]
good = df.loc[(df['T'] == T) & (df.logg == params['logg']) & (df.vsini == params['vsini']) \
& (df['[Fe/H]'] == params['[Fe/H]']) & (df.addmode == params['addmode'])]
return pd.DataFrame(data={'velocity': self.velocities, 'CCF': good['ccf'].item()})
# Source: test_algos.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia
def test_ints(self):
arr = np.random.randint(0, 100, size=50)
result = algos.unique(arr)
tm.assertIsInstance(result, np.ndarray)
# Source: test_algos.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia
def test_objects(self):
arr = np.random.randint(0, 100, size=50).astype('O')
result = algos.unique(arr)
tm.assertIsInstance(result, np.ndarray)
# Source: test_algos.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia
def test_object_refcount_bug(self):
lst = ['A', 'B', 'C', 'D', 'E']
for i in range(1000):
len(algos.unique(lst))
# Source: test_algos.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia
def test_on_index_object(self):
mindex = pd.MultiIndex.from_arrays([np.arange(5).repeat(5), np.tile(
np.arange(5), 5)])
expected = mindex.values
expected.sort()
mindex = mindex.repeat(2)
result = pd.unique(mindex)
result.sort()
tm.assert_almost_equal(result, expected)
# Source: test_algos.py, project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia
def test_unique_label_indices():
from pandas.hashtable import unique_label_indices
a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8')
left = unique_label_indices(a)
right = np.unique(a, return_index=True)[1]
tm.assert_numpy_array_equal(left, right)
a[np.random.choice(len(a), 10)] = -1
left = unique_label_indices(a)
right = np.unique(a, return_index=True)[1][1:]
tm.assert_numpy_array_equal(left, right)
def __init__(self, data=None, groups=None, **kwargs):
super().__init__(data=data, **kwargs)
if groups is not None:
self.plydata_groups = list(pd.unique(groups))
def _n_distinct(arr):
"""
Number of unique values in array
"""
return len(pd.unique(arr))
# Source: test_chamber_of_deputies_dataset.py, project: serenata-toolbox, author: datasciencebr
def test_clean_2017_reimbursements(self):
copy(os.path.join(self.fixtures_path, 'reimbursements-2017.xz'), self.path)
file_path = os.path.join(self.path, 'reimbursements.xz')
self.subject.clean()
assert(os.path.exists(file_path))
dataset = pd.read_csv(file_path, compression='xz')
all_subquotas = [subquota[1] for subquota in self.subject.subquotas]
present_subquotas = pd.unique(dataset['subquota_description'])
for subquota in present_subquotas:
with self.subTest():
assert(subquota in all_subquotas)