python类LabelEncoder()的实例源码

Utils.py 文件源码 项目:Kaggle-Competition-Sberbank 作者: LenzDu 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def FeatureCombination(Df, s='', num_feature=2):
    """Replace the columns whose name starts with ``s`` by PCA components.

    Every object-dtype column of ``Df`` is label-encoded in place, the
    selected columns are imputed, scaled and projected onto ``num_feature``
    principal components, and the components are stored in new columns named
    ``s + '_1'``, ``s + '_2'``, ...  The original selected columns are dropped.

    :param Df: DataFrame to transform; it is modified in place.
    :param s: column-name prefix selecting the features to combine.
    :param num_feature: number of principal components to keep.
    :return: the same (mutated) DataFrame.
    """
    feature_set = [c for c in Df.columns if c.startswith(s)]
    print('combining', len(feature_set), 'features')

    # Encode string columns BEFORE extracting the feature matrix; otherwise
    # the matrix would still contain the raw strings and the imputer/scaler
    # below could not process them.
    for c in Df.columns:
        if Df[c].dtype == 'object':
            lbl = preprocessing.LabelEncoder()
            values = list(Df[c].values)
            lbl.fit(values)
            Df[c] = lbl.transform(values)

    data = Df[feature_set].values

    imp = preprocessing.Imputer()
    data = imp.fit_transform(data)
    data = preprocessing.scale(data)
    pca = PCA(num_feature)
    pca.fit(data)
    print('explained_variance_ratio_:', pca.explained_variance_ratio_)
    trans = pca.transform(data)
    for i in range(num_feature):
        Df[s + '_%d' % (i + 1)] = trans[:, i]
    # Pass the axis by keyword: the positional form is not accepted by
    # recent pandas releases.
    Df.drop(feature_set, axis=1, inplace=True)
    return Df
DataHandler.py 文件源码 项目:TextClassification 作者: AlgorTroy 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def create_codes(df, column_name, revive=False, model_code=0):
    """Encode the values of ``df[column_name]`` as integer codes.

    When ``revive`` is false a new ``LabelEncoder`` is fitted on the unique
    values of the column; otherwise an encoder is loaded through
    ``load_pickle`` from the file named after ``model_code``.

    :param df: DataFrame holding the column to encode.
    :param column_name: name of the column to encode.
    :param revive: load a stored encoder instead of fitting a new one.
    :param model_code: identifier used to build the stored encoder file name.
    :return: tuple ``(codes, encoder)``.
    """
    print('Encoding', column_name, '...')
    distinct_values = df[column_name].unique().tolist()

    if revive:
        # Reuse the encoder stored for this model code.
        pickle_name = "LE_" + str(model_code)
        le = load_pickle(ROOT_PATH + '\\Data\\PickleJar\\' + pickle_name + '.pkl')
    else:
        print('Creating new Label Encoder...')
        le = LabelEncoder()
        le.fit(distinct_values)

    all_values = df[column_name].tolist()
    codes = le.transform(all_values)
    return codes, le
sklearn_svm.py 文件源码 项目:Informed-Finance-Canary 作者: Darthone 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def addDailyReturn(dataset):
    """
    Add an encoded ``UpDown`` column describing the move to the next row.

    The relative change between ``Adj_Close`` and the following row's
    ``Adj_Close`` is labelled "up" (>= 0) or "down" (< 0) and the labels are
    converted to integers with a LabelEncoder.  ``dataset`` is modified in
    place; nothing is returned.
    """

    # will normalize labels
    le = preprocessing.LabelEncoder()

    next_close = dataset['Adj_Close'].shift(-1)
    dataset['UpDown'] = -(dataset['Adj_Close'] - next_close) / next_close
    print(dataset['UpDown'])

    # Build both masks from the numeric returns before any label is written:
    # comparing a column that already contains strings against 0 fails on
    # Python 3, and chained assignment (dataset.UpDown[mask] = ...) is not
    # guaranteed to write through to the frame.
    returns = dataset['UpDown']
    is_up = returns >= 0
    is_down = returns < 0

    labels = returns.astype(object)
    labels[is_up] = "up"
    labels[is_down] = "down"
    # NOTE(review): the last row has no following close, so its return is NaN
    # and it receives neither label. The original comments said "up" encodes
    # to 2 and "down" to 1, which presumably relies on NaN being encoded as
    # its own class - verify against the installed scikit-learn.
    dataset['UpDown'] = le.fit(labels).transform(labels)
    print(dataset['UpDown'])
sklearn_svr.py 文件源码 项目:Informed-Finance-Canary 作者: Darthone 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def addDailyReturn(dataset):
    """
    Add an encoded ``UpDown`` column describing the move from the previous row.

    The relative change between ``Adj_Close`` and the previous row's
    ``Adj_Close`` is labelled "sell" (> 0), "hold" (== 0) or "buy" (< 0) and
    the labels are converted to integers with a LabelEncoder.  ``dataset`` is
    modified in place; nothing is returned.
    """

    # will normalize labels
    le = preprocessing.LabelEncoder()

    prev_close = dataset['Adj_Close'].shift(1)
    dataset['UpDown'] = (dataset['Adj_Close'] - prev_close) / prev_close

    # Compute all three masks from the numeric returns before writing labels:
    # comparing a column that already contains strings against 0 fails on
    # Python 3, and chained assignment (dataset.UpDown[mask] = ...) is not
    # guaranteed to write through to the frame.
    returns = dataset['UpDown']
    is_gain = returns > 0
    is_flat = returns == 0
    is_loss = returns < 0

    labels = returns.astype(object)
    labels[is_gain] = "sell"
    labels[is_flat] = "hold"
    labels[is_loss] = "buy"
    # NOTE(review): the first row has no previous close, so its return is NaN
    # and it receives no label; how the encoder treats that NaN depends on the
    # installed scikit-learn - verify.
    dataset['UpDown'] = le.fit(labels).transform(labels)

    #print dataset['UpDown']
sklearn_knn.py 文件源码 项目:Informed-Finance-Canary 作者: Darthone 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def addDailyReturn(dataset):
    """
    Add an encoded ``UpDown`` column describing the move to the next row.

    The relative change between ``Adj_Close`` and the following row's
    ``Adj_Close`` is labelled "up" (>= 0) or "down" (< 0) and the labels are
    converted to integers with a LabelEncoder.  ``dataset`` is modified in
    place; nothing is returned.
    """

    # will normalize labels
    le = preprocessing.LabelEncoder()

    next_close = dataset['Adj_Close'].shift(-1)
    dataset['UpDown'] = -(dataset['Adj_Close'] - next_close) / next_close
    print(dataset['UpDown'][:5])

    # Build both masks from the numeric returns before any label is written:
    # comparing a column that already contains strings against 0 fails on
    # Python 3, and chained assignment (dataset.UpDown[mask] = ...) is not
    # guaranteed to write through to the frame.
    returns = dataset['UpDown']
    is_up = returns >= 0
    is_down = returns < 0

    labels = returns.astype(object)
    labels[is_up] = "up"
    labels[is_down] = "down"
    # Store the textual labels first so the debug print below shows them, as
    # the original code did.
    dataset['UpDown'] = labels
    print(dataset['UpDown'])
    # NOTE(review): the last row has no following close, so its return is NaN
    # and it receives neither label. The original comments said "up" encodes
    # to 2 and "down" to 1, which presumably relies on NaN being encoded as
    # its own class - verify against the installed scikit-learn.
    dataset['UpDown'] = le.fit(labels).transform(labels)
#   print dataset['UpDown'][:5]
feat_util.py 文件源码 项目:kaggle-prudential-sample 作者: threecourse 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def create_id_df(cls, df, is_train):
        """
        Build a table of integer ids for the train and test rows of ``df``.

        Train and test ``id0`` values are label-encoded separately
        (``id_tr`` / ``id_te``); ``id`` is ``id_tr`` for train rows and
        ``len(train classes) + id_te`` for test rows.  Missing entries are
        filled with -1.

        :param df: DataFrame with at least the columns "id0" and "label".
        :param is_train: boolean flags, one per row, marking the train rows.
        :rtype: DataFrame
        :return: dataFrame, sorted by id,
                 columns are ["label", "id0", "id", "id_tr", "id_te"]
        """

        df = df[["id0", "label"]].copy()
        df = df.reset_index(drop=True)
        is_train = np.array(is_train)

        le_tr = LabelEncoder().fit(df.id0[is_train])
        le_te = LabelEncoder().fit(df.id0[~is_train])

        df["id_tr"] = np.nan
        df["id_te"] = np.nan
        df.loc[is_train, "id_tr"] = le_tr.transform(df.id0[is_train])
        df.loc[~is_train, "id_te"] = le_te.transform(df.id0[~is_train])
        # Test ids are offset by the number of train classes so the two
        # ranges do not overlap.
        df["id"] = np.where(np.isnan(df["id_tr"]), len(le_tr.classes_) + df["id_te"], df["id_tr"])

        df = df.fillna(-1)
        # DataFrame.sort() no longer exists in pandas; sort_values() is the
        # replacement with the same ordering.
        df = df.sort_values("id")
        df = df[["label", "id0", "id", "id_tr", "id_te"]]

        return df
random-forest-daily-returns.py 文件源码 项目:quantopian-machinelearning 作者: arshpreetsingh 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def create_model(context, data):
    """Fit ``context.model`` on return-based features of recent prices.

    Reads the daily price history for ``context.assets``, stores the 50- and
    200-row means on ``context`` (``ma_50`` / ``ma_200``), derives the
    features price, daily return, 3-row return and 4-row rolling mean of the
    daily return, and fits ``context.model`` to predict the label-encoded
    up/down direction of the daily return.

    :param context: object providing ``assets``, ``history_range`` and
        ``model``; ``ma_50`` and ``ma_200`` are written to it.
    :param data: object providing ``history(assets, field, bar_count, freq)``.
    """
    # Get the relevant daily prices
    recent_prices = data.history(context.assets, 'price', context.history_range, '1d')

    context.ma_50 = recent_prices.values[-50:].mean()
    context.ma_200 = recent_prices.values[-200:].mean()

    time_lags = pd.DataFrame(index=recent_prices.index)
    time_lags['price'] = recent_prices.values
    time_lags['daily_returns'] = time_lags['price'].pct_change()
    time_lags['multiple_day_returns'] = time_lags['price'].pct_change(3)
    time_lags['rolling_mean'] = time_lags['daily_returns'].rolling(window=4, center=False).mean()
    time_lags['time_lagged'] = time_lags['price'] - time_lags['price'].shift(-2)

    features = time_lags[['price', 'daily_returns', 'multiple_day_returns', 'rolling_mean']]
    # Rows with a complete feature vector; equivalent to dropna(), but the
    # mask is reused below so the target always matches X row for row
    # (the former hard-coded [4:] slice only matched when the leading four
    # rows were the only incomplete ones).
    complete_rows = features.notnull().all(axis=1)
    X = features[complete_rows]

    # Derive the labels on a standalone Series from masks computed on the
    # numeric returns; chained assignment (time_lags.updown[mask] = ...) is
    # not guaranteed to write through to the frame.
    returns = time_lags['daily_returns']
    updown = returns.astype(object)
    updown[returns >= 0] = 'up'
    updown[returns < 0] = 'down'
    time_lags['updown'] = updown

    le = preprocessing.LabelEncoder()
    # NOTE(review): the first row's return is NaN and stays unlabeled, so the
    # encoder is fitted on a column that still contains NaN - confirm the
    # installed scikit-learn accepts that.
    time_lags['encoding'] = le.fit(time_lags['updown']).transform(time_lags['updown'])

    context.model.fit(X, time_lags['encoding'][complete_rows])  # Generate our model
wrappers.py 文件源码 项目:acton 作者: chengsoonong 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def deserialise_encoder(
            encoder: acton_pb.Database.LabelEncoder
        ) -> sklearn.preprocessing.LabelEncoder:
    """Rebuild a scikit-learn LabelEncoder from its protobuf form.

    Parameters
    ----------
    encoder
        LabelEncoder protobuf whose ``encoding`` entries carry a
        ``class_int`` and a ``class_label``.

    Returns
    -------
    sklearn.preprocessing.LabelEncoder
        Encoder whose ``classes_`` are the labels ordered by their
        ``class_int`` (an empty array when there are no encodings).
    """
    # Sorting the (int, label) pairs puts the labels in encoding order.
    pairs = sorted(
        (entry.class_int, entry.class_label) for entry in encoder.encoding)
    labels = numpy.array([label for _, label in pairs])

    sk_encoder = SKLabelEncoder()
    sk_encoder.classes_ = labels
    return sk_encoder
models.py 文件源码 项目:Prudential-Life-Insurance-Assessment 作者: AntonUBC 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def fit(self, X, y):
        """Train an xgboost multi-class model on ``X`` / ``y``.

        The targets are label-encoded, the number of distinct classes is
        stored in ``self.num_classes`` and the trained booster in
        ``self.model``.

        :param X: training features, passed to ``xgb.DMatrix``.
        :param y: training targets.
        :return: ``self``.
        """
        encoded_y = preprocessing.LabelEncoder().fit_transform(y)
        self.num_classes = np.unique(encoded_y).shape[0]
        train_matrix = xgb.DMatrix(X, encoded_y)

        booster_params = {
            "objective": 'multi:softprob',
            "eta": self.eta,
            "gamma": self.gamma,
            "max_depth": self.max_depth,
            "min_child_weight": self.min_child_weight,
            "max_delta_step": self.max_delta_step,
            "subsample": self.subsample,
            "silent": self.silent,
            "colsample_bytree": self.colsample_bytree,
            "seed": self.seed,
            "lambda": self.l2_reg,
            "alpha": self.l1_reg,
        }
        booster_params["num_class"] = self.num_classes

        self.model = xgb.train(booster_params, train_matrix, self.num_round)
        return self
label_encoder.py 文件源码 项目:guacml 作者: guacml 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def execute_inplace(self, data):
        """Label-encode every categorical column of ``data.df`` in place.

        For each column whose metadata type is ``ColType.CATEGORICAL`` the
        non-null values are replaced by integer codes, the column is cast to
        float, and the metadata row is updated (type ``INT_ENCODING``,
        ``derived_from`` set to the column name).  The fitted classes per
        column are stored in ``self.state['classes']``.

        :param data: object exposing ``df`` and ``metadata``.
        """
        frame = data.df
        metadata = data.metadata

        fitted_classes = {}
        categorical_cols = metadata[metadata.type == ColType.CATEGORICAL].index
        for name in categorical_cols:
            encoder = LE()
            # Only non-null cells are encoded; nulls are left untouched.
            present = frame[name].notnull()
            frame.loc[present, name] = encoder.fit_transform(frame.loc[present, name])
            frame[name] = frame[name].astype(float)
            metadata.loc[name, 'type'] = ColType.INT_ENCODING
            metadata.loc[name, 'derived_from'] = name
            fitted_classes[name] = encoder.classes_
            self.logger.info('LabelEncoder: encoded %s', name)

        self.state = {'classes': fitted_classes}
pyrandom.py 文件源码 项目:HousePricePredictionKaggle 作者: Nuwantha 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def pre_process_data():
    """Fill missing values and label-encode the categorical columns.

    Operates in place on the module-level ``data_frame`` and
    ``data_frame_test``: categorical columns get the placeholder 'default',
    numerical columns get 0, and each categorical column is encoded with one
    encoder shared by both frames.  Missing ``SalePrice`` values in
    ``data_frame`` are set to 0.
    """
    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection, which is not guaranteed to update the frame.
    for col in categorical_fields:
        data_frame[col] = data_frame[col].fillna('default')
        data_frame_test[col] = data_frame_test[col].fillna('default')

    for col in numerical_fields:
        data_frame[col] = data_frame[col].fillna(0)
        data_frame_test[col] = data_frame_test[col].fillna(0)

    for col in categorical_fields:
        # Fit once on the values of BOTH frames so that a given category maps
        # to the same integer in train and test; refitting on the test frame
        # would assign unrelated codes.
        encode = LabelEncoder()
        encode.fit(list(data_frame[col]) + list(data_frame_test[col]))
        data_frame[col] = encode.transform(data_frame[col])
        data_frame_test[col] = encode.transform(data_frame_test[col])
    data_frame['SalePrice'] = data_frame['SalePrice'].fillna(0)
data_preparation.py 文件源码 项目:keras-utilities 作者: cbaziotis 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def labels_to_categories(y):
    """
    Convert labels to integer categories with a LabelEncoder.

    :param y: list of labels, ex. ['positive', 'negative', 'positive', 'neutral', 'positive', ...]
    :return: array of integer codes, one per label; LabelEncoder numbers the
        sorted distinct labels, so the example above gives [2, 0, 2, 1, 2, ...]
    """
    return LabelEncoder().fit(y).transform(y)
two_sigma_financial_modelling.py 文件源码 项目:PortfolioTimeSeriesAnalysis 作者: MizioAnd 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def label_classes(df, estimated_var):
        """Return the classes a LabelEncoder finds in ``df[estimated_var]``.

        :param df: DataFrame holding the column.
        :param estimated_var: name of the column to inspect.
        :return: the fitted encoder's ``classes_``.
        """
        column_values = df[estimated_var].values
        return LabelEncoder().fit(column_values).classes_
classifier.py 文件源码 项目:TrackToTrip 作者: ruipgil 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def __init__(self, classifier=None):
        """Set up the classifier, its label encoder and the feature length.

        :param classifier: estimator to use; when it is falsy an
            ``SGDClassifier`` with log loss and l2 penalty is created.
        """
        self.clf = classifier if classifier else SGDClassifier(
            loss="log", penalty="l2", shuffle=True, n_iter=2500)
        self.labels = preprocessing.LabelEncoder()
        # -1 is the initial value; nothing in this method changes it.
        self.feature_length = -1
train.py 文件源码 项目:YOLO-Object-Detection-Tensorflow 作者: huseinzol05 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def get_dataset():
    """Collect the images below ``data/`` together with encoded folder labels.

    Every sub-folder of ``data/`` is treated as one label.  The (path, label)
    pairs are shuffled, images whose array has fewer than three dimensions
    are discarded, and the labels are replaced by integer codes.

    :return: tuple ``(list_images, n_labels, label)`` where ``list_images``
        is an array of ``[relative_path, encoded_label]`` rows, ``n_labels``
        the number of distinct encoded labels and ``label`` the sorted list
        of original label names.
    """
    # Iterate the directory listings directly; xrange and the print statement
    # exist only on Python 2.
    list_images = []
    for folder in os.listdir('data/'):
        for filename in os.listdir('data/' + folder):
            list_images.append([folder + '/' + filename, folder])
    list_images = np.array(list_images)
    np.random.shuffle(list_images)

    print("before cleaning got: " + str(list_images.shape[0]) + " data")

    list_temp = []
    for row in list_images:
        image = misc.imread('data/' + row[0])
        # Skip images without a third (channel) dimension.
        if len(image.shape) < 3:
            continue
        list_temp.append(row.tolist())

    list_images = np.array(list_temp)
    print("after cleaning got: " + str(list_images.shape[0]) + " data")
    label = np.unique(list_images[:, 1]).tolist()
    list_images[:, 1] = LabelEncoder().fit_transform(list_images[:, 1])
    return list_images, np.unique(list_images[:, 1]).shape[0], label
preparedata.py 文件源码 项目:Supply-demand-forecasting 作者: LevinJ 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def __do_label_encoding(self):
        """Label-encode the cross features of the train and both test frames.

        For every feature name returned by ``__get_label_encode_dict`` one
        encoder is fitted on the values of all three frames together and then
        applied to each frame, so a value gets the same code everywhere.
        The frames are modified in place; nothing is returned.
        """
        df_train, _ = self.res_data_dict[g_singletonDataFilePath.getTrainDir()]
        df_testset1 = self.res_data_dict[g_singletonDataFilePath.getTest1Dir()]
        df_testset2 = self.res_data_dict[g_singletonDataFilePath.getTest2Dir()]
        frames = (df_train, df_testset1, df_testset2)

        le = LabelEncoder()
        cross_feature_dict = self.__get_label_encode_dict()
        # Only the values are needed; .values() works on Python 2 and 3,
        # whereas .iteritems() exists on Python 2 only.
        for new_feature_name in cross_feature_dict.values():
            to_be_stacked = [frame[new_feature_name] for frame in frames]
            le.fit(pd.concat(to_be_stacked, axis=0))
            for frame in frames:
                frame[new_feature_name] = le.transform(frame[new_feature_name])

        return
encode.py 文件源码 项目:skutil 作者: tgsmith61591 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def fit(self, column):
        """Fit a LabelEncoder on ``column`` and expose its classes.

        The column is converted with ``h2o_col_to_numpy`` before fitting.
        The fitted encoder is stored in ``self.encoder_`` and its classes in
        ``self.classes_``.

        :param column: column to fit on.
        :return: ``self``.
        """
        values = h2o_col_to_numpy(column)
        self.encoder_ = LabelEncoder().fit(values)
        self.classes_ = self.encoder_.classes_
        return self
io.py 文件源码 项目:ltls 作者: kjasinska 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self, multilabel=False):
        """Choose the label transformer for single- or multi-label data.

        :param multilabel: when true a ``MultiLabelBinarizer`` with sparse
            output is used, otherwise a ``LabelEncoder``.
        """
        self.multilabel = multilabel
        self.le = (MultiLabelBinarizer(sparse_output=True)
                   if self.multilabel else LabelEncoder())
        self.from_classes = False
classifier.py 文件源码 项目:quoll 作者: LanguageMachines 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self):
        """Create the instance with a new ``preprocessing.LabelEncoder`` in ``label_encoder``."""
        self.label_encoder = preprocessing.LabelEncoder()
tests.py 文件源码 项目:datacleaner 作者: rhiever 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_autoclean_no_nans_with_strings():
    """autoclean() must label-encode a string column when there are no NaNs.

    Builds a frame with two random float columns and one string-valued
    categorical column, encodes the categorical column by hand, and checks
    that autoclean() yields an identical frame.
    """
    string_map = {0: 'oranges', 1: 'apples', 2: 'bananas'}
    columns = {'A': np.random.rand(1000),
               'B': np.random.rand(1000),
               'C': np.random.randint(0, 3, 1000)}
    data = pd.DataFrame(columns)
    data['C'] = data['C'].apply(string_map.__getitem__)

    # Expected result: same frame with only column C label-encoded.
    expected = data.copy()
    encoded_c = LabelEncoder().fit_transform(expected['C'].values)
    expected['C'] = encoded_c

    actual = autoclean(data)

    assert actual.equals(expected)


问题


面经


文章

微信
公众号

扫码关注公众号