Example source code for Python's load()

misc_util.py (project: distributional_perspective_on_RL, author: Kiwoo)
def pickle_load(path, compression=False):
    """Unpickle a possible compressed pickle.

    Parameters
    ----------
    path: str
        path to the pickle file to read
    compression: bool
        if True, assume the pickle was zip-compressed when created and attempt decompression.

    Returns
    -------
    obj: object
        the unpickled object
    """

    if compression:
        with zipfile.ZipFile(path, "r", compression=zipfile.ZIP_DEFLATED) as myzip:
            with myzip.open("data") as f:
                return pickle.load(f)
    else:
        with open(path, "rb") as f:
            return pickle.load(f)
flora.py (project: flora, author: Lamden)
def directory_has_smart_contract(location):
    # returns bool if there is a tsol contract in said directory
    # probably makes more sense to put this inside of the tsol package
    code_path = glob.glob(os.path.join(location, '*.tsol'))
    example = glob.glob(os.path.join(location, '*.json'))

    assert len(code_path) > 0 and len(example) > 0, 'Could not find *.tsol and *.json files in provided directory.'

    # pop off the first file name and turn the code into a file object
    code = open(code_path[0])

    # turn the example into a dict
    with open(example[0]) as e:
        example = json.load(e)

    try:
        tsol.compile(code, example)
    except Exception as e:
        print(e)
        return False
    return True
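
A brief usage sketch (the directory path is illustrative); the function only checks that the first *.tsol/*.json pair found in the directory compiles:

if directory_has_smart_contract("./contracts"):
    print("Found a compilable tsol contract")
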
forking.py (project: kinect-2-libras, author: inessadl)
def main():
        '''
        Run code specified by data received over pipe
        '''
        assert is_forking(sys.argv)

        handle = int(sys.argv[-1])
        fd = msvcrt.open_osfhandle(handle, os.O_RDONLY)
        from_parent = os.fdopen(fd, 'rb')

        process.current_process()._inheriting = True
        preparation_data = load(from_parent)
        prepare(preparation_data)
        self = load(from_parent)
        process.current_process()._inheriting = False

        from_parent.close()

        exitcode = self._bootstrap()
        exit(exitcode)
data.py (project: DREAM, author: LaceyChen17)
def get_item_history(self, prior_or_train, reconstruct = False, none_idx = 49689):
        filepath = self.cache_dir + './item_history_' + prior_or_train + '.pkl'
        if (not reconstruct) and os.path.exists(filepath):
            with open(filepath, 'rb') as f:
                item_history = pickle.load(f)
        else:
            up = self.get_users_orders(prior_or_train).sort_values(['user_id', 'order_number', 'product_id'], ascending = True)
            item_history = up.groupby(['user_id', 'order_number'])['product_id'].apply(list).reset_index()
            item_history.loc[item_history.order_number == 1, 'product_id'] = item_history.loc[item_history.order_number == 1, 'product_id'] + [none_idx]
            item_history = item_history.sort_values(['user_id', 'order_number'], ascending = True)
            # accumulate 
            item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].transform(pd.Series.cumsum)
            # get unique item list
            item_history['product_id'] = item_history['product_id'].apply(set).apply(list)
            item_history = item_history.sort_values(['user_id', 'order_number'], ascending = True)
            # shift each group to make it history
            item_history['product_id'] = item_history.groupby(['user_id'])['product_id'].shift(1)
            for row in item_history.loc[item_history.product_id.isnull(), 'product_id'].index:
                item_history.at[row, 'product_id'] = [none_idx]
            item_history = item_history.sort_values(['user_id', 'order_number'], ascending = True).groupby(['user_id'])['product_id'].apply(list).reset_index()
            item_history.columns = ['user_id', 'history_items']

            with open(filepath, 'wb') as f:
                pickle.dump(item_history, f, pickle.HIGHEST_PROTOCOL)
        return item_history
get_data.py (project: Doubly-Stochastic-DGP, author: ICL-SML)
def get_taxi_stats(data_path=data_path):
    file_name = 'taxi_data_stats.p'
    path = data_path + file_name
    if not os.path.isfile(path):
        download(file_name, data_path=data_path)

    import pickle
    stats = pickle.load(open(path, 'rb'))  # pickle files must be read in binary mode
    sum_X = stats['sum_X']
    sum_X2 = stats['sum_X2']
    n = float(stats['n'])
    X_mean = sum_X / n
    X_std = ((sum_X2 - (sum_X**2)/n)/(n-1))**0.5

    X_mean = np.reshape(X_mean, [1, -1])
    X_std = np.reshape(X_std, [1, -1])

    return X_mean, X_std
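
The returned (1, D) row vectors are intended for feature standardization; a short sketch, assuming X is an (N, D) numpy array with the same column layout as the cached statistics:

X_mean, X_std = get_taxi_stats(data_path=data_path)
X_standardized = (X - X_mean) / X_std   # (1, D) mean/std broadcast over all rows
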
trace.py (project: kinect-2-libras, author: inessadl)
def __init__(self, counts=None, calledfuncs=None, infile=None,
                 callers=None, outfile=None):
        self.counts = counts
        if self.counts is None:
            self.counts = {}
        self.counter = self.counts.copy() # map (filename, lineno) to count
        self.calledfuncs = calledfuncs
        if self.calledfuncs is None:
            self.calledfuncs = {}
        self.calledfuncs = self.calledfuncs.copy()
        self.callers = callers
        if self.callers is None:
            self.callers = {}
        self.callers = self.callers.copy()
        self.infile = infile
        self.outfile = outfile
        if self.infile:
            # Try to merge existing counts file.
            try:
                counts, calledfuncs, callers = \
                        pickle.load(open(self.infile, 'rb'))
                self.update(self.__class__(counts, calledfuncs, callers))
            except (IOError, EOFError, ValueError), err:
                print >> sys.stderr, ("Skipping counts file %r: %s"
                                      % (self.infile, err))
train_val.py (project: HandDetection, author: YunqiuXu)
def from_snapshot(self, sess, sfile, nfile):
    print('Restoring model snapshots from {:s}'.format(sfile))
    self.saver.restore(sess, sfile)
    print('Restored.')
    # Also restore the other hyper-parameters/states needed for training. (TODO xinlei) I have
    # tried my best to find the random states so that training can be resumed exactly;
    # however, the TensorFlow internal random state is currently not available.
    with open(nfile, 'rb') as fid:
      st0 = pickle.load(fid)
      cur = pickle.load(fid)
      perm = pickle.load(fid)
      cur_val = pickle.load(fid)
      perm_val = pickle.load(fid)
      last_snapshot_iter = pickle.load(fid)

      np.random.set_state(st0)
      self.data_layer._cur = cur
      self.data_layer._perm = perm
      self.data_layer_val._cur = cur_val
      self.data_layer_val._perm = perm_val

    return last_snapshot_iter
pascal_voc.py (project: HandDetection, author: YunqiuXu)
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
      with open(cache_file, 'rb') as fid:
        try:
          roidb = pickle.load(fid)
        except:
          roidb = pickle.load(fid, encoding='bytes')
      print('{} gt roidb loaded from {}'.format(self.name, cache_file))
      return roidb

    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
      pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb
db_utils.py (project: pybot, author: spillai)
def iter_keys_values(self, keys, inds=None, verbose=False): 
        for key in keys: 
            if key not in self.keys_: 
                raise RuntimeError('Key %s not found in dataset. keys: %s' % (key, self.keys_))

        idx, ii = 0, 0
        total_chunks = len(self.meta_file_.chunks)
        inds = np.sort(inds) if inds is not None else None

        for chunk_idx, chunk in enumerate(progressbar(self.meta_file_.chunks, size=total_chunks, verbose=verbose)): 
            data = AttrDict.load(self.get_chunk_filename(chunk_idx))

            # if inds is None: 
            items = (data[key] for key in keys)
            for item in izip(*items): 
                yield item
            # else:
            #     for i, item in enumerate(data[key]): 
            #         if inds[ii] == idx + i: 
            #             yield item
            #             ii += 1
            #             if ii >= len(inds): break
            #     idx += len(data[key])
db_utils.py (project: pybot, author: spillai)
def iterchunks(self, key, batch_size=10, verbose=False): 
        if key not in self.keys_: 
            raise RuntimeError('Key %s not found in dataset. keys: %s' % (key, self.keys_))

        idx, ii = 0, 0
        total_chunks = len(self.meta_file_.chunks)
        batch_chunks = grouper(range(len(self.meta_file_.chunks)), batch_size)

        for chunk_group in progressbar(batch_chunks, size=total_chunks / batch_size, verbose=verbose): 
            items = []
            # print key, chunk_group
            for chunk_idx in chunk_group: 
                # grouper will fill chunks with default none values
                if chunk_idx is None: continue
                # Load chunk
                data = AttrDict.load(self.get_chunk_filename(chunk_idx))
                for item in data[key]: 
                    items.append(item)
            yield items
operate_hash.py (project: didi_competition, author: Heipiao)
def district_hash_map(data_frame):
    district_map_f = "cluster_map.pickle"
    district_map_f_path = os.path.join(DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, 
                                        district_map_f)
    if not os.path.exists(district_map_f_path):
        create_hash_district_map_dict()
    # load the needed map file
    with open(district_map_f_path, "rb") as f:
        map_rule = pickle.load(f)

    # map the needed cols..
    for i in range(len(data_frame.columns)):
        if "district_hash" in data_frame.columns[i]:
            # map the hash according to the map rule
            district_hash_col = data_frame.columns[i]
            data_frame[district_hash_col] = data_frame[district_hash_col].replace(map_rule)

            # after mapping, delete its hash str
            new_name = re.sub("_hash","",district_hash_col)
            data_frame.rename(columns={district_hash_col: new_name}, inplace = True)

    return data_frame
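
A hedged usage sketch (column names and hash values are hypothetical, and the cluster_map.pickle mapping is assumed to exist or be creatable): any column whose name contains "district_hash" is replaced with mapped ids and renamed without the "_hash" suffix.

import pandas as pd

df = pd.DataFrame({"start_district_hash": ["hash_a", "hash_b"], "cnt": [3, 7]})
df = district_hash_map(df)   # now has a "start_district" column with mapped ids
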


## input the dir you want to map the hash
abstract_related.py (project: gransk, author: pcbje)
def setup(self, config):
    """
    Load existing data for given worker.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.path = os.path.join(
        config[helper.DATA_ROOT], '%s_buckets-%s.pickle' %
        (self.NAME, config[helper.WORKER_ID]))

    with open(self.path, 'a') as _:
      pass

    with open(self.path, 'rb') as inp:
      try:
        self.buckets = pickle.load(inp)
      except Exception:
        self.buckets = {}

    config_related = config.get(helper.RELATED, {}).get(self.NAME, {})
    self.min_score = config_related.get(helper.MIN_SCORE, 0.4)
    self.min_shared = config_related.get(helper.MIN_SHARED, 5)
    self.max_results = config_related.get(helper.MAX_RESULTS, 100)
abstract_related.py (project: gransk, author: pcbje)
def load_all(self, config):
    """
    Load all existing data.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.buckets = {}

    for path in glob.glob(os.path.join(
            config[helper.DATA_ROOT], '%s_buckets-*.pickle' % self.NAME)):
      with open(path, 'rb') as inp:
        try:
          for key, value in pickle.load(inp).items():
            if key in self.buckets:
                self.buckets[key]['bins'].update(value['bins'])
            else:
              self.buckets[key] = value
        except:
          logging.warning('could not load related_%s data', self.NAME)
candidates.py (project: lung-cancer-detector, author: YichenGong)
def build_data_dict(self, layer_features, k=5):
    """
    This builds dict[id] = {label, spacing, 1={loc, p, layer1_feature, layer2_feature...}, 2={}...}

    :param layer_features: features from layers, e.g. 67, 77
    :param k: number of nodules considered as inputs
    :return: a combined dictionary
    """
    with open(self.pkl_dir + self.data_file_name, 'rb') as data_file:
      data = cPickle.load(data_file)
    with open(self.pkl_dir + self.feature_file_name, 'rb') as feature_file:
      features = cPickle.load(feature_file)

    data_dict = {}
    for d,f in zip(data, features):
      pid = d['id']
      data_dict[pid] = {'label':d['label'], 'spacing':d['spacing']}

      # add the features
      for i in range(k):
        data_dict[pid][i] = {'loc': f['loc_{}'.format(i)], 'p': f['p_{}'.format(i)]}
        for layer in layer_features:
          data_dict[pid][i][layer] = f['out_{}_{}'.format(i, layer)]

    return data_dict
luna_preprocessed_load_data.py (project: lung-cancer-detector, author: YichenGong)
def next_batch(self, batch_size):
        assert self.train_mode or self.validation_mode, "Please set mode, train, validation or test. e.g. DataLoad.train()"
        idx_next_batch = [(self.current_idx + i) % len(self.p_imgs) for i in range(batch_size)]
        patient_img_next_batch = [ self.p_imgs[idx] for idx in idx_next_batch]
        batch_image = []
        batch_mask = []
        for image in patient_img_next_batch:
            fi = gzip.open(self.data_path + image, 'rb')
            img = pickle.load(fi)
            img = np.expand_dims(img, axis=2)
            batch_image.append(img)
            fi.close()
            fm = gzip.open(self.mask_path + image, 'rb')
            mask = pickle.load(fm)
            fm.close()
            mask_binary_class = np.zeros([mask.shape[0],mask.shape[1],2])
            mask_binary_class[:,:,0][mask == 0] = 1
            mask_binary_class[:,:,1][mask == 1] = 1
            batch_mask.append(mask_binary_class)
        self.current_idx = (self.current_idx + batch_size) % len(self.p_imgs)
        batched_image = np.stack(batch_image)
        batched_mask = np.stack(batch_mask)
        return batched_image, batched_mask
__init__.py (project: earthy, author: alvations)
def sent_tokenize(text, lang='english'):
    """
    Punkt sentence tokenizer from NLTK.
    """
    global _nltk_sent_tokenizer
    try:
        _nltk_sent_tokenizer
    except NameError:
        # If the sentence tokenizer wasn't previously initialized.
        available_languages = ['czech', 'danish', 'dutch', 'english',
                               'estonian', 'finnish', 'french', 'german',
                               'greek', 'italian', 'norwegian', 'polish',
                               'portuguese', 'slovene', 'spanish', 'swedish',
                               'turkish']
        assert lang in available_languages, "Punkt Tokenizer for {} not available".format(lang)
        # Checks that the punkt tokenizer model was previously downloaded.
        download('punkt', quiet=True)
        path_to_punkt = _nltk_downloader._download_dir + '/tokenizers/punkt/{}.pickle'.format(lang)
        with open(path_to_punkt, 'rb') as fin:
            _nltk_sent_tokenizer = pickle.load(fin)
    # Actual tokenization using the Punkt Model.
    return _nltk_sent_tokenizer.tokenize(text)
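
A short usage sketch (requires the NLTK punkt model to be downloadable); the Punkt model knows common abbreviations such as "Dr.", so it should split roughly as shown:

sentences = sent_tokenize("Dr. Smith arrived. He was late.")
# expected: ['Dr. Smith arrived.', 'He was late.']
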
recovery.py (project: astrobase, author: waqasbhatti)
def read_fakelc(fakelcfile):
    '''
    This just reads a pickled fake LC.

    '''

    try:
        with open(fakelcfile,'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(fakelcfile,'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict



#######################
## UTILITY FUNCTIONS ##
#######################
generation.py (project: astrobase, author: waqasbhatti)
def read_pklc(lcfile):
    '''
    This just reads a pickle.

    '''

    try:
        with open(lcfile,'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(lcfile,'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict


# LC format -> [default fileglob,  function to read LC format]
lcproc.py (project: astrobase, author: waqasbhatti)
def read_pklc(lcfile):
    '''
    This just reads a pickle.

    '''

    try:
        with open(lcfile,'rb') as infd:
            lcdict = pickle.load(infd)
    except UnicodeDecodeError:
        with open(lcfile,'rb') as infd:
            lcdict = pickle.load(infd, encoding='latin1')

    return lcdict



# these translate filter operators given as strings to Python operators
workflow.py (project: Gank-Alfred-Workflow, author: hujiaweibujidao)
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` is invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """

        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer
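
A minimal sketch of an object satisfying the load()/dump() contract checked above (the JSONSerializer name and the manager instance are illustrative, not part of the library):

import json

class JSONSerializer(object):
    """Illustrative serializer exposing the required load()/dump() methods."""

    @classmethod
    def load(cls, file_obj):
        return json.load(file_obj)

    @classmethod
    def dump(cls, obj, file_obj):
        json.dump(obj, file_obj, indent=2)

manager.register('json', JSONSerializer)   # `manager` is a hypothetical SerializerManager instance
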
sessions.py (project: Flask_Blog, author: sugarguo)
def get(self, sid):
        if not self.is_valid_key(sid):
            return self.new()
        try:
            f = open(self.get_session_filename(sid), 'rb')
        except IOError:
            if self.renew_missing:
                return self.new()
            data = {}
        else:
            try:
                try:
                    data = load(f)
                except Exception:
                    data = {}
            finally:
                f.close()
        return self.session_class(data, sid, False)
coord.py (project: kaggle-review, author: daxiongshu)
def find_neighbor():
    h2c = pickle.load(open("comps/mobike/sol_carl/data/h2c.p","rb"))
    c2h = pickle.load(open("comps/mobike/sol_carl/data/c2h.p","rb"))
    print(len(h2c),len(c2h))
    lc = [len(c2h[i])  for i in c2h]
    #distribution(lc)
    #point = list(h2c.keys())[0]
    point = "wx4snhx"
    print("hash", point, h2c[point])
    lat,lon = h2c[point]
    #lat,lon = int(lat+0.5),int(lon+0.5) 
    points = c2h[(lat,lon)]
    for la in [lat-0.01,lat,lat+0.01]:
        for lo in [lon-0.01,lon,lon+0.01]:
            coord = (la,lo)
            points = c2h.get(coord,[])
            for p in points:
                d = geo_distance(h2c[p],(lat,lon))
                print(coord,p,d)
nlp_pd_db.py (project: kaggle-review, author: daxiongshu)
def get_per_sample_tf(self, texts, field, silent=0):
        """
        Each sample is a document.
        Input:
            texts: ["train","text"]
        """
        if self.sample_tf is not None:
            return

        self.sample_tf = {}
        self.get_per_sample_words_count(texts, field, 1)

        for text in texts:
            name = "{}/{}_sample_tf_{}.p".format(self.flags.data_path,self.name,text)
            if os.path.exists(name):
                self.sample_tf[text] = pickle.load(open(name,'rb'))
            else:
                print("gen",name)
                tf_list = tf(self.sample_words_count[text],0)
                pickle.dump(tf_list,open(name,'wb'))
                self.sample_tf[text] = tf_list
            if silent==0:
                print("\n{} sample tf done".format(text))
utils.py (project: kaggle-review, author: daxiongshu)
def mean_target_rate(name,out,idcol,ycol):
    if os.path.exists(out):
        return pickle.load(open(out,'rb'))
    yc,cc = defaultdict(float),defaultdict(float)
    for c,row in enumerate(csv.DictReader(open(name))):
        y = float(row[ycol])
        for i in row:
            if i in [idcol,ycol]:
                continue
            v = "%s-%s"%(i,row[i])
            yc[v] += y
            cc[v] += 1.0

        if c>0 and c%100000 == 0:
            print("rows %d len_cc %d"%(c,len(cc)))
    for i in yc:
        yc[i] = yc[i]/cc[i]
    pickle.dump(yc,open(out,'wb'))
    return yc
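
A hedged usage sketch (file and column names are hypothetical): the returned dict maps "column-value" strings to the mean of the target over all rows carrying that value, usable as a lookup for mean target encoding.

rates = mean_target_rate("train.csv", "target_rates.p", idcol="id", ycol="target")
print(rates.get("device_type-mobile"))   # mean target where device_type == "mobile"
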
tutor.py (project: fluxpart, author: usda-ars-ussl)
def tutor_fpout():
    pklout = os.path.join(RESDIR, TUTORPKL)
    if os.path.exists(pklout):
        with open(pklout, 'rb') as f:
            fpout = pickle.load(f)
    else:
        print('re-creating fp results ... this could take a few minutes')
        zip_archive = os.path.join(DATADIR, ZIPFILE)
        with zipfile.ZipFile(zip_archive, 'r') as zfile:
            zfile.extractall(DATADIR)
        fpout = tutor_example()
        make_clean_dat()
        os.makedirs(RESDIR, exist_ok=True)
        with open(pklout, 'wb') as f:
            pickle.dump(fpout, f)
    return fpout
helpers.py (project: quail, author: ContextLab)
def load_egg(filepath):
    """
    Loads pickled egg

    Parameters
    ----------
    filepath : str
        Location of pickled egg

    Returns
    ----------
    egg : Egg data object
        A loaded unpickled egg

    """

    with open(filepath, 'rb') as f:
        egg = pickle.load(f)

    return egg
common.py (project: cli, author: sparkl)
def unpickle_cookies(args, alias=None):
    """
    Unpickles the cookies file for the given alias and
    returns the original object.

    If no file exists, then an empty cookies object is
    returned.
    """
    if alias is None:
        alias = args.alias

    cookie_file = os.path.join(
        get_working_dir(args),
        alias + ".cookies")

    try:
        with open(cookie_file, "rb") as cookie_jar:
            cookies = pickle.load(cookie_jar)

    except BaseException:
        cookies = requests.cookies.RequestsCookieJar()
    return cookies
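
A brief usage sketch (args is whatever namespace the CLI passes in; attaching the jar to a requests session is an assumption about downstream use):

import requests

session = requests.Session()
session.cookies = unpickle_cookies(args, alias="staging")
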
data_manager.py (project: AutoML5, author: djajetic)
def loadData (self, filename, verbose=True, replace_missing=True):
        ''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
        if verbose:  print("========= Reading " + filename)
        start = time.time()
        if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
            with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
                vprint (verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
                return pickle.load(pickle_file)
        if 'format' not in self.info.keys():
            self.getFormatData(filename)
        if 'feat_num' not in self.info.keys():
            self.getNbrFeatures(filename)

        data_func = {'dense':data_io.data, 'sparse':data_io.data_sparse, 'sparse_binary':data_io.data_binary_sparse}

        data = data_func[self.info['format']](filename, self.info['feat_num'])

        # IMPORTANT: when we replace missing values we double the number of variables

        if self.info['format']=='dense' and replace_missing and np.any(map(np.isnan,data)):
            vprint (verbose, "Replace missing values by 0 (slow, sorry)")
            data = data_converter.replace_missing(data)
        if self.use_pickle:
            with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
                vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
                p = pickle.Pickler(pickle_file) 
                p.fast = True 
                p.dump(data)
        end = time.time()
        if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
        return data
data_manager.py (project: AutoML5, author: djajetic)
def loadLabel (self, filename, verbose=True):
        ''' Get the solution/truth values'''
        if verbose:  print("========= Reading " + filename)
        start = time.time()
        if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
            with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:
                vprint (verbose, "Loading pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
                return pickle.load(pickle_file)
        if 'task' not in self.info.keys():
            self.getTypeProblem(filename)

        # IG: Here change to accommodate the new multiclass label format
        if self.info['task'] == 'multilabel.classification':
            label = data_io.data(filename)
        elif self.info['task'] == 'multiclass.classification':
            label = data_converter.convert_to_num(data_io.data(filename))              
        else:
            label = np.ravel(data_io.data(filename)) # get a column vector
            #label = np.array([np.ravel(data_io.data(filename))]).transpose() # get a column vector

        if self.use_pickle:
            with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
                vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
                p = pickle.Pickler(pickle_file) 
                p.fast = True 
                p.dump(label)
        end = time.time()
        if verbose:  print( "[+] Success in %5.2f sec" % (end - start))
        return label

