Example source code for Python load()

util.py (project: Dense-Net, author: achyudhk)
def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt) will
    be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                models[model_file] = pickle.load(f)['model']
            except pickle.UnpicklingError:
                continue
    return models
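
A minimal usage sketch (the './checkpoints' directory name is hypothetical): load every pickled model in a directory and report what was recovered.

# Hypothetical usage: './checkpoints' holds pickled dicts with a 'model' field;
# non-pickle files such as README.txt are skipped by load_models.
models = load_models('./checkpoints')
for fname, model in models.items():
    print(fname, type(model))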
utils.py (project: IDNNs, author: ravidziv)
def get_data(name):
    """Load data from the given name"""
    # New format: a single pickled dict stored next to the given prefix
    if os.path.isfile(name + 'data.pickle'):
        with open(name + 'data.pickle', 'rb') as current_f:
            d2 = cPickle.load(current_f)
    # Old format: a pickled list whose first element is the raw 6-D array
    else:
        with open(name, 'rb') as current_f:
            d1 = cPickle.load(current_f)
        data1 = d1[0]
        data = np.array([data1[:, :, :, :, :, 0], data1[:, :, :, :, :, 1]])
        # Convert log e to log2
        normalization_factor = 1 / np.log2(np.e)
        epochsInds = np.arange(0, data.shape[4])
        d2 = {'epochsInds': epochsInds, 'information': data / normalization_factor}
    return d2
word2vec.py (project: bot2017Fin, author: AllanYiin)
def __init__(self, filename='word2vec.pklz'):
        """
        Py Word2vec class
        """
        super().__init__()
        self.name = 'word2vec'
        self.load(filename)
        self.vocab_cnt = len(self)
        self.dims = self[list(self.keys())[0]].shape[0]

        print('Vocabulary size: ' + str(self.vocab_cnt))
        print('Embedding dimensions: ' + str(self.dims))

        self.word2idx = {w: i for i, w in enumerate(self.keys())}
        self.idx2word = {i: w for i, w in enumerate(self.keys())}
        self._matrix = np.array(list(self.values()))
        print(self._matrix.shape)
datasets.py (project: RFHO, author: lucfra)
def load_caltech101(folder=CALTECH101_DIR, one_hot=True, partitions=None, filters=None, maps=None):
    path = folder + "/caltech101.pickle"
    with open(path, "rb") as input_file:
        X, target_name, files = cpickle.load(input_file)
    list_of_targets = sorted(set(target_name))
    dict_name_ID = {k: i for i, k in enumerate(list_of_targets)}
    dict_ID_name = {v: k for k, v in dict_name_ID.items()}
    Y = [dict_name_ID[name_y] for name_y in target_name]
    if one_hot:
        Y = to_one_hot_enc(Y)
    dataset = Dataset(data=X, target=Y, info={'dict_name_ID': dict_name_ID, 'dict_ID_name': dict_ID_name},
                      sample_info=[{'target_name': t, 'files': f} for t, f in zip(target_name, files)])
    if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True)
        res += [None] * (3 - len(res))
        return Datasets(train=res[0], validation=res[1], test=res[2])
    return dataset
datasets.py (project: RFHO, author: lucfra)
def load_cifar10(folder=CIFAR10_DIR, one_hot=True, partitions=None, filters=None, maps=None, balance_classes=False):
    path = folder + "/cifar-10.pickle"
    with open(path, "rb") as input_file:
        X, target_name, files = cpickle.load(input_file)
    X = np.array(X)
    list_of_targets = sorted(set(target_name))
    dict_name_ID = {k: i for i, k in enumerate(list_of_targets)}
    dict_ID_name = {v: k for k, v in dict_name_ID.items()}
    Y = [dict_name_ID[name_y] for name_y in target_name]
    if one_hot:
        Y = to_one_hot_enc(Y)
    dataset = Dataset(data=X, target=Y, info={'dict_name_ID': dict_name_ID, 'dict_ID_name': dict_ID_name},
                      sample_info=[{'target_name': t, 'files': f} for t, f in zip(target_name, files)])
    if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True, balance_classes=balance_classes)
        res += [None] * (3 - len(res))
        return Datasets(train=res[0], validation=res[1], test=res[2])
    return dataset
StatisticsWeb.py (project: ForumAnalysis, author: consjuly542)
def load_codex_list():
    '''
    Loads the codex list from file
    to show in the select menu.

    Parameters
    ------------
        None

    Returns
    ------------
    * codex_list (string[]): list of codex names

    '''
    with open(os.path.join(ROOT_DIR, 'data/res/codex_list'), 'rb') as f_in:
        codex_list = pickle.load(f_in)
    return codex_list
utils.py (project: LINE, author: VahidooX)
def load_pickle(f):
    """
    loads and returns the content of a pickled file
    it handles the inconsistencies between the pickle packages available in Python 2 and 3
    """
    try:
        import cPickle as thepickle
    except ImportError:
        import _pickle as thepickle

    try:
        ret = thepickle.load(f, encoding='latin1')
    except TypeError:
        ret = thepickle.load(f)

    return ret
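
A usage sketch (the file name is hypothetical): load_pickle expects an already-opened file object, so the caller opens the file, and it must be opened in binary mode.

with open('embeddings.pkl', 'rb') as f:  # hypothetical path
    data = load_pickle(f)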
test_pmf.py (project: RecSys, author: arvidzt)
def setUp(self):
        self.seed = 0

        file_path = os.path.join(TEST_DATA_DIR, ML_100K_RATING_PKL)
        with gzip.open(file_path, 'rb') as f:
            if sys.version_info[0] == 3:
                ratings = cPickle.load(f, encoding='latin1')
            else:
                ratings = cPickle.load(f)

        self.n_user = 943
        self.n_item = 1682
        self.assertEqual(ratings.shape[0], 100000)
        self.assertEqual(ratings[:, 0].min(), 1)
        self.assertEqual(ratings[:, 0].max(), self.n_user)
        self.assertEqual(ratings[:, 1].min(), 1)
        self.assertEqual(ratings[:, 1].max(), self.n_item)

        # let user_id / item_id start from 0
        ratings[:, 0] = ratings[:, 0] - 1
        ratings[:, 1] = ratings[:, 1] - 1
        self.ratings = ratings
test_als.py (project: RecSys, author: arvidzt)
def setUp(self):
        self.seed = 0

        file_path = os.path.join(TEST_DATA_DIR, ML_100K_RATING_PKL)
        with gzip.open(file_path, 'rb') as f:
            if sys.version_info[0] == 3:
                ratings = cPickle.load(f, encoding='latin1')
            else:
                ratings = cPickle.load(f)

        self.n_user = 943
        self.n_item = 1682
        self.assertEqual(ratings.shape[0], 100000)
        self.assertEqual(ratings[:, 0].min(), 1)
        self.assertEqual(ratings[:, 0].max(), self.n_user)
        self.assertEqual(ratings[:, 1].min(), 1)
        self.assertEqual(ratings[:, 1].max(), self.n_item)

        # let user_id / item_id start from 0
        ratings[:, 0] = ratings[:, 0] - 1
        ratings[:, 1] = ratings[:, 1] - 1
        self.ratings = ratings
test_bpmf.py (project: RecSys, author: arvidzt)
def setUp(self):
        self.seed = 0

        file_path = os.path.join(TEST_DATA_DIR, ML_100K_RATING_PKL)
        with gzip.open(file_path, 'rb') as f:
            if sys.version_info[0] == 3:
                ratings = cPickle.load(f, encoding='latin1')
            else:
                ratings = cPickle.load(f)

        self.n_user = 943
        self.n_item = 1682
        self.assertEqual(ratings.shape[0], 100000)
        self.assertEqual(ratings[:, 0].min(), 1)
        self.assertEqual(ratings[:, 0].max(), self.n_user)
        self.assertEqual(ratings[:, 1].min(), 1)
        self.assertEqual(ratings[:, 1].max(), self.n_item)

        # let user_id / item_id start from 0
        ratings[:, 0] = ratings[:, 0] - 1
        ratings[:, 1] = ratings[:, 1] - 1
        self.ratings = ratings
myredis.py (project: autoxd, author: nessessary)
def get(fn, *args, **kwargs):
    """Fetch cached data from redis.
    fn: the function whose result is cached
    return: the data returned by fn"""
    key = gen_keyname(fn)
    r = createRedis()
    #r.flushall()
    if key not in r.keys():
        o = fn(*args, **kwargs)
        # Serialize the result before storing it in redis
        f = cStringIO.StringIO()
        cPickle.dump(o, f)
        s = f.getvalue()
        f.close()
        r.set(key, s)
    s = r.get(key)
    f = cStringIO.StringIO(s)
    o = cPickle.load(f)
    f.close()
    return o
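
A usage sketch, assuming a reachable local Redis instance and the module's createRedis/gen_keyname helpers; the cached function is hypothetical. Note that the key comes from the function name alone, so calls with different arguments share one cache entry.

def expensive_sum(n):
    return sum(i * i for i in range(n))

result = get(expensive_sum, 10 ** 6)  # computed once, then served from Redis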
embeddings_container.py (project: deep-mlsa, author: spinningbytes)
def load_transformer_list(config_data):
        output_directory = config_data['embeddings_directory']
        output_basename = config_data['embeddings_basename']

        path = os.path.join(output_directory, output_basename)
        config_fname = os.path.join(path, 'config.json')
        with open(config_fname, 'r') as json_data:
            wemb_config = json.load(json_data)
            ngrams = wemb_config['ngrams']

            transformers = []
            for i in range(ngrams - 1):
                phrase_model = Phrases.load(os.path.join(path, '{}gram'.format(i)))
                transformers.append(phrase_model)

        return transformers
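
A usage sketch with a hypothetical config dict; the directory layout (a config.json plus '0gram', '1gram', ... Phrases files) follows what the function above reads.

config_data = {'embeddings_directory': 'embeddings',  # hypothetical paths
               'embeddings_basename': 'wemb_en'}
transformers = load_transformer_list(config_data)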
utils.py (project: deepnet, author: parasdahal)
def load_mnist(path, num_training=50000, num_test=10000, cnn=True, one_hot=False):
    with gzip.open(path, 'rb') as f:
        training_data, validation_data, test_data = cPickle.load(
            f, encoding='iso-8859-1')
    X_train, y_train = training_data
    X_validation, y_validation = validation_data
    X_test, y_test = test_data
    if cnn:
        shape = (-1, 1, 28, 28)
        X_train = X_train.reshape(shape)
        X_validation = X_validation.reshape(shape)
        X_test = X_test.reshape(shape)
    if one_hot:
        y_train = one_hot_encode(y_train, 10)
        y_validation = one_hot_encode(y_validation, 10)
        y_test = one_hot_encode(y_test, 10)
    X_train, y_train = X_train[range(
        num_training)], y_train[range(num_training)]
    X_test, y_test = X_test[range(num_test)], y_test[range(num_test)]
    return (X_train, y_train), (X_test, y_test)
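
A usage sketch with a hypothetical path; it assumes the gzipped MNIST pickle laid out as (training, validation, test) tuples, as the function above expects.

(X_train, y_train), (X_test, y_test) = load_mnist('data/mnist.pkl.gz', cnn=True, one_hot=True)
print(X_train.shape)  # (50000, 1, 28, 28) with the default splits
print(X_test.shape)   # (10000, 1, 28, 28)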
utils.py (project: deepnet, author: parasdahal)
def load_cifar10(path, num_training=1000, num_test=1000):
    Xs, ys = [], []
    for batch in range(1, 6):
        with open(os.path.join(path, "data_batch_{0}".format(batch)), 'rb') as f:
            data = cPickle.load(f, encoding='iso-8859-1')
        X = data["data"].reshape(10000, 3, 32, 32).astype("float64")
        y = np.array(data["labels"])
        Xs.append(X)
        ys.append(y)
    with open(os.path.join(path, "test_batch"), 'rb') as f:
        data = cPickle.load(f, encoding='iso-8859-1')
    X_train, y_train = np.concatenate(Xs), np.concatenate(ys)
    X_test = data["data"].reshape(10000, 3, 32, 32).astype("float64")
    y_test = np.array(data["labels"])
    X_train, y_train = X_train[range(
        num_training)], y_train[range(num_training)]
    X_test, y_test = X_test[range(num_test)], y_test[range(num_test)]
    X_train /= 255.0
    X_test /= 255.0
    return (X_train, y_train), (X_test, y_test)
omniglot.py (project: LSH_Memory, author: RUSH-LAB)
def __init__(self, filepath):
        """
        Args:
            filepath (string): path to data file
            Data format - list of characters, list of images, (row, col, ch) numpy array normalized between (0.0, 1.0)
            Omniglot dataset - Each language contains a set of characters; Each character is defined by 20 different images
        """
        with open(filepath, "rb") as f:
            processed_data = pickle.load(f)

        self.data = dict()
        for image, label in zip(processed_data['images'], processed_data['labels']):
            if label not in self.data:
                self.data[label] = list()
            img = np.expand_dims(image, axis=0).astype('float32')
            #img /= 255.0
            self.data[label].append(img)
        self.num_categories = len(self.data)
        self.category_size = len(self.data[processed_data['labels'][0]])
omniglot.py (project: LSH_Memory, author: RUSH-LAB)
def __init__(self, filepath):
        """
        Args:
            filepath (string): path to data file
            Data format - list of characters, list of images, (row, col, ch) numpy array normalized between (0.0, 1.0)
            Omniglot dataset - Each language contains a set of characters; Each character is defined by 20 different images
        """
        with open(filepath, "rb") as f:
            processed_data = pickle.load(f)

        self.data = dict()
        for image, label in zip(processed_data['images'], processed_data['labels']):
            if label not in self.data:
                self.data[label] = list()
            img = np.expand_dims(image, axis=0).astype('float32')
            img /= 255.0
            self.data[label].append(img)
        self.num_categories = len(self.data)
        self.category_size = len(self.data[processed_data['labels'][0]])
rnns.py (project: diagonal_rnns, author: ycemsubakan)
def generate_random_hyperparams(lr_min, lr_max, K_min, K_max, num_layers_min, num_layers_max, load_hparams):
    """This function generates random hyper-parameters for hyperparameter search"""

    #this is for new random parameters
    if not load_hparams[0]:
        lr_exp = np.random.uniform(lr_min, lr_max)
        lr = 10**(lr_exp)
        K = np.random.choice(np.arange(K_min, K_max+1),1)[0]
        num_layers = np.random.choice(np.arange(num_layers_min, num_layers_max + 1),1)[0]
        # momentum_exp = np.random.uniform(-8, 0)
        momentum = np.random.uniform(0, 1)  # (2**momentum_exp)

    #this loads hyperparameters from an existing file
    else:
        exp_data = np.load('experiment_data/nmf_data_timit_model_bi_mod_lstm_diag_to_full_device_cpu:0_1490813245.npy')[load_hparams[1]]
        lr = exp_data['LR']
        K = exp_data['K']
        num_layers = exp_data['num_layers']
        # older experiment files may not contain a momentum entry
        try:
            momentum = exp_data['momentum']
        except KeyError:
            momentum = None

    return lr, K, num_layers, momentum
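
A usage sketch with hypothetical search ranges; load_hparams=[False, None] requests fresh random draws instead of loading from the saved experiment file.

lr, K, num_layers, momentum = generate_random_hyperparams(
    -4, -2,         # lr_min, lr_max: learning rate is sampled as 10**U(-4, -2)
    32, 256,        # K_min, K_max
    1, 3,           # num_layers_min, num_layers_max
    [False, None])  # load_hparams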
skipthoughts.py (project: TAC-GAN, author: dashayushman)
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print('Loading model parameters...')
    with open('%s.pkl'%path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl'%path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print('Compiling encoders...')
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
skipthoughts.py (project: TAC-GAN, author: dashayushman)
def load_tables():
    """
    Load the tables
    """
    words = []
    utable = numpy.load(path_to_tables + 'utable.npy', encoding='bytes')
    btable = numpy.load(path_to_tables + 'btable.npy', encoding='bytes')
    with open(path_to_tables + 'dictionary.txt', 'rb') as f:
        for line in f:
            words.append(line.decode('utf-8').strip())
    utable = OrderedDict(zip(words, utable))
    btable = OrderedDict(zip(words, btable))
    return utable, btable
skipthoughts.py (project: TAC-GAN, author: dashayushman)
def preprocess(text):
    """
    Preprocess text for encoder
    """
    X = []
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    for t in text:
        sents = sent_detector.tokenize(t)
        result = ''
        for s in sents:
            tokens = word_tokenize(s)
            result += ' ' + ' '.join(tokens)
        X.append(result)
    return X
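
A usage sketch; it requires NLTK's punkt tokenizer data to be installed (e.g. via nltk.download('punkt')).

# Each input document is sentence-split, word-tokenized, and re-joined with spaces.
X = preprocess(['This is one document. It has two sentences.'])
print(X[0])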
skipthoughts.py (project: TAC-GAN, author: dashayushman)
def load_params(path, params):
    """
    load parameters
    """
    pp = numpy.load(path)
    for kk, vv in params.items():
        if kk not in pp:
            warnings.warn('%s is not in the archive'%kk)
            continue
        params[kk] = pp[kk]
    return params


# layers: 'name': ('parameter initializer', 'feedforward')
util.py (project: Dense-Net, author: achyudhk)
def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='latin1')
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y
util.py (project: Dense-Net, author: achyudhk)
def load_CIFAR10(ROOT):
    """ load all of cifar """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
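
A usage sketch with a hypothetical path to an extracted cifar-10-batches-py directory.

Xtr, Ytr, Xte, Yte = load_CIFAR10('data/cifar-10-batches-py')
print(Xtr.shape, Xte.shape)  # (50000, 32, 32, 3) (10000, 32, 32, 3)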
word2vec.py (project: bot2017Fin, author: AllanYiin)
def load(self, filename='word2vec.pklz'):
        fil = gzip.open(filename, 'rb')
        while True:
            try:
                tmp = cPickle.load(fil)
                self.update(tmp)
            except EOFError as e:
                # reached the end of the pickle stream
                print(e)
                break
        fil.close()
download_cifar10.py (project: TensorFlow-Machine-Learning-Cookbook, author: PacktPublishing)
def load_batch_from_file(file):
    file_conn = open(file, 'rb')
    image_dictionary = cPickle.load(file_conn, encoding='latin1')
    file_conn.close()
    return image_dictionary
datasets.py (project: RFHO, author: lucfra)
def load_cifar100(folder=CIFAR100_DIR, one_hot=True, partitions=None, filters=None, maps=None):
    path = folder + "/cifar-100.pickle"
    with open(path, "rb") as input_file:
        X, target_ID_fine, target_ID_coarse, fine_ID_corr, coarse_ID_corr, files = cpickle.load(input_file)
    X = np.array(X)

    target_ID_fine = target_ID_fine[:len(X)]
    target_ID_coarse = target_ID_coarse[:len(X)]

    fine_ID_corr = {v: k for v, k in zip(range(len(fine_ID_corr)), fine_ID_corr)}
    coarse_ID_corr = {v: k for v, k in zip(range(len(coarse_ID_corr)), coarse_ID_corr)}
    fine_label_corr = {v: k for k, v in fine_ID_corr.items()}
    coarse_label_corr = {v: k for k, v in coarse_ID_corr.items()}

    Y = np.array(target_ID_fine)
    if one_hot:
        Y = to_one_hot_enc(Y)
    superY = np.array(target_ID_coarse)
    if one_hot:
        superY = to_one_hot_enc(superY)

    print(len(X))
    print(len(Y))
    dataset = Dataset(data=X, target=Y,
                      info={'dict_name_ID_fine': fine_label_corr, 'dict_name_ID_coarse': coarse_label_corr,
                                         'dict_ID_name_fine': fine_ID_corr, 'dict_ID_name_coarse': coarse_ID_corr},
                      sample_info=[{'Y_coarse': yc, 'files': f} for yc, f in zip(superY, files)])
    if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True)
        res += [None] * (3 - len(res))
        return Datasets(train=res[0], validation=res[1], test=res[2])
    return dataset
save_and_load.py (project: RFHO, author: lucfra)
def load_obj(name, root_dir=None, notebook_mode=True):
    if root_dir is None: root_dir = os.getcwd()
    directory = check_or_create_dir(join_paths(root_dir, FOLDER_NAMINGS['OBJ_DIR']),
                                    notebook_mode=notebook_mode, create=False)

    filename = join_paths(directory, name if name.endswith('.pkgz') else name + '.pkgz')
    with gzip.open(filename, 'rb') as f:
        return pickle.load(f)
StatisticsModule.py (project: ForumAnalysis, author: consjuly542)
def __init__(self, recompute_statistics=False):
        """
        Initiation of module.

        Parameters
        ------------
        *recompute_statistics(boolean): if True, statistics are calculated again,
                                    if False, statistics are loaded from file

        """
        self.get_article_statistics(recompute_statistics=recompute_statistics)
        # list which users see
        self.cur_articles_list = list(self.article_index.values())
        self.cur_articles_list = [a for a in self.cur_articles_list if a.questions_cnt > 0 and a.official_article]
        # ranking list of articles without filters - for fast execution
        # self.articles_list_all = list(self.article_index.values())
        # self.articles_list_all = [a for a in self.articles_list_all if a.questions_cnt > 0]

        # filters list
        self.filters_type = []
        self.filters_data = []

        # with open("../data/guide_articles/guide_article_ID", "rb") as f:
        #     self.ids_in_guides = cPickle.load(f)

        with open("../data/guide_articles/guides_list", "rb") as f:
            self.guides_list = cPickle.load(f)

        # print (len(self.ids_in_guides))

        # default - ranked by cnt_questions, no filters
        # self.ranking_articles(rank_type='by_cnt_questions')
StatisticsModule.py (project: ForumAnalysis, author: consjuly542)
def get_article_statistics(self, recompute_statistics=True):
        """
        Agregate statistics from both forum.
        """
        if recompute_statistics:
            self.get_article_index()
            data_generator = loadDataGenerator()
            cnt_not_match_links = 0
            links_cnt = 0
            l2a = Link2Article()
            # log = open("./logs", "w")
            # error_link = []
            for question_batch in data_generator:
                for question in question_batch:

                    links = LinksSearcher(question.get_all_text()).get_simple_links()
                    for link in links:
                        # log.write(link.link_text + "\n")
                        # log.flush()
                        # function from Alexandrina
                        article = l2a.link2article(link)
                        # print (article)
                        if article:
                            # print (article.article_ID)
                            links_cnt += 1
                            self.article_index[article.article_ID].add_question(question, link)
                        else:
                            cnt_not_match_links += 1

                    sys.stderr.write("\r\t\t\t\t\tALL LINKS: %d; CAN'T MATCH: %d" % (links_cnt, cnt_not_match_links))

            with open("./../data/statistics/article_statistics", "wb") as f:
                cPickle.dump(self.article_index, f, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open("./../data/statistics/article_statistics", "rb") as f:
                self.article_index = cPickle.load(f)

