def test_alpha(self, returns, benchmark, expected):
observed = self.empyrical.alpha(returns, benchmark)
assert_almost_equal(
observed,
expected,
DECIMAL_PLACES)
if len(returns) == len(benchmark):
# Compare to scipy linregress
returns_arr = returns.values
benchmark_arr = benchmark.values
mask = ~np.isnan(returns_arr) & ~np.isnan(benchmark_arr)
slope, intercept, _, _, _ = stats.linregress(benchmark_arr[mask],
returns_arr[mask])
assert_almost_equal(
observed,
intercept * 252,
DECIMAL_PLACES
)
# Alpha/beta translation tests.
def test_beta(self, returns, benchmark, expected):
observed = self.empyrical.beta(returns, benchmark)
assert_almost_equal(
observed,
expected,
DECIMAL_PLACES)
if len(returns) == len(benchmark):
# Compare to scipy linregress
returns_arr = returns.values
benchmark_arr = benchmark.values
mask = ~np.isnan(returns_arr) & ~np.isnan(benchmark_arr)
slope, intercept, _, _, _ = stats.linregress(benchmark_arr[mask],
returns_arr[mask])
assert_almost_equal(
observed,
slope
)
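# A standalone sketch of the linregress cross-check used in the two tests
# above (toy data, not from the original suite): beta is the OLS slope of
# returns on benchmark returns, and alpha is the daily regression intercept
# annualized by ~252 trading days; the tests only compare this to empyrical's
# own alpha up to DECIMAL_PLACES, since the annualizations differ slightly.
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
benchmark = rng.normal(0.0005, 0.01, 500)
returns = 0.8 * benchmark + rng.normal(0.0002, 0.005, 500)
mask = ~np.isnan(returns) & ~np.isnan(benchmark)
slope, intercept, _, _, _ = stats.linregress(benchmark[mask], returns[mask])
print("beta  ~", slope)            # close to the true 0.8
print("alpha ~", intercept * 252)  # annualized regression intercept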
def strategy(data, params):
"""
Stack overlapping intervals.
Assumes that each set has the same horizontal position
"""
vjust = params['vjust']
y = data['y'].copy()
y[np.isnan(y)] = 0
heights = np.append(0, y.cumsum())
if params['fill']:
heights = heights / np.abs(heights[-1])
data['ymin'] = np.min([heights[:-1], heights[1:]], axis=0)
data['ymax'] = np.max([heights[:-1], heights[1:]], axis=0)
    # Less intuitive than ymin + vjust*(ymax - ymin), but this way
    # avoids subtracting numbers of potentially similar magnitude,
    # which would lose floating-point precision
data['y'] = ((1-vjust)*data['ymin'] + vjust*data['ymax'])
return data
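# A toy run of the stacking arithmetic above (illustrative values): NaNs
# count as zero height, the cumulative sum gives the interval edges, and
# vjust interpolates between ymin and ymax.
import numpy as np

y = np.array([1.0, np.nan, 2.0])
y[np.isnan(y)] = 0
heights = np.append(0, y.cumsum())                  # [0. 1. 1. 3.]
ymin = np.min([heights[:-1], heights[1:]], axis=0)  # [0. 1. 1.]
ymax = np.max([heights[:-1], heights[1:]], axis=0)  # [1. 1. 3.]
vjust = 0.5
print((1 - vjust) * ymin + vjust * ymax)            # [0.5 1.  2. ] -> interval midpoints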
def _find_index(bg_df, start_date, end_date, make_col_bool):
    if make_col_bool:
        bg_df['date'] = bg_df['created_at'].apply(lambda x: x.date())  # column holding just the date part
    # Find the first entry for the start date and the last entry for the end date.
    # Since older dates have higher indices, use max() for the start and min() for the end.
start_index = bg_df[bg_df['date'] == start_date.date()].index.max()
end_index = bg_df[bg_df['date'] == end_date.date()].index.min()
    # Raise if either date is absent (an empty selection's max()/min() is NaN)
if np.isnan(start_index): raise Exception("Invalid start date: " + str(start_date.date()))
if np.isnan(end_index): raise Exception("Invalid end date: " + str(end_date.date()))
return bg_df, start_index, end_index
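# Why the np.isnan checks above work as "date not found" tests: calling
# .max() or .min() on an empty index selection returns nan rather than
# raising (a minimal sketch with toy data).
import numpy as np
import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3]})
missing = df[df['x'] == 99].index.max()
print(missing, np.isnan(missing))  # nan True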
#Function to get the bg data
def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
m, nv = mis.shape
for j in range(m):
inds = np.argsort(- mis[j, :])[:topk]
if len(inds) >= 2:
plt.clf()
order = np.argsort(cont[:,j])
subdata = data[:, inds][order].T
subdata -= np.nanmean(subdata, axis=1, keepdims=True)
subdata /= np.nanstd(subdata, axis=1, keepdims=True)
columns = [column_label[i] for i in inds]
sns.heatmap(subdata, vmin=-3, vmax=3, cmap=cmap, yticklabels=columns, xticklabels=False, mask=np.isnan(subdata))
filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
plt.title("Latent factor {}".format(j))
plt.yticks(rotation=0)
plt.savefig(filename, bbox_inches='tight')
plt.close('all')
#plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=cont[:, j],
# outfile=prefix + '/relationships/group_num=' + str(j), latent=labels[:, j], alpha=0.1)
def write_data(self, result_dict):
for key, result in six.iteritems(result_dict):
if ss.isspmatrix(result):
if np.isnan(result.data).any():
raise ValueError("data {} have nan".format(key))
elif np.isnan(result).any():
raise ValueError("data {} have nan".format(key))
with SimpleTimer("Writing generated data {} to hdf5 file"
.format(key),
end_in_new_line=False):
if key in self.h5f:
# self.h5f[key][...] = result
raise NotImplementedError("Overwriting not supported.")
else:
                if isinstance(result, (ss.csc_matrix, ss.csr_matrix)):
# sparse matrix
h5sparse.Group(self.h5f).create_dataset(key,
data=result)
else:
self.h5f.create_dataset(key, data=result)
self.h5f.flush()
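# The NaN guard above in isolation (toy matrices): for a sparse matrix only
# the stored values (.data) are checked, since the implicit zeros cannot
# be NaN.
import numpy as np
import scipy.sparse as ss

dense = np.array([[1.0, np.nan]])
sparse = ss.csr_matrix(np.array([[0.0, 2.0]]))
print(np.isnan(dense).any())        # True  -> write_data would raise
print(np.isnan(sparse.data).any())  # False -> safe to write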
def repeat_until_convergence(labelled_data, labelled_clusters, unlabelled_centroids):
    # Iterate until the centroids that best fit labelled_data stop moving
previous_max_difference = 0
while True:
unlabelled_old_centroids = unlabelled_centroids
unlabelled_centroids = move_centroids(labelled_clusters)
labelled_clusters = form_clusters(labelled_data, unlabelled_centroids)
        differences = list(map(lambda a, b: np.linalg.norm(a - b),
                               unlabelled_old_centroids, unlabelled_centroids))
max_difference = max(differences)
if np.isnan(max_difference-previous_max_difference):
difference_change = np.nan
else:
difference_change = abs((max_difference-previous_max_difference)/np.mean([previous_max_difference,max_difference])) * 100
previous_max_difference = max_difference
# difference change is nan once the list of differences is all zeroes.
if np.isnan(difference_change):
break
return labelled_clusters, unlabelled_centroids
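# A toy illustration of the termination condition above: once the centroids
# stop moving, every difference is 0.0, np.mean([0.0, 0.0]) is 0.0, and
# 0.0/0.0 under numpy yields nan, which breaks the loop.
import numpy as np

prev, curr = np.float64(0.0), np.float64(0.0)
with np.errstate(invalid='ignore'):
    change = abs((curr - prev) / np.mean([prev, curr])) * 100
print(np.isnan(change))  # True -> convergence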
def loadData (self, filename, verbose=True, replace_missing=True):
''' Get the data from a text file in one of 3 formats: matrix, sparse, binary_sparse'''
if verbose: print("========= Reading " + filename)
start = time.time()
if self.use_pickle and os.path.exists (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "rb") as pickle_file:  # pickle data must be read in binary mode
vprint (verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
return pickle.load(pickle_file)
if 'format' not in self.info.keys():
self.getFormatData(filename)
if 'feat_num' not in self.info.keys():
self.getNbrFeatures(filename)
data_func = {'dense':data_io.data, 'sparse':data_io.data_sparse, 'sparse_binary':data_io.data_binary_sparse}
data = data_func[self.info['format']](filename, self.info['feat_num'])
    # IMPORTANT: when we replace missing values we double the number of variables
    if self.info['format'] == 'dense' and replace_missing and np.isnan(data).any():
vprint (verbose, "Replace missing values by 0 (slow, sorry)")
data = data_converter.replace_missing(data)
if self.use_pickle:
with open (os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
vprint (verbose, "Saving pickle file : " + os.path.join (self.tmp_dir, os.path.basename(filename) + ".pickle"))
p = pickle.Pickler(pickle_file)
p.fast = True
p.dump(data)
end = time.time()
if verbose: print( "[+] Success in %5.2f sec" % (end - start))
return data
def sanitize_array(array):
''' Replace NaN and Inf (there should not be any!)'''
    a = np.ravel(array)
    maxi = np.nanmax(a[a != np.inf])   # max, ignoring NaN and +inf
    mini = np.nanmin(a[a != -np.inf])  # min, ignoring NaN and -inf
array[array==float('inf')]=maxi
array[array==float('-inf')]=mini
mid = (maxi + mini)/2
array[np.isnan(array)]=mid
return array
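# Example behaviour of sanitize_array on toy input: +inf/-inf are clipped
# to the finite extremes and NaN is replaced by their midpoint.
import numpy as np

arr = np.array([1.0, np.inf, -np.inf, np.nan, 3.0])
print(sanitize_array(arr))  # [1. 3. 1. 2. 3.]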
def htmt(self):
htmt_ = pd.DataFrame(pd.DataFrame.corr(self.data_),
index=self.manifests, columns=self.manifests)
mean = []
allBlocks = []
for i in range(self.lenlatent):
block_ = self.Variables['measurement'][
self.Variables['latent'] == self.latent[i]]
allBlocks.append(list(block_.values))
        block = htmt_.loc[block_, block_]  # .loc replaces the removed .ix
mean_ = (block - np.diag(np.diag(block))).values
mean_[mean_ == 0] = np.nan
mean.append(np.nanmean(mean_))
comb = [[k, j] for k in range(self.lenlatent)
for j in range(self.lenlatent)]
comb_ = [(np.sqrt(mean[comb[i][1]] * mean[comb[i][0]]))
for i in range(self.lenlatent ** 2)]
comb__ = []
for i in range(self.lenlatent ** 2):
        block = (htmt_.loc[allBlocks[comb[i][1]],
                           allBlocks[comb[i][0]]]).values
# block[block == 1] = np.nan
comb__.append(np.nanmean(block))
htmt__ = np.divide(comb__, comb_)
where_are_NaNs = np.isnan(htmt__)
htmt__[where_are_NaNs] = 0
htmt = pd.DataFrame(np.tril(htmt__.reshape(
(self.lenlatent, self.lenlatent)), k=-1), index=self.latent, columns=self.latent)
return htmt
def get_cubic_root(self):
# We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2
# where x = sqrt(mu).
# We substitute x, which is sqrt(mu), with x = y + 1.
# It gives y^3 + py = q
# where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use Vieta's substitution to compute the root.
# There is only one real solution y (which is in [0, 1] ).
# http://mathworld.wolfram.com/VietasSubstitution.html
# eps in the numerator is to prevent momentum = 1 in case of zero gradient
if np.isnan(self._dist_to_opt) or np.isnan(self._h_min) or np.isnan(self._grad_var) \
or np.isinf(self._dist_to_opt) or np.isinf(self._h_min) or np.isinf(self._grad_var):
logging.warning("Input to cubic solver has invalid nan/inf value!")
raise Exception("Input to cubic solver has invalid nan/inf value!")
p = (self._dist_to_opt + eps)**2 * (self._h_min + eps)**2 / 2 / (self._grad_var + eps)
w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
w = math.copysign(1.0, w3) * math.pow(math.fabs(w3), 1.0/3.0)
y = w - p / 3.0 / (w + eps)
x = y + 1
if self._verbose:
logging.debug("p %f, denominator %f", p, self._grad_var + eps)
logging.debug("w3 %f ", w3)
logging.debug("y %f, denominator %f", y, w + eps)
if np.isnan(x) or np.isinf(x):
logging.warning("Output from cubic is invalid nan/inf value!")
raise Exception("Output from cubic is invalid nan/inf value!")
return x
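# A numeric check of the closed form above (illustrative value of p): for
# the depressed cubic y^3 + p*y = -p, the Vieta substitution y = w - p/(3w)
# with w^3 = (-sqrt(p^2 + 4/27*p^3) - p)/2 yields the single real root.
import math

p = 0.37  # p > 0 whenever D, h_min and C are positive
w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
w = math.copysign(1.0, w3) * math.pow(math.fabs(w3), 1.0 / 3.0)
y = w - p / (3.0 * w)
assert abs(y**3 + p * y + p) < 1e-12  # y solves the cubic
x = y + 1  # x = sqrt(mu), so the recovered momentum is x**2
print(x)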
def treegauss_add_row(
data_row,
tree_grid,
program,
latent_row,
vert_ss,
edge_ss,
feat_ss, ):
# Sample latent state using dynamic programming.
TODO('https://github.com/posterior/treecat/issues/26')
# Update sufficient statistics.
for v in range(latent_row.shape[0]):
z = latent_row[v, :]
vert_ss[v, :, :] += np.outer(z, z)
for e in range(tree_grid.shape[1]):
z1 = latent_row[tree_grid[1, e], :]
z2 = latent_row[tree_grid[2, e], :]
edge_ss[e, :, :] += np.outer(z1, z2)
for v, x in enumerate(data_row):
if np.isnan(x):
continue
z = latent_row[v, :]
        feat_ss[v, 0] += 1  # count of observed values for feature v
feat_ss[v, 1] += x
feat_ss[v, 2:] += x * z # TODO Use central covariance.
def imputeSNPs(X):
snpsMean = np.nanmean(X, axis=0)
isNan = np.isnan(X)
    for i, m in enumerate(snpsMean):
        X[isNan[:, i], i] = m
return X
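# Quick check with a toy genotype matrix: each NaN becomes the mean of its
# own column.
import numpy as np

X = np.array([[0.0, 2.0],
              [np.nan, 1.0],
              [2.0, np.nan]])
print(imputeSNPs(X))  # [[0. 2.] [1. 1.] [2. 1.5]]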
def __call__(self, *args, **kwargs):
assert len(args) <= len(self.inputs), "Too many arguments provided"
feed_dict = {}
# Update the args
for inpt, value in zip(self.inputs, args):
self._feed_input(feed_dict, inpt, value)
# Update the kwargs
kwargs_passed_inpt_names = set()
for inpt in self.inputs[len(args):]:
inpt_name = inpt.name.split(':')[0]
inpt_name = inpt_name.split('/')[-1]
assert inpt_name not in kwargs_passed_inpt_names, \
"this function has two arguments with the same name \"{}\", so kwargs cannot be used.".format(inpt_name)
if inpt_name in kwargs:
kwargs_passed_inpt_names.add(inpt_name)
self._feed_input(feed_dict, inpt, kwargs.pop(inpt_name))
else:
assert inpt in self.givens, "Missing argument " + inpt_name
assert len(kwargs) == 0, "Function got extra arguments " + str(list(kwargs.keys()))
# Update feed dict with givens.
for inpt in self.givens:
feed_dict[inpt] = feed_dict.get(inpt, self.givens[inpt])
results = get_session().run(self.outputs_update, feed_dict=feed_dict)[:-1]
if self.check_nan:
if any(np.isnan(r).any() for r in results):
raise RuntimeError("Nan detected")
return results
def test_gradients(self):
inputs = tf.random_normal(
[self.batch_size, self.sequence_length, self.input_depth])
seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
labels = np.random.randint(0, self.vocab_size,
[self.batch_size, self.sequence_length])
helper = decode_helper.TrainingHelper(
inputs=inputs, sequence_length=seq_length)
decoder_fn = self.create_decoder(
helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
initial_state = decoder_fn.cell.zero_state(
self.batch_size, dtype=tf.float32)
decoder_output, _ = decoder_fn(initial_state, helper)
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=decoder_output.logits, labels=labels)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
grads_and_vars = optimizer.compute_gradients(tf.reduce_mean(losses))
#pylint: disable=E1101
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
grads_and_vars_ = sess.run(grads_and_vars)
for grad, _ in grads_and_vars_:
self.assertFalse(np.isnan(grad).any())
return grads_and_vars_
def frame_to_series(self, field, frame, columns=None):
"""
Convert a frame with a DatetimeIndex and sid columns into a series with
a sid index, using the aggregator defined by the given field.
"""
if isinstance(frame, pd.DataFrame):
columns = frame.columns
frame = frame.values
if not len(frame):
return pd.Series(
data=(0 if field == 'volume' else np.nan),
index=columns,
).values
if field in ['price', 'close']:
# shortcircuit for full last row
vals = frame[-1]
if np.all(~np.isnan(vals)):
return vals
return ffill(frame)[-1]
elif field == 'open':
return bfill(frame)[0]
elif field == 'volume':
return np.nansum(frame, axis=0)
elif field == 'high':
return np.nanmax(frame, axis=0)
elif field == 'low':
return np.nanmin(frame, axis=0)
else:
raise ValueError("Unknown field {}".format(field))
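# The per-field aggregations above on a toy frame (rows = bars, columns =
# sids); ffill/bfill are internal helpers, so only the numpy paths are
# shown here.
import numpy as np

frame = np.array([[10.0, np.nan],
                  [np.nan, 12.0],
                  [11.0, np.nan]])
print(np.nansum(frame, axis=0))      # volume: [21. 12.]
print(np.nanmax(frame, axis=0))      # high:   [11. 12.]
print(np.nanmin(frame, axis=0))      # low:    [10. 12.]
print(np.all(~np.isnan(frame[-1])))  # False -> price falls back to ffill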
def update_last_known_values(self):
"""
Store the non-NaN values from our oldest frame in each frequency.
"""
ffillable = self.ffillable_fields
if not len(ffillable):
return
for frequency in self.unique_frequencies:
digest_panel = self.digest_panels.get(frequency, None)
if digest_panel:
oldest_known_values = digest_panel.oldest_frame(raw=True)
else:
oldest_known_values = self.buffer_panel.oldest_frame(raw=True)
oldest_vals = oldest_known_values
oldest_columns = self.fields
for field in ffillable:
f_idx = oldest_columns.get_loc(field)
field_vals = oldest_vals[f_idx]
# isnan would be fast, possible to use?
non_nan_sids = np.where(pd.notnull(field_vals))
key = (frequency.freq_str, field)
key_loc = self.last_known_prior_values.index.get_loc(key)
self.last_known_prior_values.values[
key_loc, non_nan_sids
] = field_vals[non_nan_sids]
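# Answering the "isnan would be fast, possible to use?" comment above: for
# a float ndarray pd.notnull is equivalent to ~np.isnan, so the swap would
# be safe (and skips the pandas dispatch overhead).
import numpy as np
import pandas as pd

field_vals = np.array([1.0, np.nan, 3.0])
assert (np.where(pd.notnull(field_vals))[0]
        == np.where(~np.isnan(field_vals))[0]).all()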
def check_entry(key, value):
if key != 'period_label':
return np.isnan(value) or np.isinf(value)
else:
return False
############################
# Risk Metric Calculations #
############################
def _compute_asset_lifetimes(self):
"""
    Compute and cache a recarray of asset lifetimes.
"""
equities_cols = self.equities.c
buf = np.array(
tuple(
sa.select((
equities_cols.sid,
equities_cols.start_date,
equities_cols.end_date,
)).execute(),
), dtype='<f8', # use doubles so we get NaNs
)
lifetimes = np.recarray(
buf=buf,
shape=(len(buf),),
dtype=[
('sid', '<f8'),
('start', '<f8'),
('end', '<f8')
],
)
start = lifetimes.start
end = lifetimes.end
start[np.isnan(start)] = 0 # convert missing starts to 0
end[np.isnan(end)] = np.iinfo(int).max # convert missing end to INTMAX
# Cast the results back down to int.
return lifetimes.astype([
('sid', '<i8'),
('start', '<i8'),
('end', '<i8'),
])
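# A minimal sketch of the trick above (toy values, illustrative sentinel):
# build the record array as float64 so missing values can be NaN, patch the
# NaNs with integer sentinels, then cast down to int64. 2**53 is used here
# because it is exactly representable as a double; np.iinfo(int).max rounds
# to 2**63 as a double, which does not survive the cast back to int64.
import numpy as np

buf = np.array([(1.0, np.nan, 730.0), (2.0, 365.0, np.nan)], dtype='<f8')
lifetimes = np.recarray(buf=buf, shape=(2,),
                        dtype=[('sid', '<f8'), ('start', '<f8'), ('end', '<f8')])
lifetimes.start[np.isnan(lifetimes.start)] = 0
lifetimes.end[np.isnan(lifetimes.end)] = 2**53
print(lifetimes.astype([('sid', '<i8'), ('start', '<i8'), ('end', '<i8')]))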
def _compute(self, arrays, dates, assets, mask):
data = arrays[0]
bins = self.params['bins']
to_bin = where(mask, data, nan)
result = quantiles(to_bin, bins)
# Write self.missing_value into nan locations, whether they were
# generated by our input mask or not.
result[isnan(result)] = self.missing_value
return result.astype(int64_dtype)