def compute_histogram(data, labels):
    """Return a value/count table for ``data`` with zero rows for unseen labels.

    Each row of the result is ``[value, count]`` and rows are ordered by
    value. Every entry of ``labels`` that does not occur in ``data`` is
    added as a ``[label, 0]`` row.

    The counting uses ``np.unique(..., return_counts=True)`` in place of
    ``scipy.stats.itemfreq``, which has been removed from SciPy.

    Args:
        data: iterable of one-dimensional, mutually comparable values.
        labels: iterable of values that must each appear in the result.

    Returns:
        A 2-column ``numpy.ndarray``. When no label had to be added it has
        the common dtype of the values and their counts; once padding rows
        are added it is an ``object`` array.
    """
    # np.unique already returns sorted distinct values, so no pre-sort needed.
    values, counts = np.unique(np.asarray(list(data)), return_counts=True)
    # Same layout itemfreq produced: one [value, count] row per distinct value.
    histogram = np.array([values, counts]).T

    # Collect absent labels first (skipping repeats) so we stack only once.
    missing = []
    for label in labels:
        if label not in values and label not in missing:
            missing.append(label)

    if missing:
        padding = np.array([[label, 0] for label in missing], dtype=object)
        histogram = np.vstack((histogram, padding))
        # Re-establish value order after appending the padding rows.
        histogram = histogram[histogram[:, 0].argsort()]
    return histogram
# compute histograms
python类itemfreq()的实例源码
def compute_histogram(data, labels):
    """Count occurrences of each value in ``data`` and pad unseen labels.

    The counting uses ``np.unique(..., return_counts=True)`` in place of
    ``scipy.stats.itemfreq``, which has been removed from SciPy.

    Args:
        data: array-like of values to count (flattened before counting).
        labels: iterable of hashable values that must each be a key of the
            result.

    Returns:
        A ``dict`` mapping each distinct value to its integer count. Labels
        that never occur in ``data`` map to ``0.0``.
    """
    # Function-scope import: this block did not previously reference numpy.
    import numpy as np

    values, counts = np.unique(np.asarray(data), return_counts=True)
    # tolist() yields native Python keys/values, so labels compare and hash
    # against them in the obvious way.
    histogram = dict(zip(values.tolist(), counts.tolist()))
    for label in labels:
        histogram.setdefault(label, .0)
    return histogram
def compute_histogram(data, labels):
    """Return a value/count table for ``data`` with zero rows for unseen labels.

    Each row of the result is ``[value, count]`` and rows are ordered by
    value. Every entry of ``labels`` that does not occur in ``data`` is
    added as a ``[label, 0]`` row.

    The counting uses ``np.unique(..., return_counts=True)`` in place of
    ``scipy.stats.itemfreq``, which has been removed from SciPy.

    Args:
        data: iterable of one-dimensional, mutually comparable values.
        labels: iterable of values that must each appear in the result.

    Returns:
        A 2-column ``numpy.ndarray``. When no label had to be added it has
        the common dtype of the values and their counts; once padding rows
        are added it is an ``object`` array.
    """
    # np.unique already returns sorted distinct values, so no pre-sort needed.
    values, counts = np.unique(np.asarray(list(data)), return_counts=True)
    # Same layout itemfreq produced: one [value, count] row per distinct value.
    histogram = np.array([values, counts]).T

    # Collect absent labels first (skipping repeats) so we stack only once.
    missing = []
    for label in labels:
        if label not in values and label not in missing:
            missing.append(label)

    if missing:
        padding = np.array([[label, 0] for label in missing], dtype=object)
        histogram = np.vstack((histogram, padding))
        # Re-establish value order after appending the padding rows.
        histogram = histogram[histogram[:, 0].argsort()]
    return histogram
# compute histograms
def compute_histogram(data, labels):
    """Count occurrences of each value in ``data`` and pad unseen labels.

    The counting uses ``np.unique(..., return_counts=True)`` in place of
    ``scipy.stats.itemfreq``, which has been removed from SciPy.

    Args:
        data: array-like of values to count (flattened before counting).
        labels: iterable of hashable values that must each be a key of the
            result.

    Returns:
        A ``dict`` mapping each distinct value to its integer count. Labels
        that never occur in ``data`` map to ``0.0``.
    """
    # Function-scope import: this block did not previously reference numpy.
    import numpy as np

    values, counts = np.unique(np.asarray(data), return_counts=True)
    # tolist() yields native Python keys/values, so labels compare and hash
    # against them in the obvious way.
    histogram = dict(zip(values.tolist(), counts.tolist()))
    for label in labels:
        histogram.setdefault(label, .0)
    return histogram
def boc_term_vectors(word_list):
    """Build bag-of-characters count vectors for a list of words.

    Words are lower-cased first. The vocabulary is the sorted set of all
    characters occurring in any word; each word is then turned into a
    vector holding, per vocabulary character, how often it occurs in
    that word.

    Counting uses ``collections.Counter`` in place of
    ``scipy.stats.itemfreq``, which has been removed from SciPy. An empty
    ``word_list`` yields ``([], [])`` instead of raising.

    Args:
        word_list: iterable of strings.

    Returns:
        A tuple ``(chars, vectors)`` where ``chars`` is the sorted list of
        distinct characters and ``vectors`` is a list with one integer
        ``numpy.ndarray`` of length ``len(chars)`` per input word.
    """
    # Function-scope import: this block did not previously use collections.
    from collections import Counter

    words = [word.lower() for word in word_list]
    # set().union(*words) is well-defined even when there are no words.
    unique_chars = sorted(set().union(*words))
    boc_vectors = []
    for word in words:
        char_counts = Counter(word)
        boc_vectors.append(
            np.array([char_counts.get(char, 0) for char in unique_chars],
                     dtype=int))
    return unique_chars, boc_vectors
def _calculate_leaf_value(self, targets):
"""Find optimal value for leaf."""
if self.loss is not None:
# Gradient boosting
self.outcome = self.loss.approximate(targets['actual'], targets['y_pred'])
else:
# Random Forest
if self.regression:
# Mean value for regression task
self.outcome = np.mean(targets['y'])
else:
# Probability for classification task
self.outcome = stats.itemfreq(targets['y'])[:, 1] / float(targets['y'].shape[0])
def calc_class_entropy(y):
    """Return the base-2 entropy of the class distribution in ``y``.

    The per-class counts come from ``np.unique(..., return_counts=True)``
    in place of ``scipy.stats.itemfreq``, which has been removed from
    SciPy. ``stats.entropy`` normalises the counts to probabilities itself.

    Args:
        y: array-like of class labels.

    Returns:
        The entropy in bits, as computed by ``stats.entropy``.
    """
    # Function-scope import: this block did not previously reference numpy.
    import numpy as np

    _, class_counts = np.unique(y, return_counts=True)
    return stats.entropy(class_counts, base=2)
def cudatest_hist():
    """Run the GPU histogram kernel on random data and print the results.

    Generates ``n`` random bin indices below ``BIN_COUNT`` (stored as
    float32), copies them and a zeroed histogram to the device on a CUDA
    stream, launches ``gpu_histogram``, copies both arrays back, and prints
    the elapsed time and the GPU histogram. Finally prints a CPU-side
    value/count table of the same data for comparison.

    Relies on names defined outside this function: ``BIN_COUNT``, ``n``,
    ``bpg``, ``tpb``, ``gpu_histogram``, ``cuda`` and ``timer``.

    The CPU reference uses ``np.unique(..., return_counts=True)`` in place
    of ``scipy.stats.itemfreq``, which has been removed from SciPy.
    """
    src1 = np.random.randint(BIN_COUNT, size=n).astype(np.float32)
    histogram = np.zeros(BIN_COUNT, dtype=np.int32)
    print(src1)
    stream = cuda.stream()  # use stream to trigger async memory transfer
    ts = timer()
    # Control the number of iterations.
    count = 1
    for _ in range(count):
        # Transfers and the kernel launch are queued on the stream; the
        # context manager synchronizes the stream on exit.
        with stream.auto_synchronize():
            d_src1 = cuda.to_device(src1, stream=stream)
            d_hist = cuda.to_device(histogram, stream=stream)
            gpu_histogram[bpg, tpb, stream](d_src1, d_hist)
            d_src1.copy_to_host(src1, stream=stream)
            d_hist.copy_to_host(histogram, stream=stream)
    te = timer()
    print('pinned ', count, " : ", te - ts)
    print(histogram)
    # Reference histogram taken on the data copied back from the device.
    # Original author's note: the GPU histogram may contain a few extra
    # counts because of padding added to the data in the kernel code
    # (kernel not visible here).
    values, counts = np.unique(src1.astype(np.int64), return_counts=True)
    # One [value, count] row per distinct value; divide the count column by
    # its sum to obtain relative frequencies instead.
    hist = np.column_stack((values, counts))
    print(hist)
# cudatest_stencil()