import numpy as np


def make_complete_graph(num_vertices):
"""Constructs a complete graph.
The pairing function is: k = v1 + v2 * (v2 - 1) // 2
Args:
num_vertices: Number of vertices.
Returns: A tuple with elements:
V: Number of vertices.
K: Number of edges.
grid: a 3 x K grid of (edge, vertex, vertex) triples.
"""
V = num_vertices
K = V * (V - 1) // 2
grid = np.zeros([3, K], np.int32)
k = 0
for v2 in range(V):
for v1 in range(v2):
grid[:, k] = [k, v1, v2]
k += 1
return grid
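

# A quick sanity check for make_complete_graph (hypothetical usage, not from the
# original source): for V = 4 vertices there are K = 6 edges, and column k of the
# grid holds (k, v1, v2) with v1 < v2 and k = v1 + v2 * (v2 - 1) // 2.
def _demo_make_complete_graph():
    grid = make_complete_graph(4)
    assert grid.shape == (3, 6)
    # Edges appear in the order (0,1), (0,2), (1,2), (0,3), (1,3), (2,3).
    assert list(grid[1:, 0]) == [0, 1]
    assert list(grid[1:, 5]) == [2, 3]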
def make_tree(edges):
"""Constructs a tree graph from a set of (vertex,vertex) pairs.
Args:
edges: A list or set of unordered (vertex, vertex) pairs.
Returns: A tuple with elements:
V: Number of vertices.
E: Number of edges.
grid: a 3 x E grid of (edge, vertex, vertex) triples.
"""
assert all(isinstance(edge, tuple) for edge in edges)
edges = [tuple(sorted(edge)) for edge in edges]
edges.sort()
E = len(edges)
grid = np.zeros([3, E], np.int32)
for e, (v1, v2) in enumerate(edges):
grid[:, e] = [e, v1, v2]
return grid
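

# A minimal sketch showing how make_tree normalizes its input (hypothetical
# usage, not from the original source): pairs may be given in either vertex
# order, and the output columns are sorted lexicographically.
def _demo_make_tree():
    grid = make_tree([(2, 1), (0, 1)])
    assert grid.shape == (3, 2)
    # Columns: (e=0, 0, 1) and (e=1, 1, 2).
    assert list(grid[:, 0]) == [0, 0, 1]
    assert list(grid[:, 1]) == [1, 1, 2]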
def __init__(self, num_vertices):
logger.debug('TreeStructure with %d vertices', num_vertices)
self._num_vertices = num_vertices
self._num_edges = num_vertices - 1
self.set_edges([(v, v + 1) for v in range(num_vertices - 1)])
self._complete_grid = None # Lazily constructed.
self._vertices = np.arange(num_vertices, dtype=np.int32)
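
# The constructor above starts from a linear chain: for num_vertices = 4 the
# initial edges are [(0, 1), (1, 2), (2, 3)], i.e. num_vertices - 1 edges.
# A hypothetical usage sketch, assuming this __init__ belongs to the
# TreeStructure class named in the log message and that set_edges stores the
# edge list:
#
#     tree = TreeStructure(4)
#     assert tree._num_edges == 3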
def numba_csgraph(csr, node_props=None):
    if node_props is None:
        # Default to a writeable per-node property of 1.0; materialize the
        # broadcast so the array can safely be mutated downstream.
        node_props = np.broadcast_to(1., csr.shape[0]).copy()
return CSGraph(csr.indptr, csr.indices, csr.data,
np.array(csr.shape, dtype=np.int32), node_props)
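

# A hypothetical usage sketch for numba_csgraph (not from the original source):
# wrap a scipy.sparse CSR adjacency matrix, optionally passing per-node
# properties; otherwise every node gets a default property of 1.0. CSGraph is
# the container class referenced above but not shown in this listing.
def _demo_numba_csgraph():
    from scipy.sparse import csr_matrix
    adj = csr_matrix(np.array([[0., 1., 0.],
                               [1., 0., 1.],
                               [0., 1., 0.]]))
    return numba_csgraph(adj, node_props=np.arange(3, dtype=np.float64))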


# NOTE: the CUDA demo below assumes module-level constants BIN_COUNT, n, bpg and
# tpb (bin count, input size, blocks per grid, threads per block) are defined
# elsewhere in the original source file.
from numba import cuda, int32
from timeit import default_timer as timer
from scipy.stats import itemfreq


def cudatest_hist():
# src1 = np.arange(n, dtype=np.float32)
src1 = np.random.randint(BIN_COUNT,size=n).astype(np.float32)
histogram = np.zeros(BIN_COUNT, dtype=np.int32)
print(src1)
stream = cuda.stream() # use stream to trigger async memory transfer
ts = timer()
    # Control the number of timing iterations
count = 1
for i in range(count):
with stream.auto_synchronize():
# ts = timer()
d_src1 = cuda.to_device(src1, stream=stream)
d_hist = cuda.to_device(histogram, stream=stream)
# gpu_1d_stencil[bpg, tpb, stream](d_src1)
gpu_histogram[bpg, tpb, stream](d_src1,d_hist)
d_src1.copy_to_host(src1, stream=stream)
d_hist.copy_to_host(histogram, stream=stream)
te = timer()
print('pinned ',count," : ", te - ts)
print(histogram)
    # Compute a histogram of the original data on the host for comparison.
    # It may contain a few extra counts because of the padding added to the
    # original data in the kernel code.
hist = src1.astype(np.int64)
x = itemfreq(hist.ravel())
hist = x#[:, 1]/sum(x[:, 1])
print(hist)
# cudatest_stencil()
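

# gpu_histogram is launched above but its body is not shown in this listing.
# A minimal sketch of such a kernel (an assumption, not the original code):
# each thread bins one element of src by its integer value and bumps the
# matching counter atomically.
@cuda.jit
def gpu_histogram_sketch(src, hist):
    i = cuda.grid(1)
    if i < src.shape[0]:
        bin_idx = int(src[i]) % hist.shape[0]
        cuda.atomic.add(hist, bin_idx, 1)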
@cuda.jit
def thresholding(arry, hist):
    # 32x32 threads per block; each block stages its tile in shared memory.
    A = cuda.shared.array(shape=(32, 32), dtype=int32)
x,y = cuda.grid(2)
ty = cuda.threadIdx.x
tx = cuda.threadIdx.y
A[ty,tx] = arry[x,y]
cuda.syncthreads()
threadCountX = A.shape[0] - 1
threadCountY = A.shape[1] - 1
    # Threads on the tile border are skipped; interior threads compute the
    # thresholded value and add it to the corresponding histogram bin.
if (ty > 0 and (threadCountX-ty) > 0 ) and (tx > 0 and (threadCountY-tx) > 0):
# You can do the Processing here. ^_^
code = 0
        # We need to make sure that each value is accessible to each thread
# TODO: make them atomic
center = A[ty, tx]
code = 0 if center > 150 else 255
code = ( code - center)
A[ty,tx] = code
# Wait All Threads to Sync here.
cuda.syncthreads()
val = A[ty,tx]
cuda.atomic.add(arry, (x,y),val)
cuda.syncthreads()
        # This atomic operation is equivalent to hist[code % BIN_COUNT] += 1
ind = code % BIN_COUNT
cuda.atomic.add(hist, ind, 1)
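

# A hypothetical host-side launch of the thresholding kernel (not from the
# original source). It assumes a square image whose sides are multiples of the
# 32x32 block size and a BIN_COUNT-sized histogram, mirroring cudatest_hist.
def run_thresholding_sketch(image):
    hist = np.zeros(BIN_COUNT, dtype=np.int32)
    threads_per_block = (32, 32)
    blocks_per_grid = (image.shape[0] // 32, image.shape[1] // 32)
    d_img = cuda.to_device(image)
    d_hist = cuda.to_device(hist)
    thresholding[blocks_per_grid, threads_per_block](d_img, d_hist)
    d_img.copy_to_host(image)
    d_hist.copy_to_host(hist)
    return image, hist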
@cuda.jit
def unsharp_masking(arry, hist):
    # 32x32 threads per block; each block stages its tile in shared memory.
    A = cuda.shared.array(shape=(32, 32), dtype=int32)
    # H = cuda.shared.array(BIN_COUNT, dtype=int32)
x,y = cuda.grid(2)
ty = cuda.threadIdx.x
tx = cuda.threadIdx.y
A[ty,tx] = arry[x,y]
cuda.syncthreads()
threadCountX = A.shape[0] - 1
threadCountY = A.shape[1] - 1
    # Threads on the tile border are skipped; interior threads compute the
    # filtered value and add it to the corresponding histogram bin.
if (ty > 0 and (threadCountX-ty) > 0 ) and (tx > 0 and (threadCountY-tx) > 0):
# # You can do the Processing here. ^_^
code = 0
        # We need to make sure that each value is accessible to each thread
# TODO: make them atomic
center = A[ty, tx]
        # Weighted 3x3 neighbourhood: corners weighted -1, edge neighbours -2.
        code += A[ty - 1][tx - 1] * -1
        code += A[ty][tx - 1] * -2
        code += A[ty + 1][tx - 1] * -1
        code += A[ty + 1][tx] * -2
        code += A[ty + 1][tx + 1] * -1
        code += A[ty][tx + 1] * -2
        code += A[ty - 1][tx + 1] * -1
        code += A[ty - 1][tx] * -2
        # Integer division keeps code an integer for the histogram index below.
        code = code // 16
        code = code - center
A[ty,tx] = code
# cuda.atomic.add(A, (ty,tx),code)
cuda.syncthreads()
val = A[ty,tx]
cuda.atomic.add(arry, (x,y),val)
cuda.syncthreads()
        # This atomic operation is equivalent to hist[code % BIN_COUNT] += 1
ind = code % BIN_COUNT
cuda.atomic.add(hist, ind, 1)
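

# A host-side NumPy reference for the 3x3 weighted sum used in unsharp_masking
# (a sketch for checking kernel output, with the weights read off the kernel
# body above: corners -1, edge neighbours -2). The kernel skips tile-border
# threads, so results only match away from the 32x32 tile edges.
def unsharp_reference(image):
    img = image.astype(np.int64)
    weights = np.array([[-1, -2, -1],
                        [-2,  0, -2],
                        [-1, -2, -1]], dtype=np.int64)
    out = np.zeros_like(img)
    for i in range(1, img.shape[0] - 1):
        for j in range(1, img.shape[1] - 1):
            code = (img[i - 1:i + 2, j - 1:j + 2] * weights).sum() // 16
            out[i, j] = code - img[i, j]
    return out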