def pix2ang_ring(nside, ipix):
"""Calculate the angular coordinates on the sphere for each pixel
index in the RING ordering scheme.
Parameters
----------
ipix : 1D or 2D `~numpy.ndarray`
The indexes of the HEALPix pixels in the RING ordering
Returns
-------
theta : 1D or 2D `~numpy.ndarray`
The polar angles (i.e., latitudes), ? ? [0, ?]. (unit: rad)
phi : 1D or 2D `~numpy.ndarray`
The azimuthal angles (i.e., longitudes), ? ? [0, 2?). (unit: rad)
The shape is the same as the input array.
NOTE
----
* Only support the *RING* ordering scheme
* This is the JIT-optimized version that partially replaces the
``healpy.pix2ang``
"""
    shape = ipix.shape
    size = ipix.size
    ipix = ipix.flatten()
    theta = np.zeros(size, dtype=np.float64)
    phi = np.zeros(size, dtype=np.float64)
    for i in range(size):
        theta_, phi_ = pix2ang_ring_single(nside, ipix[i])
        theta[i] = theta_
        phi[i] = phi_
    return (theta.reshape(shape), phi.reshape(shape))
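
A minimal usage sketch (assuming NumPy is imported as ``np`` and that the Numba-compiled ``pix2ang_ring_single`` helper referenced above is defined in the same module):

# Usage sketch -- relies on the `pix2ang_ring_single` helper referenced above.
import numpy as np

nside = 16
ipix = np.arange(12 * nside * nside)     # a HEALPix map has 12 * nside**2 pixels
theta, phi = pix2ang_ring(nside, ipix)
# theta (colatitude) lies in [0, pi]; phi (longitude) lies in [0, 2*pi).
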
def compute_log_p_sample(data, f, t):
    C = len(data.cn)
    # Mixture weights of the three populations, parameterised by t and f.
    population_prior = np.zeros(3)
    population_prior[0] = (1 - t)
    population_prior[1] = t * (1 - f)
    population_prior[2] = t * f
    ll = np.ones(C, dtype=np.float64) * np.inf * -1
    for c in range(C):
        # Expected variant allele fraction under state c, averaging the
        # per-population values weighted by prior and copy number.
        e_vaf = 0
        norm_const = 0
        for i in range(3):
            e_cn = population_prior[i] * data.cn[c, i]
            e_vaf += e_cn * data.mu[c, i]
            norm_const += e_cn
        e_vaf /= norm_const
        # State log-prior plus binomial log-likelihood of the observed counts.
        ll[c] = data.log_pi[c] + binomial_log_pdf(data.a + data.b, data.b, e_vaf)
    # Marginalise over states in log space.
    return log_sum_exp(ll)
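
The function above relies on ``binomial_log_pdf`` and ``log_sum_exp``, which are not shown here. They are assumed to compute a binomial log-probability mass and a numerically stable log-sum-exp; the SciPy-based sketch below is an assumption, not the original code:

# Hypothetical helper implementations assumed by compute_log_p_sample().
import numpy as np
from scipy.special import gammaln

def binomial_log_pdf(n, k, p):
    # log C(n, k) + k*log(p) + (n - k)*log(1 - p)
    return (gammaln(n + 1) - gammaln(k + 1) - gammaln(n - k + 1)
            + k * np.log(p) + (n - k) * np.log1p(-p))

def log_sum_exp(x):
    # log(sum(exp(x))), computed stably by factoring out the maximum.
    m = np.max(x)
    return m + np.log(np.sum(np.exp(x - m)))
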
def hist_cuda_test():
    histogram_array = src1  # np.zeros(vectorSize*BIN_COUNT, dtype=np.int32).reshape(vectorSize, BIN_COUNT)
    # This will be calculated from the camera's image processed on the GPU.
    # Let's hardcode it for the moment.
    histogram = src1[SEARCH_INDEX]  # np.zeros(BIN_COUNT, dtype=np.float32)
    results = np.zeros(9, dtype=np.float64)
    # One-element array so it can be transferred to and from the device.
    foundIndex = np.full(1, -1, dtype=np.int32)
    # Use a stream to trigger async memory transfer.
    cstream = cuda.stream()
    ts = timer()
    # Increase the counter to measure efficiency.
    count = 1
    for i in range(count):
        with cstream.auto_synchronize():
            # For histogram comparison.
            d_histogram_array = cuda.to_device(histogram_array, stream=cstream)
            d_histogram = cuda.to_device(histogram, stream=cstream)
            d_results = cuda.to_device(results, stream=cstream)
            d_foundIndex = cuda.to_device(foundIndex, stream=cstream)
            hist_comp[1, vectorSize, cstream](d_histogram_array, d_histogram, d_results, d_foundIndex)
            d_histogram_array.copy_to_host(histogram_array, stream=cstream)
            d_histogram.copy_to_host(histogram, stream=cstream)
            d_results.copy_to_host(results, stream=cstream)
            d_foundIndex.copy_to_host(foundIndex, stream=cstream)
    te = timer()
    print('GPU Process ', count, ' Iterations : in ', te - ts)
    print('histogram is')
    print(results)
    print('Found Index ', foundIndex[0])
def hist_cuda_test():
    histogram_array = src1  # np.zeros(vectorSize*BIN_COUNT, dtype=np.int32).reshape(vectorSize, BIN_COUNT)
    histogram = src1[SEARCH_INDEX]  # np.zeros(BIN_COUNT, dtype=np.float32)
    results = np.zeros(9, dtype=np.float64)
    # Use a stream to trigger async memory transfer.
    cstream = cuda.stream()
    ts = timer()
    # Increase the counter to measure efficiency.
    count = 1
    for i in range(count):
        with cstream.auto_synchronize():
            # For histogram comparison.
            d_histogram_array = cuda.to_device(histogram_array, stream=cstream)
            d_histogram = cuda.to_device(histogram, stream=cstream)
            d_results = cuda.to_device(results, stream=cstream)
            hist_comp[1, vectorSize, cstream](d_histogram_array, d_histogram, d_results)
            d_histogram_array.copy_to_host(histogram_array, stream=cstream)
            d_histogram.copy_to_host(histogram, stream=cstream)
            d_results.copy_to_host(results, stream=cstream)
    te = timer()
    print('GPU Process ', count, ' Iterations : in ', te - ts)
    print('histogram is')
    print(results)
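
Both test drivers above, and the ``hist_comp`` kernels below, reference module-level names (``src1``, ``vectorSize``, ``BIN_COUNT``, ``SEARCH_INDEX``, ``timer``) that are defined elsewhere. A hypothetical setup consistent with the shape comments in the kernels (9 stored feature vectors of 34 bins each) might look like this; the concrete values are assumptions:

# Hypothetical module-level setup; names/values inferred from the kernels' shape comments.
import math
import numpy as np
from timeit import default_timer as timer
from numba import cuda, float64, float32, int8

vectorSize = 9        # number of stored feature histograms
BIN_COUNT = 34        # bins per histogram
SEARCH_INDEX = 3      # which stored histogram to search for (arbitrary choice)
rng = np.random.default_rng(0)
src1 = rng.random((vectorSize, BIN_COUNT)) + 1e-6  # strictly positive, for log()
src1 /= src1.sum(axis=1, keepdims=True)            # normalise each histogram
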
@cuda.jit
def hist_comp(arry, hist, result, index):
    # We have N threads per block, and only one block.
    x = cuda.grid(1)
    # R holds one KL-divergence per thread (i.e. per stored feature vector).
    R = cuda.shared.array(9, dtype=float64)
    # Number of feature vectors: arry.shape == (9, 34)
    A = cuda.shared.array(shape=(9, 34), dtype=float64)
    # Vector to compare against: hist.shape[0] == BIN_COUNT == 34
    B = cuda.shared.array(34, dtype=float64)
    for i in range(BIN_COUNT):
        B[i] = hist[i]
        A[x, i] = arry[x, i]
    cuda.syncthreads()
    # Do the actual calculation:
    # Kullback-Leibler divergence (in bits) of B against this thread's row A[x].
    Sum = 0.00
    for i in range(BIN_COUNT):
        a = B[i]
        b = A[x, i]
        Sum += a * (math.log(a / b) / math.log(2.0))
    # R contains the KL-divergences.
    R[x] = Sum
    cuda.syncthreads()
    # Shared variables for the minimum divergence and its index.
    Min = cuda.shared.array(1, dtype=float32)
    mIndex = cuda.shared.array(1, dtype=int8)
    if x == 0:
        # Serial scan by thread 0: avoids the race of every thread writing Min.
        Min[0] = R[0]
        mIndex[0] = 0
        for i in range(cuda.blockDim.x):
            if R[i] < Min[0]:
                Min[0] = R[i]
                mIndex[0] = i
        # Report the index of the best-matching histogram.
        index[0] = mIndex[0]
@cuda.jit
def hist_comp(arry, hist, result):
    # We have N threads per block, and only one block.
    x = cuda.grid(1)
    # R holds one KL-divergence per thread (i.e. per stored feature vector).
    R = cuda.shared.array(9, dtype=float64)
    # Number of feature vectors: arry.shape == (9, 34)
    A = cuda.shared.array(shape=(9, 34), dtype=float64)
    # Vector to compare against: hist.shape[0] == BIN_COUNT == 34
    B = cuda.shared.array(34, dtype=float64)
    for i in range(BIN_COUNT):
        B[i] = hist[i]
        A[x, i] = arry[x, i]
    cuda.syncthreads()
    # Do the actual calculation:
    # Kullback-Leibler divergence (in bits) of B against this thread's row A[x].
    Sum = 0.00
    for i in range(BIN_COUNT):
        a = B[i]
        b = A[x, i]
        Sum += a * (math.log(a / b) / math.log(2.0))
    # R contains the KL-divergences.
    R[x] = Sum
    cuda.syncthreads()
    # Find the minimum divergence OR the sum of all divergences
    # by a tree reduction over R.
    rSize = cuda.blockDim.x >> 1
    while rSize > 0:
        if x < rSize:
            R[x] = R[x] + R[x + rSize]
            # R[x] = min(R[x], R[x + rSize])
        rSize >>= 1
        cuda.syncthreads()
    # With an odd block size (9), the reduction above never touches the last
    # element, so fold it in separately.
    # TODO: handle non-power-of-two block sizes inside the reduction itself.
    if x == 0:
        R[x] = R[x] + R[cuda.blockDim.x - 1]
        # R[x] = min(R[x], R[cuda.blockDim.x - 1])
        result[x] = R[x]
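
For sanity-checking the kernels, a plain NumPy reference for the same computation (per-row KL divergence in bits of the query against each stored histogram, plus the total and the best-match index) could look like the sketch below. It builds on the hypothetical ``src1``/``SEARCH_INDEX`` setup shown earlier and is not part of the original code:

# Hypothetical CPU reference for validating the hist_comp kernels.
import numpy as np

def hist_comp_cpu(arry, hist):
    # KL divergence (in bits) of `hist` against every row of `arry`.
    kl = np.sum(hist * np.log2(hist / arry), axis=1)
    return kl, kl.sum(), int(np.argmin(kl))

# kl, total, best = hist_comp_cpu(src1, src1[SEARCH_INDEX])
# `best` should equal SEARCH_INDEX, since the KL divergence of a
# histogram with itself is zero.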