def find_duplicates(a, key=None, ignoremask=True, return_index=False):
"""
Find the duplicates in a structured array along a given key
Parameters
----------
a : array-like
Input array
key : {string, None}, optional
Name of the fields along which to check the duplicates.
If None, the search is performed by records
ignoremask : {True, False}, optional
Whether masked data should be discarded or considered as duplicates.
return_index : {False, True}, optional
Whether to return the indices of the duplicated values.
Examples
--------
>>> from numpy.lib import recfunctions as rfn
>>> ndtype = [('a', int)]
>>> a = np.ma.array([1, 1, 1, 2, 2, 3, 3],
... mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
>>> rfn.find_duplicates(a, ignoremask=True, return_index=True)
... # XXX: judging by the output, the ignoremask flag has no effect
"""
a = np.asanyarray(a).ravel()
# Get a dictionary of fields
fields = get_fieldstructure(a.dtype)
# Get the sorting data (by selecting the corresponding field)
base = a
if key:
for f in fields[key]:
base = base[f]
base = base[key]
# Get the sorting indices and the sorted data
sortidx = base.argsort()
sortedbase = base[sortidx]
sorteddata = sortedbase.filled()
# Compare the sorting data
flag = (sorteddata[:-1] == sorteddata[1:])
# If masked data must be ignored, set the flag to false where needed
if ignoremask:
sortedmask = sortedbase.recordmask
flag[sortedmask[1:]] = False
flag = np.concatenate(([False], flag))
# We need to take the point on the left as well (else we're missing it)
flag[:-1] = flag[:-1] + flag[1:]
duplicates = a[sortidx][flag]
if return_index:
return (duplicates, sortidx[flag])
else:
return duplicates
recfunctions.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录