def load_yeast_tavazoie():
"""Load and return the yeast dataset (Tavazoie et al., 2000) used in the original biclustering study
of Cheng and Church (2000) as a pandas.DataFrame. All elements equal to -1 are missing values. This
dataset is freely available in http://arep.med.harvard.edu/biclustering/.
Reference
---------
Cheng, Y., & Church, G. M. (2000). Biclustering of expression data. In Ismb (Vol. 8, No. 2000, pp. 93-103).
Tavazoie, S., Hughes, J. D., Campbell, M. J., Cho, R. J., & Church, G. M. (1999). Systematic determination of genetic
network architecture. Nature genetics, 22(3), 281-285.
"""
module_dir = dirname(__file__)
data = np.loadtxt(join(module_dir, 'data', 'yeast_tavazoie', 'yeast_tavazoie.txt'), dtype=np.double)
genes = np.loadtxt(join(module_dir, 'data', 'yeast_tavazoie', 'genes_yeast_tavazoie.txt'), dtype=np.character)
return pd.DataFrame(data, index=genes)
python类character()的实例源码
def RATWriteArray(rat, array, field, start=0):
"""
Pure Python implementation of writing a chunk of the RAT
from a numpy array. Type of array is coerced to one of the types
(int, double, string) supported. Called from RasterAttributeTable.WriteArray
"""
if array is None:
raise ValueError("Expected array of dim 1")
# if not the array type convert it to handle lists etc
if not isinstance(array, numpy.ndarray):
array = numpy.array(array)
if array.ndim != 1:
raise ValueError("Expected array of dim 1")
if (start + array.size) > rat.GetRowCount():
raise ValueError("Array too big to fit into RAT from start position")
if numpy.issubdtype(array.dtype, numpy.integer):
# is some type of integer - coerce to standard int
# TODO: must check this is fine on all platforms
# confusingly numpy.int 64 bit even if native type 32 bit
array = array.astype(numpy.int32)
elif numpy.issubdtype(array.dtype, numpy.floating):
# is some type of floating point - coerce to double
array = array.astype(numpy.double)
elif numpy.issubdtype(array.dtype, numpy.character):
# cast away any kind of Unicode etc
array = array.astype(numpy.character)
else:
raise ValueError("Array not of a supported type (integer, double or string)")
return RATValuesIONumPyWrite(rat, field, start, array)
def __getitem__(self, obj):
val = numpy.ndarray.__getitem__(self, obj)
if isinstance(val, numpy.character):
temp = val.rstrip()
if numpy.char._len(temp) == 0:
val = ''
else:
val = temp
return val
def _collect_attrs(self, name, obj):
"""Collect all the attributes for the provided file object.
"""
for key in obj.ncattrs():
value = getattr(obj, key)
value = np.squeeze(value)
if issubclass(value.dtype.type, str) or np.issubdtype(value.dtype, np.character):
self.file_content["{}/attr/{}".format(name, key)] = str(value)
else:
self.file_content["{}/attr/{}".format(name, key)] = value
def normalize_attr_values(a: Any) -> np.ndarray:
"""
Take all kinds of input values and validate/normalize them.
Args:
a List, tuple, np.matrix, np.ndarray or sparse matrix
Elements can be strings, numbers or bools
Returns
a_normalized An np.ndarray with elements either float64 or unicode string objects
Remarks:
This method should be used to prepare the values to be stored in the HDF5 file. You should not
return the values to the caller; for that, use materialize_attr_values()
"""
scalar = False
if np.isscalar(a):
a = np.array([a])
scalar = True
arr = normalize_attr_array(a)
if np.issubdtype(arr.dtype, np.integer) or np.issubdtype(arr.dtype, np.floating):
pass # We allow all these types
elif np.issubdtype(arr.dtype, np.character) or np.issubdtype(arr.dtype, np.object_):
arr = normalize_attr_strings(arr)
elif np.issubdtype(arr.dtype, np.bool_):
arr = arr.astype('ubyte')
if scalar:
return arr[0]
else:
return arr