def PCR_preprocess(file_path, log_mode = False, pseudotime_mode = False,
                   pcv_method = 'Rprincurve', anchor_gene = None,
                   exclude_marker_names = None):
    """Load a tab-delimited expression file, filter its markers and save the result.

    Steps, in order:
      1. Create the output directory for 'file_path'.
      2. Read cell IDs, cell stages and the data matrix via
         'get_PCR_or_RNASeq_data'.
      3. Read the marker names from the first column of the file.
      4. Drop every marker listed in 'exclude_marker_names', together with
         the matching column of the data matrix.
      5. In pseudotime mode, replace the cell stages with the output of
         'infer_pseudotime'.
      6. Drop every marker whose column is zero in at least 80% of the rows.
      7. Write the preprocessed data through 'write_preprocessed_data'.

    Parameters
    ----------
    file_path : str
        Path to the tab-delimited input file. Marker names are read from
        column 0, after skipping 1 header row in pseudotime mode and
        2 header rows otherwise.
    log_mode : bool, optional
        Accepted for interface compatibility; it is not read by this function.
    pseudotime_mode : bool, optional
        If True, cell stages are inferred by 'infer_pseudotime' rather than
        taken from the file.
    pcv_method : str, optional
        Forwarded unchanged to 'infer_pseudotime'.
    anchor_gene : optional
        Forwarded unchanged to 'infer_pseudotime'.
    exclude_marker_names : iterable of str, optional
        Marker names to remove before any further processing.

    Returns
    -------
    tuple
        (cell_IDs, data, markers, cell_stages cast to float, data_tag,
        output_directory)
    """

    # Markers that are zero in this fraction of the rows (or more) are dropped.
    low_gene_fraction_max = 0.8

    data_tag, output_directory = create_output_directory(file_path)

    cell_IDs, cell_stages, data = get_PCR_or_RNASeq_data(file_path, pseudotime_mode)

    with open(file_path, 'r') as f:
        markers = np.loadtxt(f, dtype = str, delimiter = '\t',
                             skiprows = 1 if pseudotime_mode else 2, usecols = [0])

    # 'reshape' returns a new array instead of working in place, so the
    # result has to be assigned back. This guarantees a 1-D array of names
    # even when the file holds a single marker (loadtxt then yields a 0-d array).
    markers = markers.reshape(markers.size)

    if exclude_marker_names:
        indices = np.zeros(0, dtype = int)
        for name in exclude_marker_names:
            indices = np.append(indices, np.where(markers == name)[0])

        # Marker i corresponds to column i of the data matrix.
        data = np.delete(data, indices, axis = 1)
        markers = np.delete(markers, indices)

    if pseudotime_mode:
        cell_stages = infer_pseudotime(data, output_directory, data_tag, pcv_method,
                                       anchor_gene, markers)

    # Keep only the columns whose fraction of zero entries is below the threshold.
    condition = np.mean(data == 0, axis = 0) < low_gene_fraction_max
    data = np.compress(condition, data, 1)
    markers = np.compress(condition, markers)

    write_preprocessed_data(output_directory, cell_IDs, cell_stages, data, markers)

    return cell_IDs, data, markers, cell_stages.astype(float), data_tag, output_directory
评论列表
文章目录