def __parse_pairs__(self, filepath, delimiter=',', target_col=2, column_names=list(), sequence_length=None):
    """Load a delimited "pairs" text file into a dense symmetric matrix.

    Every non-blank line is split on ``delimiter``. The first two fields
    are read as 1-based integer indices (they are shifted to 0-based
    here) and field ``target_col`` is read as a float. The value is
    written to both ``[i, j]`` and ``[j, i]`` of the result.

    Args:
        filepath: Path of the text file to read.
        delimiter: Field separator used in the file.
        target_col: 0-based position, within a line, of the value to store.
        column_names: Column names handed to pandas when the matrix size
            has to be inferred. Must contain ``"target"``; must also
            contain ``"i"`` and ``"j"`` when ``sequence_length`` is None,
            because those two columns are used to find the largest index.
        sequence_length: Side length of the square matrix. When None it
            is inferred as the largest value found in columns ``"i"``
            and ``"j"``.

    Returns:
        A ``(sequence_length, sequence_length)`` array of ``np.double``.
        The diagonal is pre-filled with ``Params.DISTANCE_WITH_ITSELF``
        and every pair absent from the file is left as NaN. Returns None
        if inferring ``sequence_length`` raises ``ValueError`` (this
        includes pandas' ``EmptyDataError`` for an empty file).

    Raises:
        AssertionError: If ``"target"`` is not in ``column_names``.
    """
    # NOTE: the ``list()`` default is never mutated in this method; it is
    # kept unchanged so the signature stays identical for callers.
    assert "target" in column_names, 'column_names must contain "target"'

    with open(filepath, "r") as f:
        # Blank lines carry no pair. Dropping them keeps the manual parser
        # below consistent with pandas' skip_blank_lines=True; otherwise a
        # trailing empty line would crash on int('').
        lines = [line for line in f if line.strip()]

    if sequence_length is None:
        try:
            dataframe = pd.read_csv(filepath, sep=delimiter, skip_blank_lines=True,
                                    header=None, names=column_names, index_col=False)
            # Indices are 1-based, so the largest one is the matrix size.
            sequence_length = np.asarray(dataframe[["i", "j"]]).max()
        except ValueError:
            return None

    data = np.full((sequence_length, sequence_length), np.nan, dtype=np.double)
    np.fill_diagonal(data, Params.DISTANCE_WITH_ITSELF)

    for line in lines:
        elements = line.rstrip("\r\n").split(delimiter)
        # Convert the file's 1-based indices to 0-based array positions.
        i = int(elements[0]) - 1
        j = int(elements[1]) - 1
        value = float(elements[target_col])
        # Mirror the value so the matrix stays symmetric.
        data[i, j] = data[j, i] = value

    missing = np.isnan(data)
    if missing.any():
        # sequence_length is wrong or the input file has missing pairs
        warnings.warn("Warning: Pairs of residues are missing from the contacts text file")
        warnings.warn("Number of missing pairs: %i " % missing.sum())
    return data
评论列表
文章目录