def one_hot_encoding_sequences(seqs):
    """One-hot encode nucleotide sequences into a 2-D ``uint8`` matrix.

    Each row of the result describes one sequence and is laid out as four
    consecutive blocks of ``maxlen`` columns, one block per character of
    ``'acgt'`` (in that order), where ``maxlen`` is the length of the
    longest input sequence.  Column ``k * maxlen + p`` is 1 when position
    ``p`` of the sequence holds the ``k``-th character and 0 otherwise.

    Matching is case-sensitive: any character other than lowercase
    ``a``/``c``/``g``/``t`` leaves its position zero in all four blocks.
    Sequences shorter than ``maxlen`` are zero-padded at the end of every
    block, so a given column means the same thing in every row.

    Args:
        seqs: A sized collection of sequences (``str``; ``bytes`` and
            ``bytearray`` are decoded byte-for-byte).

    Returns:
        ``numpy.ndarray`` of shape ``(len(seqs), 4 * maxlen)`` and dtype
        ``uint8``; an empty ``(0, 0)`` array when ``seqs`` is empty.
    """
    CHARS = 'acgt'
    CHARS_COUNT = len(CHARS)
    seq_count = len(seqs)
    if seq_count == 0:
        # max() of an empty iterable raises; there is nothing to encode.
        return numpy.zeros((0, 0), dtype=numpy.uint8)
    maxlen = max(map(len, seqs))
    # Build as (sequence, character, position) and flatten at the end so each
    # character block always starts at a multiple of maxlen.
    res = numpy.zeros((seq_count, CHARS_COUNT, maxlen), dtype=numpy.uint8)
    for si, seq in enumerate(seqs):
        if isinstance(seq, (bytes, bytearray)):
            # latin-1 maps every byte to one code point, so it never fails.
            seq = seq.decode('latin-1')
        # One array element per character.  numpy.chararray(buffer=str) would
        # instead fill every slot with the first character on Python 3.
        arr = numpy.array(list(seq), dtype='U1')
        for ii, char in enumerate(CHARS):
            res[si, ii, :arr.size] = arr == char
    return res.reshape(seq_count, CHARS_COUNT * maxlen)
评论列表
文章目录