def text_to_char_array(original):
r"""
Given a Python string ``original``, remove unsupported characters, map characters
to integers and return a numpy array representing the processed string.
"""
# Create list of sentence's words w/spaces replaced by ''
result = original.replace(" '", "") # TODO: Deal with this properly
result = result.replace("'", "") # TODO: Deal with this properly
result = result.replace(' ', ' ')
result = result.split(' ')
# Tokenize words into letters adding in SPACE_TOKEN where required
result = np.hstack([SPACE_TOKEN if xt == '' else list(xt) for xt in result])
# Map characters into indicies
result = np.asarray([SPACE_INDEX if xt == SPACE_TOKEN else (
ord(xt) - FIRST_INDEX if ord(xt)>FIRST_INDEX else 27+int(xt)) for xt in result])
# Add result to results
return result
评论列表
文章目录