def test_one_hot():
    """Check the length and value range of ``one_hot`` on a sample sentence."""
    sentence = 'The cat sat on the mat.'
    vocab_size = 5
    indices = one_hot(sentence, vocab_size)

    # The sample sentence has six space-separated words.
    expected_count = 6
    assert len(indices) == expected_count

    # Every produced index must fall inside the closed range [0, 4].
    highest = np.max(indices)
    assert highest <= 4
    lowest = np.min(indices)
    assert lowest >= 0
# Example source code for Python's one_hot()
def to_one_hot_array(self, string_list, max_index=256):
    """Transform list of input strings into numpy array of zero-padded one-hot (index) encodings.

    Each string is split into single characters (joined by spaces) and passed
    to ``one_hot`` with ``n=max_index``. The resulting index lists are padded
    with ``pad_sequences`` to the length of the longest one.

    Side effects:
        Sets ``self.max_index``, ``self.max_len``, ``self.relevant_indices``
        (unique values of the padded array), ``self.charset`` (every character
        of the space-joined input), ``self.charset_map`` (character -> index)
        and ``self.inv_charset_map`` (index -> character).

    Args:
        string_list: Non-empty, re-iterable collection of strings.
        max_index: Forwarded to ``one_hot`` as ``n``.

    Returns:
        The padded index array, one row per input string.

    Raises:
        ValueError: If ``string_list`` is empty (``max`` of an empty sequence).
    """
    self.max_index = max_index
    x_one_hot = [one_hot(" ".join(sentence), n=max_index) for sentence in string_list]
    self.max_len = max(len(s) for s in x_one_hot)
    X = np.array(pad_sequences(x_one_hot, maxlen=self.max_len))
    self.relevant_indices = np.unique(X)

    charset = set(" ".join(string_list))
    self.charset = charset

    # Encode each character on its own. Encoding the space-joined charset in a
    # single call and zipping the result back against the charset misaligns
    # the pairs whenever one_hot emits no index for a character (e.g. the
    # separator space itself), because the index list is then shorter than
    # the charset.
    charset_map = {}
    inv_charset_map = {}
    for char in charset:
        indices = one_hot(char, n=max_index)
        if indices:
            charset_map[char] = indices[0]
            # Characters sharing an index overwrite each other here; the last
            # one iterated wins, as with the previous zip-based construction.
            inv_charset_map[indices[0]] = char
    self.charset_map = charset_map
    self.inv_charset_map = inv_charset_map
    return X
def test_one_hot():
    """Verify that ``one_hot`` returns six indices within [0, 4] for the sample text."""
    sample = 'The cat sat on the mat.'
    result = one_hot(sample, 5)

    # One entry is expected for each of the six words in the sample.
    assert len(result) == 6

    # Bounds check on the produced indices: upper bound first, then lower.
    upper_bound, lower_bound = 4, 0
    assert np.max(result) <= upper_bound
    assert np.min(result) >= lower_bound
def lstm_predict(filename, content):
    """Load the model stored at ``filename`` and score ``content`` with it.

    ``content`` is printed, converted to ``str``, encoded with ``one_hot``
    (second argument 5000) and padded via ``sequence.pad_sequences`` (second
    argument 1000) before being passed to the model's ``predict``.

    Returns:
        The ``[0][0]`` element of the prediction converted to nested lists.
    """
    print(content)

    # Encode the text, then wrap it in a one-element batch for padding.
    token_ids = one_hot(str(content), 5000)
    padded_batch = sequence.pad_sequences([token_ids], 1000)

    classifier = load_model(filename)
    scores = classifier.predict(padded_batch).tolist()
    return scores[0][0]
def lstm_predict(filename, content):
    """Return the first prediction value of the model in ``filename`` for ``content``.

    The input is echoed with ``print``, stringified, encoded by ``one_hot``
    with 5000 as its second argument, and padded by
    ``sequence.pad_sequences`` with 1000 as its second argument.
    """
    print(content)

    encoded_text = one_hot(str(content), 5000)
    batch = [encoded_text]  # single-sample batch
    model_input = sequence.pad_sequences(batch, 1000)

    loaded = load_model(filename)
    as_lists = loaded.predict(model_input).tolist()
    first_row = as_lists[0]
    return first_row[0]
def test_one_hot():
    """Assert that encoding the sample sentence gives six indices, each within [0, 4]."""
    phrase = 'The cat sat on the mat.'
    codes = one_hot(phrase, 5)

    # Length check: six entries expected for the six-word phrase.
    count = len(codes)
    assert count == 6

    # Range check on the extremes of the encoded values.
    largest = np.max(codes)
    assert largest <= 4
    smallest = np.min(codes)
    assert smallest >= 0
def one_hot(word_model, n):
    """Call ``text.one_hot`` on ``word_model`` with this module's fixed options.

    The call uses the filter set returned by ``text_filter()``, disables
    lower-casing and splits on a single space.

    Args:
        word_model: Forwarded as the first argument of ``text.one_hot``.
        n: Forwarded as the second argument of ``text.one_hot``.

    Returns:
        Whatever ``text.one_hot`` returns.
    """
    options = dict(filters=text_filter(), lower=False, split=" ")
    return text.one_hot(word_model, n, **options)
def one_hot(word_model, n):
    """Encode ``word_model`` through ``text.one_hot`` with fixed settings.

    Settings applied: ``filters`` taken from ``text_filter()``, ``lower=False``
    and a single-space ``split``. ``n`` is forwarded unchanged.
    """
    active_filters = text_filter()
    encoded = text.one_hot(
        word_model,
        n,
        filters=active_filters,
        lower=False,
        split=" ",
    )
    return encoded
def to_one_hot(self, input_str, max_index=256, padding_length=30):
    """Transform single input string into zero-padded one-hot (index) encoding.

    Args:
        input_str: String to encode; its characters are joined with spaces
            before being handed to ``one_hot``.
        max_index: Forwarded to ``one_hot`` as ``n``.
        padding_length: Forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        The result of ``pad_sequences`` for a batch holding this one sequence.
    """
    # Put a space between characters so each one is its own token.
    spaced_chars = " ".join(input_str)
    char_indices = one_hot(spaced_chars, n=max_index)
    single_item_batch = [char_indices]
    return pad_sequences(single_item_batch, maxlen=padding_length)
def getFeatureMatrix(self, df):
    """Build the model input array from ``df`` and record its input shape.

    The mode is chosen in this order:

    * ``cfg.input_type == "text"``: each row's ``self.text_field`` value is
      encoded with Keras ``one_hot`` (using ``self.vocabulary_size``) and the
      sequences are padded with ``pad_sequences`` using ``self.word_limit``.
    * ``self.objective == "time_series"``: windows of ``self.window_size``
      consecutive rows are taken over the ``self.target`` column followed by
      the ``self.fields`` columns; the row right after each window is stored
      in ``self.seqtargets``.
    * otherwise: the ``self.fields`` columns of ``df`` as a plain array.

    Side effects:
        Sets ``self.input_shape``. Text mode overwrites ``self.fields`` with
        ``[cfg.text_field]``. Time-series mode sets ``self.seqtargets`` and
        prints the shape of the result. All modes store ``self.fields`` under
        ``self.model_metadata["predictors"]``.

    Args:
        df: pandas DataFrame holding the columns named above.

    Returns:
        The input array ``X`` for the selected mode.
    """
    if cfg.input_type == "text":
        from keras.preprocessing.text import one_hot
        from keras.preprocessing.sequence import pad_sequences

        # Python 2 needs the text encoded to UTF-8 bytes before hashing.
        needs_encoding = sys.version_info[0] == 2

        def encode_row(row):
            value = row[self.text_field]
            if needs_encoding:
                value = value.encode("utf-8")
            return one_hot(value, self.vocabulary_size)

        X = pad_sequences(df.apply(encode_row, axis=1), self.word_limit)
        # NOTE(review): rows are read via self.text_field but the recorded
        # field is cfg.text_field -- confirm these always name the same column.
        self.fields = [cfg.text_field]
        self.input_shape = (self.word_limit,)
    elif self.objective == "time_series":
        # Series 0 is the target column; the predictor fields follow in order.
        data = [df[self.target].tolist()]
        num_rows = len(data[0])
        data.extend(df[field].tolist() for field in self.fields)
        num_series = len(data)

        instances = []
        target_instances = []
        # NOTE(review): this bound stops one window short of the end, so the
        # last row is never used as a target -- confirm this is intended.
        for start in range(num_rows - (self.window_size + 1)):
            instances.append(
                [
                    [series[start + offset] for series in data]
                    for offset in range(self.window_size)
                ]
            )
            target_instances.append(
                [series[start + self.window_size] for series in data]
            )

        X = np.array(instances)
        self.seqtargets = np.array(target_instances)
        # The explicit reshape keeps the array 3-D even when there are no windows.
        X = np.reshape(X, (X.shape[0], self.window_size, num_series))
        print(X.shape)
        self.input_shape = (self.window_size, num_series)
    else:
        # DataFrame.as_matrix was removed in pandas 1.0; column selection plus
        # .values gives the same array on both old and new pandas versions.
        X = df[self.fields].values
        self.input_shape = (len(self.fields),)
    self.model_metadata["predictors"] = self.fields
    return X