def getFeatureMatrix(self, df):
    """Build the model input array from ``df`` and record its shape.

    The branch taken is decided in this order:

    * ``cfg.input_type == "text"``: every value of the ``self.text_field``
      column is encoded with Keras ``one_hot`` (hash space of
      ``self.vocabulary_size``) and passed through ``pad_sequences`` with
      ``self.word_limit`` as the maximum length.
    * ``self.objective == "time_series"``: the target column and each
      column in ``self.fields`` are cut into sliding windows of
      ``self.window_size`` consecutive rows; the row that follows each
      window is stored in ``self.seqtargets``.
    * otherwise: the columns listed in ``self.fields`` are returned as a
      2-D array.

    Side effects: sets ``self.input_shape`` in every branch, overwrites
    ``self.fields`` in the text branch, sets ``self.seqtargets`` in the
    time-series branch, and stores ``self.fields`` under
    ``self.model_metadata["predictors"]``.

    Args:
        df: pandas DataFrame holding the columns named above.

    Returns:
        The feature array ``X`` for the selected branch.
    """
    if cfg.input_type == "text":
        # Imported lazily so Keras is only required for text models.
        from keras.preprocessing.text import one_hot
        from keras.preprocessing.sequence import pad_sequences

        if sys.version_info[0] == 2:
            # Python 2: hand Keras a UTF-8 byte string.
            def to_text(value):
                return value.encode("utf-8")
        else:
            def to_text(value):
                return value

        # A plain list of integer sequences is what pad_sequences documents
        # as its input; row-wise DataFrame.apply returning lists changes
        # its result type between pandas versions.
        encoded = [
            one_hot(to_text(text), self.vocabulary_size)
            for text in df[self.text_field]
        ]
        X = pad_sequences(encoded, maxlen=self.word_limit)
        # NOTE(review): the column is read via self.text_field above but
        # recorded via cfg.text_field here -- confirm both always agree.
        self.fields = [cfg.text_field]
        self.input_shape = (self.word_limit,)
    elif self.objective == "time_series":
        window = self.window_size
        num_series = 1 + len(self.fields)
        # Series 0 is the target; the predictor fields follow in order.
        columns = [df[self.target].tolist()]
        columns.extend(df[field].tolist() for field in self.fields)
        num_rows = len(columns[0])
        # rows[t] holds the value of every series at time step t.
        rows = [list(step) for step in zip(*columns)]
        # NOTE(review): this count stops one short of the last complete
        # window (num_rows - window would include it). Kept unchanged
        # because callers may rely on the current length -- confirm.
        num_instances = num_rows - (window + 1)
        instances = [
            [rows[start + offset] for offset in range(window)]
            for start in range(num_instances)
        ]
        target_instances = [
            rows[start + window] for start in range(num_instances)
        ]
        X = np.array(instances)
        self.seqtargets = np.array(target_instances)
        # The explicit reshape keeps the array 3-D even with no instances.
        X = np.reshape(X, (X.shape[0], window, num_series))
        print(X.shape)
        self.input_shape = (window, num_series)
    else:
        # DataFrame.as_matrix() was removed in pandas 1.0; selecting the
        # columns and taking .values works on old and new pandas alike.
        X = df[self.fields].values
        self.input_shape = (len(self.fields),)
    # NOTE(review): the source's indentation was lost; this assignment is
    # assumed to apply to every branch, not only the last one -- confirm.
    self.model_metadata["predictors"] = self.fields
    return X
评论列表
文章目录