def timeseries2seqs_peroid_trend(data, timestamps, length=3, T=48, peroid=pd.DateOffset(days=7), peroid_len=2):
    """Build (input, target) samples from periodic and recent frames.

    The series is split into contiguous segments wherever two consecutive
    timestamps are not exactly one slot (``24 * 60 // T`` minutes) apart.
    Inside each segment, every position that has ``length`` predecessors in
    the same segment is a candidate target. A candidate is kept only when
    the frames at ``target - peroid * k`` for ``k = 1 .. peroid_len`` all
    exist anywhere in ``timestamps``.

    Args:
        data: Frames aligned with ``timestamps``. Must support indexing with
            a list of integer positions (e.g. a numpy array).
        timestamps: Sequence of ``pd.Timestamp``; anything else is converted
            with ``string2timestamp(timestamps, T=T)``.
        length: Number of immediately preceding frames (trend part).
        T: Number of time slots per day.
        peroid: Offset between two periodic frames.
        peroid_len: Number of periodic frames required per sample.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays. Each entry of ``X`` is
        ``np.vstack`` of the periodic frames (nearest period first) followed
        by the ``length`` recent frames in chronological order; the matching
        entry of ``Y`` is the frame at the target position.
    """
    raw_ts = copy(timestamps)
    # Length guard: an empty input falls through and yields empty arrays
    # instead of raising IndexError on timestamps[0].
    if len(timestamps) > 0 and not isinstance(timestamps[0], pd.Timestamp):
        timestamps = string2timestamp(timestamps, T=T)

    # Position of every timestamp, used for the periodic look-backs.
    timestamp_idx = {t: i for i, t in enumerate(timestamps)}

    # Every position where the series is not contiguous starts a new segment.
    offset = pd.DateOffset(minutes=24 * 60 // T)
    breakpoints = [0]
    for i in range(1, len(timestamps)):
        if timestamps[i-1] + offset != timestamps[i]:
            print(timestamps[i-1], timestamps[i], raw_ts[i-1], raw_ts[i])
            breakpoints.append(i)
    breakpoints.append(len(timestamps))

    X = []
    Y = []
    for b in range(1, len(breakpoints)):
        start, stop = breakpoints[b-1], breakpoints[b]
        print('breakpoints: ', start, stop)
        # `target` is a GLOBAL position. The previous code looked up the
        # target timestamp with a segment-relative index, so the periodic
        # frames were wrong for every segment after the first gap.
        for target in range(start + length, stop):
            target_timestamp = timestamps[target]

            # period: all peroid_len look-backs must be present.
            legal_idx = []
            for pi in range(1, 1 + peroid_len):
                position = timestamp_idx.get(target_timestamp - peroid * pi)
                if position is None:
                    break
                legal_idx.append(position)
            if len(legal_idx) != peroid_len:
                continue

            # trend: the `length` frames right before the target.
            legal_idx.extend(range(target - length, target))

            X.append(np.vstack(data[legal_idx]))
            Y.append(data[target])
    X = np.asarray(X)
    Y = np.asarray(Y)
    print("X shape: ", X.shape, "Y shape:", Y.shape)
    return X, Y
# Comment list
# Table of contents