def fmt_numerical(self, data):
    """
    Collect the continuous (non-tag) fields of a record into a flat list.

    There are two categories of data Porn Sieve gathers:
      1. Tag data, represented as a binary, mostly zero array of numbers
      2. Data which is continuous, such as duration, average review, etc.
    For the tags, CountVectorizer can be used out-of-the-box, but the other
    data has to be put together in a list by hand; that is what this does.

    Args:
        data: Mapping of field name to value for a single record.

    Returns:
        A list with one entry per retained field, ordered by sorted key:
          * "feedback" and "img" keys are skipped;
          * list values are skipped;
          * None becomes 0;
          * a string "scrape_date" becomes seconds since 1970-01-01
            (as a float);
          * any other value is kept as-is when np.isreal() accepts it.

    Raises:
        ValueError: if a string "scrape_date" matches neither
            "%Y-%m-%d %H:%M:%S.%f" nor "%Y-%m-%d %H:%M:%S".
    """
    nums = []
    # Sorted to ensure the data is always in the same order.
    for k in sorted(data):
        value = data[k]
        if k in ("feedback", "img"):
            continue
        if isinstance(value, list):
            continue
        if value is None:
            nums.append(0)
        elif k == "scrape_date" and isinstance(value, str):
            try:
                stamp = datetime.strptime(value, "%Y-%m-%d %H:%M:%S.%f")
            except ValueError:
                # The fractional part may be missing, e.g. str(datetime)
                # drops it when the microseconds are zero.
                stamp = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
            # Naive epoch; same value as the deprecated utcfromtimestamp(0).
            epoch = datetime(1970, 1, 1)
            nums.append((stamp - epoch).total_seconds())
        # NOTE(review): np.isreal's result for string values is documented
        # as unreliable -- confirm whether string fields are ever expected
        # to reach this branch.
        elif np.isreal(value):
            nums.append(value)
    return nums
评论列表
文章目录