def ngrams_extract(string, size=1024):
    """Hash the 2-, 3-, 4- and 5-grams of ``string`` into a count vector.

    Every n-gram yielded by ``ngrams(string, n)`` for n = 2..5 is assigned
    to bucket ``hash(gram) % size`` and that bucket is incremented by one.
    The bucket counts are then damped with ``log(count + 1.0)``.

    Args:
        string: The sequence passed to ``ngrams``.
        size: Number of buckets in the output vector. Defaults to 1024,
            the value that was previously hard-coded.

    Returns:
        ``log(vec + 1.0)``, where ``vec`` is the ``size``-long bucket-count
        vector created with ``zeros((size,))``.
    """
    # NOTE(review): `ngrams`, `zeros` and `log` are defined outside this
    # block -- presumably nltk.util.ngrams and numpy; confirm at the imports.

    # Echo a random sample of the inputs; SAMPLE_RATE is the probability
    # threshold compared against random.random().
    if random.random() < SAMPLE_RATE:
        # Single pre-formatted argument so the output is identical whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('[*] %s' % (string,))

    vec = zeros((size,))
    for n in range(2, 6):
        for gram in ngrams(string, n):
            # NOTE(review): on Python 3 the built-in hash() of str data is
            # salted per process unless PYTHONHASHSEED is fixed, so bucket
            # positions may differ between runs -- confirm this is acceptable
            # before persisting vectors or models built on them.
            vec[hash(gram) % size] += 1
    return log(vec + 1.0)
# Comment list
# Table of contents