def read_files(directory, length, num):
    """Read text files from ``directory`` and return pairs of text chunks.

    Files are read one at a time and their contents concatenated. Reading
    stops early once the number of whole ``length``-sized chunks gathered so
    far yields more than ``num`` distinct pairs; otherwise every file is
    read. The combined text is then cut into consecutive chunks and the
    first ``num`` pairs of chunks are returned.

    Args:
        directory: Path of the directory to scan. Only entries for which
            ``isfile`` is true are read; sub-directories are not descended.
        length: Size of each text chunk (the slice step used on the
            concatenated text).
        num: Maximum number of chunk pairs to return.

    Returns:
        A list of at most ``num`` 2-tuples of chunks, in the order produced
        by ``itertools.combinations``. The last chunk may be shorter than
        ``length`` when the text does not divide evenly.
    """
    files = [join(directory, f)
             for f in listdir(directory) if isfile(join(directory, f))]

    # Make sure there are enough text chunks to make ``num`` combinations
    # of them. Pieces are collected in a list and joined once, tracking the
    # running length so the early-exit check does not need the joined text.
    parts = []
    total_len = 0
    count = 0
    for f in files:
        print("reading %s..." % f)
        piece = read(f)
        parts.append(piece)
        total_len += len(piece)
        count += 1
        # Floor division: the chunk count must be an integer. True division
        # (the Python 3 meaning of ``/``) would hand ``nCr`` a float.
        num_chunks = total_len // length
        if num < nCr(num_chunks, 2):
            break
    txt = "".join(parts)

    print("Read %s/%s files in '%s'" % (count, len(files), directory))
    chunks = [txt[x:x + length] for x in range(0, len(txt), length)]
    # Format the message *inside* the print call. Applying ``%`` to the
    # result of ``print(...)`` is ``None % tuple`` on Python 3 and raises
    # TypeError.
    print(("Calculating distance average of %s measurements of text "
           "strings length %s...") % (num, length))
    return list(islice(combinations(chunks, 2), 0, num))
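# 评论列表 (comment list -- web page residue, not part of the code)
# 文章目录 (article table of contents -- web page residue, not part of the code)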