def is_text(s, threshold=0.3):
    """
    Determine whether a string looks like text rather than arbitrary bytes.

    Heuristic derived from the Python Cookbook: a value is considered text
    when it contains no NUL character and the share of characters outside
    the printable-ASCII set (codes 32-126 plus newline, carriage return,
    tab and backspace) does not exceed ``threshold``.

    Characters outside ASCII are counted as non-text, matching the
    ASCII-only character set used by the heuristic.

    :param s: str, bytes or bytearray, the input to classify
    :param threshold: float, maximum allowed proportion (0.0-1.0) of
        non-text characters for the input to still count as text
    :return: bool, True if the input looks like text, False otherwise
    """
    # An empty input contains nothing suspicious; treat it as text.
    if not s:
        return True

    # Normalise the input to a sequence of integer code points so that the
    # same logic works for byte strings and text strings alike.
    if isinstance(s, (bytes, bytearray)):
        codes = bytearray(s)
    else:
        codes = [ord(ch) for ch in s]

    # A NUL character is a strong indicator of binary data.
    if 0 in codes:
        return False

    text_codes = set(range(32, 127))
    text_codes.update((8, 9, 10, 13))  # \b, \t, \n, \r

    non_text = sum(1 for code in codes if code not in text_codes)

    # Force true division: integer division would truncate the ratio to 0
    # and make the threshold meaningless.
    return non_text / float(len(codes)) <= threshold
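# Comment list (page-navigation residue from the source web page; not code)
# Article table of contents (page-navigation residue from the source web page; not code)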