Python zlib.adler32() usage examples

def hash(self, token):
    # Map a token to a bucket id: Adler-32 of the scrubbed token, modulo the id range.
    return zlib.adler32(scrub(token)) % self.id_range
def adler32(data):
    # Mask to 32 bits so the checksum is non-negative on both Python 2 and Python 3.
    return zlib.adler32(data) & 0xFFFFFFFF
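On Python 2, zlib.adler32() can return a negative (signed) value, which is why the wrapper above masks with 0xFFFFFFFF; Python 3 always returns an unsigned value. A minimal illustration:

import zlib

checksum = zlib.adler32(b'hello world') & 0xFFFFFFFF
print(hex(checksum))   # unsigned 32-bit Adler-32 of the payload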
def createMsg(self, body, replyflags=0):
    pflgs = replyflags
    if _has_compression and Pyro.config.PYRO_COMPRESSION:
        before = len(body)
        bz = zlib.compress(body)  # default compression level
        if len(bz) < before:
            pflgs |= PFLG_COMPRESSED
            body = bz
    crc = 0
    if Pyro.config.PYRO_CHECKSUM and _has_compression:
        crc = zlib.adler32(body)
        pflgs |= PFLG_CHECKSUM
    if Pyro.config.PYRO_XML_PICKLE == 'gnosis':
        pflgs |= PFLG_XMLPICKLE_GNOSIS
    return struct.pack(self.headerFmt, self.headerID, self.version, self.headerSize, len(body), pflgs, crc) + body
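A rough sketch of the receiving side implied by this header layout (the function name and the flag values are assumptions for illustration; the real Pyro protocol code differs in detail): unpack the header with the same format string, verify the Adler-32 checksum over the (possibly compressed) body, then decompress if flagged.

import struct, zlib

PFLG_COMPRESSED = 0x01   # assumed flag values, for illustration only
PFLG_CHECKSUM = 0x02

def parse_msg(header_fmt, packet):
    header_size = struct.calcsize(header_fmt)
    header_id, version, hdr_size, body_size, pflgs, crc = struct.unpack(header_fmt, packet[:header_size])
    body = packet[header_size:header_size + body_size]
    if pflgs & PFLG_CHECKSUM and (zlib.adler32(body) & 0xFFFFFFFF) != (crc & 0xFFFFFFFF):
        raise ValueError('checksum mismatch')
    if pflgs & PFLG_COMPRESSED:
        body = zlib.decompress(body)
    return body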
def _decode_key_block(self, key_block_compressed, key_block_info_list):
    key_list = []
    i = 0
    for compressed_size, decompressed_size in key_block_info_list:
        start = i
        end = i + compressed_size
        # 4 bytes : compression type
        key_block_type = key_block_compressed[start:start + 4]
        # 4 bytes : adler checksum of decompressed key block
        adler32 = unpack('>I', key_block_compressed[start + 4:start + 8])[0]
        if key_block_type == b'\x00\x00\x00\x00':
            # no compression
            key_block = key_block_compressed[start + 8:end]
        elif key_block_type == b'\x01\x00\x00\x00':
            if lzo is None:
                print("LZO compression is not supported")
                break
            # decompress key block
            header = b'\xf0' + pack('>I', decompressed_size)
            key_block = lzo.decompress(key_block_compressed[start + 8:end], initSize=decompressed_size, blockSize=1308672)
        elif key_block_type == b'\x02\x00\x00\x00':
            # decompress key block
            key_block = zlib.decompress(key_block_compressed[start + 8:end])
        # extract one single key block into a key list
        key_list += self._split_key_block(key_block)
        # notice that adler32 returns a signed value on Python 2, so mask before comparing
        assert adler32 == zlib.adler32(key_block) & 0xffffffff
        i += compressed_size
    return key_list
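For reference, each key block this decoder accepts starts with a 4-byte compression-type marker, then a 4-byte big-endian Adler-32 of the decompressed data, then the payload. A minimal sketch (hypothetical helper name) of producing a zlib-compressed, type-2 block in that layout:

import zlib
from struct import pack

def encode_key_block_zlib(raw_block):
    # marker b'\x02\x00\x00\x00' + adler32 of the uncompressed data + zlib payload
    checksum = zlib.adler32(raw_block) & 0xFFFFFFFF
    return b'\x02\x00\x00\x00' + pack('>I', checksum) + zlib.compress(raw_block)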
def render(self, output='html', **kwargs):
    if self.cache:
        # Cache key: Adler-32 of the pipe-joined rendering parameters.
        cache_entry = 'ponyconf-%d' % adler32('|'.join(map(str, [self.site.domain, output, self.pending] + list(kwargs.values()))).encode('utf-8'))
        result = cache.get(cache_entry)
        if not result:
            result = getattr(self, '_as_%s' % output)(**kwargs)
            cache.set(cache_entry, result, 3 * 60 * 60)  # 3 hours
        return result
    else:
        return getattr(self, '_as_%s' % output)(**kwargs)
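A standalone illustration of how such an adler32-based cache key could be derived (the domain and parameter values here are made up):

from zlib import adler32

parts = ['example.org', 'html', False, 'talks']   # hypothetical rendering parameters
cache_entry = 'ponyconf-%d' % adler32('|'.join(map(str, parts)).encode('utf-8'))
print(cache_entry)   # e.g. 'ponyconf-<32-bit integer>'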
def _compress(self, fileobj, body):
    """Compress ctb-file-body and write it to <fileobj>."""
    def writestr(s):
        if PYTHON3:
            fileobj.write(s.encode())
        else:
            fileobj.write(s)

    if PYTHON3:
        body = body.encode()
    comp_body = zlib.compress(body)
    adler_chksum = zlib.adler32(comp_body)
    writestr('PIAFILEVERSION_2.0,CTBVER1,compress\r\npmzlibcodec')
    fileobj.write(pack('LLL', adler_chksum, len(body), len(comp_body)))
    fileobj.write(comp_body)
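A rough sketch of a matching read side under the layout the writer above produces (text header ending in 'pmzlibcodec', then three native unsigned longs: checksum, uncompressed size, compressed size, then the zlib stream). The function name is hypothetical and the checksum comparison assumes Python 3's unsigned adler32:

import zlib
from struct import unpack, calcsize

def _decompress(fileobj):
    data = fileobj.read()
    offset = data.index(b'pmzlibcodec') + len(b'pmzlibcodec')
    hdr_len = calcsize('LLL')
    adler_chksum, body_len, comp_len = unpack('LLL', data[offset:offset + hdr_len])
    comp_body = data[offset + hdr_len:offset + hdr_len + comp_len]
    assert zlib.adler32(comp_body) == adler_chksum   # checksum covers the compressed stream
    body = zlib.decompress(comp_body)
    assert len(body) == body_len
    return body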
def pack_data(self, buf):
    data = self.rnd_data(len(buf)) + buf
    data_len = len(data) + 8
    # CRC-32 of the 2-byte length field, truncated to 16 bits
    crc = binascii.crc32(struct.pack('>H', data_len)) & 0xFFFF
    data = struct.pack('<H', crc) + data
    data = struct.pack('>H', data_len) + data
    # trailing Adler-32 over everything packed so far
    adler32 = zlib.adler32(data) & 0xFFFFFFFF
    data += struct.pack('<I', adler32)
    return data
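A packet produced by pack_data is laid out as: 2-byte big-endian total length, 2-byte little-endian CRC of that length field, random padding, payload, and a 4-byte little-endian Adler-32 over all preceding bytes; client_post_decrypt below reverses exactly these steps. A small sketch (hypothetical helper name) of checking the trailer on its own:

import struct, zlib

def verify_trailer(packet):
    # True if the last 4 bytes are the Adler-32 of everything before them.
    body, trailer = packet[:-4], packet[-4:]
    return struct.pack('<I', zlib.adler32(body) & 0xFFFFFFFF) == trailer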
def client_post_decrypt(self, buf):
    if self.raw_trans:
        return buf
    self.recv_buf += buf
    out_buf = b''
    while len(self.recv_buf) > 4:
        # first verify the 16-bit CRC of the length field
        crc = struct.pack('<H', binascii.crc32(self.recv_buf[:2]) & 0xFFFF)
        if crc != self.recv_buf[2:4]:
            raise Exception('client_post_decrypt data incorrect crc')
        length = struct.unpack('>H', self.recv_buf[:2])[0]
        if length >= 8192 or length < 7:
            self.raw_trans = True
            self.recv_buf = b''
            raise Exception('client_post_decrypt data error')
        if length > len(self.recv_buf):
            break
        # verify the trailing Adler-32 over the whole packet minus the trailer itself
        if struct.pack('<I', zlib.adler32(self.recv_buf[:length - 4]) & 0xFFFFFFFF) != self.recv_buf[length - 4:length]:
            self.raw_trans = True
            self.recv_buf = b''
            raise Exception('client_post_decrypt data incorrect checksum')
        # skip the random padding: its length is in byte 4, or in bytes 5..6 if byte 4 == 255
        pos = common.ord(self.recv_buf[4])
        if pos < 255:
            pos += 4
        else:
            pos = struct.unpack('>H', self.recv_buf[5:7])[0] + 4
        out_buf += self.recv_buf[pos:length - 4]
        self.recv_buf = self.recv_buf[length:]
    if out_buf:
        self.decrypt_packet_num += 1
    return out_buf
The remaining snippets are from test_corpora_hashdictionary.py (project: topical_word_embeddings, author: thunlp).
def testDocFreqOneDoc(self):
    texts = [['human', 'interface', 'computer']]
    d = HashDictionary(texts, myhash=zlib.adler32)
    expected = {10608: 1, 12466: 1, 31002: 1}
    self.assertEqual(d.dfs, expected)
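The expected ids follow directly from the hashing scheme shown in the first snippet above: adler32 of the UTF-8 token, modulo the dictionary's id range (assuming HashDictionary's default id_range of 32000):

import zlib

id_range = 32000   # assumed HashDictionary default
for token in ('human', 'interface', 'computer'):
    print(token, zlib.adler32(token.encode('utf-8')) % id_range)
# prints: human 31002, interface 12466, computer 10608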
def testDocFreqAndToken2IdForSeveralDocsWithOneWord(self):
    # two docs
    texts = [['human'], ['human']]
    d = HashDictionary(texts, myhash=zlib.adler32)
    expected = {31002: 2}
    self.assertEqual(d.dfs, expected)
    # only one token (human) should exist
    expected = {'human': 31002}
    self.assertEqual(d.token2id['human'], expected['human'])
    self.assertEqual(d.token2id.keys(), expected.keys())

    # three docs
    texts = [['human'], ['human'], ['human']]
    d = HashDictionary(texts, myhash=zlib.adler32)
    expected = {31002: 3}
    self.assertEqual(d.dfs, expected)
    # only one token (human) should exist
    expected = {'human': 31002}
    self.assertEqual(d.token2id['human'], expected['human'])
    self.assertEqual(d.token2id.keys(), expected.keys())

    # four docs
    texts = [['human'], ['human'], ['human'], ['human']]
    d = HashDictionary(texts, myhash=zlib.adler32)
    expected = {31002: 4}
    self.assertEqual(d.dfs, expected)
    # only one token (human) should exist
    expected = {'human': 31002}
    self.assertEqual(d.token2id['human'], expected['human'])
    self.assertEqual(d.token2id.keys(), expected.keys())
def testDocFreqForOneDocWithSeveralWord(self):
    # two words
    texts = [['human', 'cat']]
    d = HashDictionary(texts, myhash=zlib.adler32)
    expected = {9273: 1, 31002: 1}
    self.assertEqual(d.dfs, expected)

    # three words
    texts = [['human', 'cat', 'minors']]
    d = HashDictionary(texts, myhash=zlib.adler32)
    expected = {9273: 1, 15001: 1, 31002: 1}
    self.assertEqual(d.dfs, expected)
def testDebugMode(self):
    # two words
    texts = [['human', 'cat']]
    d = HashDictionary(texts, debug=True, myhash=zlib.adler32)
    expected = {9273: set(['cat']), 31002: set(['human'])}
    self.assertEqual(d.id2token, expected)

    # now the same thing, with debug off
    texts = [['human', 'cat']]
    d = HashDictionary(texts, debug=False, myhash=zlib.adler32)
    expected = {}
    self.assertEqual(d.id2token, expected)
def testFilter(self):
    d = HashDictionary(self.texts, myhash=zlib.adler32)
    d.filter_extremes()
    expected = {}
    self.assertEqual(d.dfs, expected)

    d = HashDictionary(self.texts, myhash=zlib.adler32)
    d.filter_extremes(no_below=0, no_above=0.3)
    expected = {29104: 2, 31049: 2, 28591: 2, 5232: 2, 10608: 2, 12466: 2, 15001: 2, 31002: 2}
    self.assertEqual(d.dfs, expected)

    d = HashDictionary(self.texts, myhash=zlib.adler32)
    d.filter_extremes(no_below=3, no_above=1.0, keep_n=4)
    expected = {5798: 3, 12736: 3, 18451: 3, 23844: 3}
    self.assertEqual(d.dfs, expected)