# Pygments' RawTokenLexer.get_tokens() (method shown without its class):
# decompresses a raw token dump before re-tokenizing it. Needs
# `from io import BytesIO`; text_type is str on Python 3 (historically
# pygments.util.text_type).
def get_tokens(self, text):
    if isinstance(text, text_type):
        # raw token stream never has any non-ASCII characters
        text = text.encode('ascii')
    if self.compress == 'gz':
        import gzip
        gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
        text = gzipfile.read()
    elif self.compress == 'bz2':
        import bz2
        text = bz2.decompress(text)
    # do not call Lexer.get_tokens() because we do not want Unicode
    # decoding to occur, and stripping is not optional.
    text = text.strip(b'\n') + b'\n'
    for i, t, v in self.get_tokens_unprocessed(text):
        yield t, v
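# Hedged usage sketch: the method above belongs to Pygments' RawTokenLexer,
# which re-lexes dumps in the "TokenType<TAB>repr(value)" format produced by
# RawTokenFormatter. Import path and exact behavior may vary by version.
from pygments.lexers.special import RawTokenLexer

raw = "Token.Name\t'x'\nToken.Operator\t'='\n"
for ttype, value in RawTokenLexer().get_tokens(raw):
    print(ttype, repr(value))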
# Python 2 variant of the same method: uses unicode, cStringIO, and a b()
# bytes-literal helper (likely pygments.util.b in older versions). Needs
# `import cStringIO`.
def get_tokens(self, text):
    if isinstance(text, unicode):
        # raw token stream never has any non-ASCII characters
        text = text.encode('ascii')
    if self.compress == 'gz':
        import gzip
        gzipfile = gzip.GzipFile('', 'rb', 9, cStringIO.StringIO(text))
        text = gzipfile.read()
    elif self.compress == 'bz2':
        import bz2
        text = bz2.decompress(text)
    # do not call Lexer.get_tokens() because we do not want Unicode
    # decoding to occur, and stripping is not optional.
    text = text.strip(b('\n')) + b('\n')
    for i, t, v in self.get_tokens_unprocessed(text):
        yield t, v
def decompress(self, zipped):
    """
    Tries every available decompression algorithm on the provided data.
    Based on the assumption that a decompression algorithm raises an
    exception when given incompatible data, this finds the algorithm that
    was used and returns the decompressed data.
    :param str zipped: The compressed data, in raw bytes.
    :rtype: str
    :return: The decompressed data, or the input unchanged if no algorithm
        accepted it.
    """
    plain = zipped
    for decomp in self.decomps:
        try:
            return decomp(zipped)
        except Exception:
            pass
    return plain
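# A minimal self-contained sketch of the try-them-all pattern above; the
# Decompressor class name and the particular algorithm list are assumptions.
import bz2
import lzma
import zlib

class Decompressor(object):
    def __init__(self):
        # Each candidate raises an exception on incompatible input.
        self.decomps = [zlib.decompress, bz2.decompress, lzma.decompress]

    def decompress(self, zipped):
        for decomp in self.decomps:
            try:
                return decomp(zipped)
            except Exception:
                pass
        return zipped  # no algorithm matched; return input unchanged

d = Decompressor()
assert d.decompress(bz2.compress(b"hello")) == b"hello"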
def load(cls, branch, kv_store, msg_cls, hash):
    # Fetch the revision blob for this hash from the branch's config store,
    # decompressing it if the class stores blobs compressed.
    blob = kv_store[hash]
    if cls.compress:
        blob = decompress(blob)
    data = loads(blob)
    # Load the config data, then recursively reassemble child revisions.
    config_hash = data['config']
    config_data = cls.load_config(kv_store, msg_cls, config_hash)
    children_list = data['children']
    assembled_children = {}
    node = branch._node
    for field_name, meta in children_fields(msg_cls).iteritems():  # Python 2
        child_msg_cls = tmp_cls_loader(meta.module, meta.type)
        children = []
        for child_hash in children_list[field_name]:
            child_node = node._mknode(child_msg_cls)
            child_node.load_latest(child_hash)
            children.append(child_node.latest)
        assembled_children[field_name] = children
    return cls(branch, config_data, assembled_children)
def import_to_store(self, compressed_nar):
    """Given a compressed NAR, extract it and import it into the nix store.
    :param compressed_nar: The bytes of a NAR, compressed.
    :type compressed_nar: ``bytes``
    """
    # Figure out how to extract the content.
    if self.compression.lower() in ("xz", "xzip"):
        data = lzma.decompress(compressed_nar)
    elif self.compression.lower() in ("bz2", "bzip2"):
        data = bz2.decompress(compressed_nar)
    else:
        data = gzip.decompress(compressed_nar)
    # Once extracted, convert it into a nix export object and import.
    export = self.nar_to_export(data)
    imported_path = export.import_to_store()
    return imported_path
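# Quick round-trip check for the three dispatch branches above, using only
# the Python 3 standard library.
import bz2, gzip, lzma
for mod in (lzma, bz2, gzip):
    assert mod.decompress(mod.compress(b"nar bytes")) == b"nar bytes"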
def _retrieve_content(self, compression, encoding, content):
"""Extract the content of the sent file."""
# Select the appropriate decompressor.
if compression is None:
decompress = lambda s: s
elif compression == 'bzip2':
decompress = bz2.decompress
else:
raise ValueError('Invalid compression: %s' % compression)
# Select the appropriate decoder.
if encoding == 'base64':
decode = base64.decodebytes
else:
raise ValueError('Invalid encoding: %s' % encoding)
return decompress(decode(content.encode("ascii")))
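# Hedged round-trip for the bzip2 + base64 path above (standalone, without
# the enclosing class).
import base64, bz2
content = base64.encodebytes(bz2.compress(b"payload")).decode("ascii")
assert bz2.decompress(base64.decodebytes(content.encode("ascii"))) == b"payload"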
def registerMe(name):
    global sock, tcp_ip, tcp_port
    # config.txt holds the server address as "<host> <port>".
    with open('config.txt', 'r') as inf:
        config = inf.readline()
    tcp_ip, tcp_port = config.split(' ')
    tcp_port = int(tcp_port)
    sock.connect((tcp_ip, tcp_port))
    data = sock.recv(MAX_LENGTH)
    # id = json.loads(str(decompress(data), 'utf-8'))['id']
    id = json.loads(str(data, 'utf-8'))['id']
    jdata = dict()
    jdata['name'] = name
    s = json.dumps(jdata)
    # sock.send(compress(bytes(s + '\n', 'utf-8')))
    sock.send(bytes(s + '\n', 'utf-8'))
    return id
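# Assumed wire format (not in the source): config.txt carries
# "127.0.0.1 9000" on its first line, and the server's greeting is a JSON
# object such as {"id": 42}, which the commented-out lines would
# additionally run through compress()/decompress().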
def load(cls, path):
"Loads database from path and tests identity."
with open(path, 'rb') as file:
obj = pickle.loads(bz2.decompress(file.read()))
assert isinstance(obj, cls), 'Could not load a database object!'
obj.__path = path
return obj
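# A minimal save() counterpart, assumed rather than taken from the source:
# pickle the object, bz2-compress it, and write it so load() above can
# round-trip it.
import bz2
import pickle

def save(obj, path):
    with open(path, 'wb') as file:
        file.write(bz2.compress(pickle.dumps(obj)))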
########################################################################
def bz2_pack(source):
    "Returns 'source' as a bzip2-compressed, self-extracting python script."
    # Python 2 version: 'source' is a byte string and exec is a statement.
    import bz2, base64
    out = ""
    compressed_source = bz2.compress(source)
    out += 'import bz2, base64\n'
    out += "exec bz2.decompress(base64.b64decode('"
    out += base64.b64encode(compressed_source)
    out += "'))\n"
    return out
def gz_pack(source):
    "Returns 'source' as a gzip-compressed, self-extracting python script."
    # Python 2 version; uses zlib streams (not gzip files) despite the name.
    import zlib, base64
    out = ""
    compressed_source = zlib.compress(source)
    out += 'import zlib, base64\n'
    out += "exec zlib.decompress(base64.b64decode('"
    out += base64.b64encode(compressed_source)
    out += "'))\n"
    return out
# The test.+() functions below are for testing pyminifier...
def _lzma(self):
    '''LZMA processor'''
    try:
        archive = lzma.decompress(self.cur_attachment.file_obj.read())
        new_fn, ext = os.path.splitext(self.cur_attachment.orig_filename)
        cur_file = File(archive, new_fn)
        self.process_payload(cur_file)
    except Exception:
        # Anything that fails to decompress or process is flagged.
        self.cur_attachment.make_dangerous()
    return self.cur_attachment
def _bzip(self):
    '''BZip2 processor'''
    try:
        archive = bz2.decompress(self.cur_attachment.file_obj.read())
        new_fn, ext = os.path.splitext(self.cur_attachment.orig_filename)
        cur_file = File(archive, new_fn)
        self.process_payload(cur_file)
    except Exception:
        # Anything that fails to decompress or process is flagged.
        self.cur_attachment.make_dangerous()
    return self.cur_attachment
def bz2_pack(source):
"""
Returns 'source' as a bzip2-compressed, self-extracting python script.
.. note::
This method uses up more space than the zip_pack method but it has the
advantage in that the resulting .py file can still be imported into a
python program.
"""
import bz2, base64
out = ""
# Preserve shebangs (don't care about encodings for this)
first_line = source.split('\n')[0]
if analyze.shebang.match(first_line):
if py3:
if first_line.rstrip().endswith('python'): # Make it python3
first_line = first_line.rstrip()
first_line += '3' #!/usr/bin/env python3
out = first_line + '\n'
compressed_source = bz2.compress(source.encode('utf-8'))
out += 'import bz2, base64\n'
out += "exec(bz2.decompress(base64.b64decode('"
out += base64.b64encode(compressed_source).decode('utf-8')
out += "')))\n"
return out
def gz_pack(source):
"""
Returns 'source' as a gzip-compressed, self-extracting python script.
.. note::
This method uses up more space than the zip_pack method but it has the
advantage in that the resulting .py file can still be imported into a
python program.
"""
import zlib, base64
out = ""
# Preserve shebangs (don't care about encodings for this)
first_line = source.split('\n')[0]
if analyze.shebang.match(first_line):
if py3:
if first_line.rstrip().endswith('python'): # Make it python3
first_line = first_line.rstrip()
first_line += '3' #!/usr/bin/env python3
out = first_line + '\n'
compressed_source = zlib.compress(source.encode('utf-8'))
out += 'import zlib, base64\n'
out += "exec(zlib.decompress(base64.b64decode('"
out += base64.b64encode(compressed_source).decode('utf-8')
out += "')))\n"
return out
def lzma_pack(source):
"""
Returns 'source' as a lzma-compressed, self-extracting python script.
.. note::
This method uses up more space than the zip_pack method but it has the
advantage in that the resulting .py file can still be imported into a
python program.
"""
import lzma, base64
out = ""
# Preserve shebangs (don't care about encodings for this)
first_line = source.split('\n')[0]
if analyze.shebang.match(first_line):
if py3:
if first_line.rstrip().endswith('python'): # Make it python3
first_line = first_line.rstrip()
first_line += '3' #!/usr/bin/env python3
out = first_line + '\n'
compressed_source = lzma.compress(source.encode('utf-8'))
out += 'import lzma, base64\n'
out += "exec(lzma.decompress(base64.b64decode('"
out += base64.b64encode(compressed_source).decode('utf-8')
out += "')))\n"
return out
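# Hedged usage sketch for the pack functions above. Assumes pyminifier's
# analyze module (source of the shebang regex) and the py3 flag are in
# scope; the file name is hypothetical.
packed = lzma_pack("print('hello')\n")
with open('hello_packed.py', 'w') as f:
    f.write(packed)
# Running hello_packed.py base64-decodes, decompresses, and exec()s the
# original source.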
def decompress(data):
    # No-op: this backend stores data uncompressed.
    return data
#############################
### SEGMENTATION SPECIFIC ###
#############################
def decompress(data, *args, **kwargs):
return compresso.compresso.decompress(data, *args, **kwargs)
def decompress(data, *args, **kwargs):
return neuroglancer.neuroglancer.decompress(data, *args, **kwargs)
#######################
### GENERAL PURPOSE ###
#######################
def decompress(data, *args, **kwargs):
return bz2.decompress(data, *args, **kwargs)
def decompress(data, *args, **kwargs):
    # Rebuild the LZ78 dictionary: each entry packs (index, char) as
    # index * 256 + ord(char); the final entry carries no character.
    dictionary = list()
    for ie, entry in enumerate(data):
        int_value = int(entry) // (2 ** 8)
        if ie == data.size - 1:
            char_value = ''
        else:
            char_value = chr(int(entry) % (2 ** 8))
        dictionary.append((int_value, char_value))
    return lz78.lz78.decompress(dictionary, *args, **kwargs)
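# Worked example of the packing assumed above: an LZ78 dictionary entry
# stores (index, char) as index * 256 + ord(char).
entry = 101 * 2 ** 8 + ord('a')      # 25953
assert entry // 2 ** 8 == 101        # back-reference index
assert chr(entry % 2 ** 8) == 'a'    # appended character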