def step4():
    # Load the {key: vector} mapping produced by the previous step.
    key_vec = pickle.loads(open("key_vec.pkl", "rb").read())
    vecs = []
    for vec in key_vec.values():
        x = np.array(vec)
        # Skip vectors that contain NaN values.
        if np.isnan(x).any():
            # print(vec)
            continue
        vecs.append(x)
    vecs = np.array(vecs)
    # Note: precompute_distances and n_jobs were removed in recent scikit-learn;
    # drop them if running against a current version.
    kmeans = KMeans(n_clusters=128, init='k-means++', n_init=10, max_iter=300,
                    tol=0.0001, precompute_distances='auto', verbose=0,
                    random_state=None, copy_x=True, n_jobs=1)
    print("now fitting...")
    kmeans.fit(vecs)
    # Persist the fitted model for the later prediction steps.
    open("kmeans.model", "wb").write(pickle.dumps(kmeans))
    for p in kmeans.predict(vecs):
        print(p)
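# A minimal sketch (not part of the original pipeline) of reloading the model
# written above; the 100-dimensional random vectors are placeholders standing in
# for the fastText sentence vectors used in _step5 below.
def predict_clusters_example():
    import pickle
    import numpy as np
    kmeans = pickle.loads(open("kmeans.model", "rb").read())
    sample_vecs = np.random.rand(5, 100)  # stand-ins for real sentence vectors
    return kmeans.predict(sample_vecs)    # one cluster id per vector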
def _step5(arr):
    # Load the clustering model fitted in step4.
    kmeans = pickle.loads(open("kmeans.model", "rb").read())
    key, lines, tipe = arr
    print(key)
    # Dump the raw lines to a temp file and run fastText on it to get sentence vectors.
    open("./tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe, key=key), "w").write("\n".join(lines))
    res = os.popen("./fasttext print-sentence-vectors ./models/model.bin < tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe, key=key)).read()
    w = open("tmp/tmp.{tipe}.{key}.json".format(tipe=tipe, key=key), "w")
    for line in res.split("\n"):
        try:
            # The last 100 fields of each output line are the sentence vector.
            vec = list(map(float, line.split()[-100:]))
        except ValueError:
            print(line)
            print(res)
            continue
        x = np.array(vec)
        if np.isnan(x).any():
            continue
        # Assign the sentence to its nearest cluster.
        cluster = kmeans.predict([vec])
        txt = line.split()[:-100]
        obj = {"txt": txt, "cluster": cluster.tolist()}
        data = json.dumps(obj, ensure_ascii=False)
        w.write(data + "\n")
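# The underscore prefix and single-tuple argument suggest _step5 is written as a
# worker for a process pool; this driver is a hypothetical sketch, and the way
# the (key, lines, tipe) chunks are produced is an assumption.
def step5(chunks):
    from multiprocessing import Pool
    # chunks: iterable of (key, lines, tipe) tuples, one per input shard
    with Pool(processes=8) as pool:
        pool.map(_step5, chunks)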
def step6():
    for tipe in ["news", "nocturne"]:
        names = [name for name in reversed(sorted(glob.glob("./tmp/tmp.{tipe}.*.json".format(tipe=tipe))))]
        size = len(names)
        for en, name in enumerate(names):
            term_clus = {}
            oss = []
            with open(name) as f:
                for line in f:
                    line = line.strip()
                    oss.append(json.loads(line))
            # For every sentence, count the cluster ids of the three sentences
            # before and after it, per term occurring in the sentence.
            for i in range(3, len(oss) - 3):
                terms = set(oss[i]["txt"])
                for term in terms:
                    if term_clus.get(term) is None:
                        term_clus[term] = [0.0] * 128
                    cd = [oss[i + d]["cluster"][0] for d in [-3, -2, -1, 1, 2, 3]]
                    for c in cd:
                        term_clus[term][c] += 1.0
            print("{}/{} finished {}".format(en, size, name))
        open("{tipe}.term_clus.pkl".format(tipe=tipe), "wb").write(pickle.dumps(term_clus))
def step7():
    # Keep terms seen in enough contexts (total count > 30) and normalize each
    # count vector into a probability distribution over the 128 clusters.
    term_clus = pickle.loads(open("./news.term_clus.pkl", "rb").read())
    term_clus = {term: clus for term, clus in filter(lambda x: sum(x[1]) > 30, term_clus.items())}
    for term in term_clus.keys():
        vec = term_clus[term]
        acc = sum(vec)
        term_clus[term] = list(map(lambda x: x / acc, vec))
    open("news.term_dist.pkl", "wb").write(pickle.dumps(term_clus))

    term_clus = pickle.loads(open("./nocturne.term_clus.pkl", "rb").read())
    term_clus = {term: clus for term, clus in filter(lambda x: sum(x[1]) > 30, term_clus.items())}
    for term in term_clus.keys():
        vec = term_clus[term]
        acc = sum(vec)
        term_clus[term] = list(map(lambda x: x / acc, vec))
    open("nocturne.term_dist.pkl", "wb").write(pickle.dumps(term_clus))
def setProtected(self, name):
    '''
    Set a name in the table to be protected from removal
    because of limits.
    '''
    # generate the filepath to the protected values list
    filePath = pathJoin(self.path, 'protected.table')
    # check if the path exists
    if pathExists(filePath):
        # read the protected list from the file
        protectedList = unpickle(loadFile(filePath))
    else:
        # create the list and append the name
        protectedList = []
    # append the new value to the list
    protectedList.append(name)
    # pickle the protected list for storage
    protectedList = pickle(protectedList)
    # write the changes back to the protected list
    writeFile(filePath, protectedList)
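# pathJoin, pathExists, loadFile, writeFile, pickle and unpickle above are the
# table library's own helpers, not the standard library. A plausible minimal
# reading of the two serialization wrappers (an assumption, not the actual
# implementation) is:
import pickle as _pickle

def pickle(obj):
    return _pickle.dumps(obj)

def unpickle(data):
    return _pickle.loads(data)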
################################################################################
def deleteValue(self, name):
    '''
    Delete a value with name name.
    '''
    # clean up names to avoid stupid
    debug.add('deleting value ', name)
    # figure out the path to the named value file
    if name in self.names:
        filePath = self.namePaths[name]
        # remove the metadata entry
        del self.namePaths[name]
        # write changes to database metadata file
        writeFile(pathJoin(self.path, 'names.table'), pickle(self.namePaths))
        # update the length and names attributes
        self.names = self.namePaths.keys()
        self.length = len(self.names)
    else:
        return False
    if pathExists(filePath):
        # remove the file associated with the value
        removeFile(filePath)
        return True
    else:
        return False
################################################################################
def test_adapt_subclass(self):
    from psycopg2.extras import json, Json

    class DecimalEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, Decimal):
                return float(obj)
            return json.JSONEncoder.default(self, obj)

    class MyJson(Json):
        def dumps(self, obj):
            return json.dumps(obj, cls=DecimalEncoder)

    curs = self.conn.cursor()
    obj = Decimal('123.45')
    self.assertEqual(curs.mogrify("%s", (MyJson(obj),)), b"'123.45'")
def test_adapt_dumps(self):
    from psycopg2.extras import json, Json

    class DecimalEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, Decimal):
                return float(obj)
            return json.JSONEncoder.default(self, obj)

    curs = self.conn.cursor()
    obj = Decimal('123.45')

    def dumps(obj):
        return json.dumps(obj, cls=DecimalEncoder)

    self.assertEqual(curs.mogrify("%s", (Json(obj, dumps=dumps),)),
                     b"'123.45'")
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
    self._createdir()  # Cache dir can be deleted at any time.
    fname = self._key_to_file(key, version)
    self._cull()  # make some room if necessary
    fd, tmp_path = tempfile.mkstemp(dir=self._dir)
    renamed = False
    try:
        with io.open(fd, 'wb') as f:
            expiry = self.get_backend_timeout(timeout)
            f.write(pickle.dumps(expiry, -1))
            f.write(zlib.compress(pickle.dumps(value), -1))
        file_move_safe(tmp_path, fname, allow_overwrite=True)
        renamed = True
    finally:
        if not renamed:
            os.remove(tmp_path)
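# Sketch of how the two writes above would be read back (not the cache backend's
# actual get() implementation): the pickled expiry comes first, followed by the
# zlib-compressed pickle of the cached value.
def read_cache_file_example(path):
    import io, pickle, zlib
    with io.open(path, 'rb') as f:
        expiry = pickle.load(f)                          # reads only the first pickled object
        value = pickle.loads(zlib.decompress(f.read()))  # the remainder is the compressed value
    return expiry, value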
def serialize(obj):
    return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
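# The matching deserializer is not shown in the snippet; the one-line inverse
# would simply be:
def deserialize(data):
    return pickle.loads(data)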
def post(self):
    payload = {
        'owner': request.form['owner'],
        'package': request.form['package'],
        'data': request.form['data']
    }
    owner = request.form['owner']
    package = request.form['package']
    data = request.form['data']

    b = ENGINE.get_named_secret(owner)
    print(b)
    secret = rsa.decrypt(eval(b), KEY[1])

    # data is a python tuple of the templated solidity at index 0 and an example payload at index 1
    # compilation of this code should return true
    # if there are errors, don't commit it to the db
    # otherwise, commit it
    raw_data = decrypt(secret, eval(data))
    package_data = json.loads(raw_data.decode('utf8'))
    '''
    payload = {
        'tsol' : open(code_path[0]).read(),
        'example' : example
    }
    '''
    # assert that the code compiles with the provided example
    tsol.compile(StringIO(package_data['tsol']), package_data['example'])

    template = pickle.dumps(package_data['tsol'])
    example = pickle.dumps(package_data['example'])

    if ENGINE.add_package(owner, package, template, example) == True:
        return success_payload(None, 'Package successfully uploaded.')
    return error_payload('Problem uploading package. Try again.')
def dump(type, exc):
    """
    Always return a dumped (pickled) type and exc. If exc can't be pickled,
    wrap it in UnpickleableException first.
    """
    try:
        return pickle.dumps(type), pickle.dumps(exc)
    except Exception:
        # get UnpickleableException inside the sandbox
        from setuptools.sandbox import UnpickleableException as cls
        return cls.dump(cls, cls(repr(exc)))
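# setuptools unpickles these two blobs on the other side of the sandbox; this
# inverse is only an illustrative sketch, not the actual setuptools helper.
def load_dumped(type_blob, exc_blob):
    return pickle.loads(type_blob), pickle.loads(exc_blob)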
def do_POST(self):
    if COORD.started:
        src = self.rfile.read(int(self.headers['content-length']))
        job = COORD.next_job(pickle.loads(src))
        if job:
            self._send_answer(pickle.dumps(job))
            return
        self.send_response(404)
    else:
        self.send_response(202)
    self.end_headers()
def next_job(self, job):
    '''Sends a finished job back to the coordinator and retrieves in exchange the next one.

    Kwargs:
        job (WorkerJob): job that was finished by a worker and whose results are to be
            digested by the coordinator

    Returns:
        WorkerJob. next job of one of the running epochs that will get
        associated with the worker from the finished job and put into state 'running'
    '''
    if is_chief:
        # Try to find the epoch the job belongs to
        epoch = next((epoch for epoch in self._epochs_running if epoch.id == job.epoch_id), None)
        if epoch:
            # We are going to manipulate things - let's avoid undefined state
            with self._lock:
                # Let the epoch finish the job
                epoch.finish_job(job)
                # Check, if epoch is done now
                if epoch.done():
                    # If it declares itself done, move it from 'running' to 'done' collection
                    self._epochs_running.remove(epoch)
                    self._epochs_done.append(epoch)
                    # Show the short and/or full WER report
                    log_info(epoch)
        else:
            # There was no running epoch found for this job - this should never happen.
            log_error('There is no running epoch of id %d for job with ID %d.' % (job.epoch_id, job.id))
        return self.get_job(job.worker)

    # We are a remote worker and have to hand over to the chief worker by HTTP
    result = self._talk_to_chief('', data=pickle.dumps(job))
    if result:
        result = pickle.loads(result)
    return result
def fix_episode(episode_path):
    try:
        episode = load_episode(episode_path)
    except EOFError:
        print("Error reading: {}".format(episode_path))
        os.remove(episode_path)
        return
    if episode.version == 2:
        print("Version 2 already: {}".format(episode_path))
        return
    # Shift actions back by one frame: frame i takes the action recorded on frame i + 1,
    # and the final frame is dropped.
    old_frames = episode.frames
    episode.frames = []
    for i in range(len(old_frames) - 1):
        f = old_frames[i]
        f.action = old_frames[i + 1].action
        episode.frames.append(f)
    episode.version = 2
    s = pickle.dumps(episode)
    with gzip.open(episode_path, "wb") as f:
        f.write(s)
    # pickler = pickle.Pickler(f)
    # pickler.dump(episode)
    # save_episode(episode_path, episode)
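# Hypothetical driver (not in the original snippet): apply fix_episode to every
# saved episode in a directory; the "episodes/*.gz" pattern is an assumption.
if __name__ == "__main__":
    import glob
    for path in sorted(glob.glob("episodes/*.gz")):
        fix_episode(path)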