def print_training_params(self, cgs, training_params):
enc_dec_param_dict = merge(self.encoder.get_params(),
self.decoder.get_params())
# Print which parameters are excluded
for k, v in cgs.iteritems():
excluded_all = list(set(v.parameters) - set(training_params[k]))
for p in excluded_all:
logger.info(
'Excluding from training of CG[{}]: {}'
.format(k, [key for key, val in
enc_dec_param_dict.iteritems()
if val == p][0]))
logger.info(
'Total number of excluded parameters for CG[{}]: [{}]'
.format(k, len(excluded_all)))
for k, v in training_params.iteritems():
for p in v:
logger.info('Training parameter from CG[{}]: {}'
.format(k, p.name))
logger.info(
    'Total number of parameters to be trained for CG[{}]: [{}]'
    .format(k, len(v)))
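# A minimal sketch of toolz.merge semantics, assuming toolz (or cytoolz) is
# installed, since every snippet on this page relies on it: the right-most
# mapping wins on key collisions, and a single iterable of mappings is also
# accepted. The dict contents below are illustrative only.
from toolz import merge

defaults = {'capital_base': 1e7, 'bundle': 'test'}
overrides = {'capital_base': 5e6}
combined = merge(defaults, overrides)
assert combined == {'capital_base': 5e6, 'bundle': 'test'}  # right-most mapping wins

parts = [{'encoder_W': 1}, {'decoder_W': 2}]  # a single iterable of mappings
assert merge(parts) == {'encoder_W': 1, 'decoder_W': 2}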
def scrutinize(self, scrutine, context_frame):
constructor = type(scrutine)
if constructor.__name__ != self._constructor_name:
raise NoMatch()
kwargs = scrutine._kwargs
# the context to evaluate the thunk in
context = {
Call(Normal(name_lookup), (Normal(name),), {}): Normal(value)
for name, value in merge(
vars(builtins),
context_frame.f_globals,
context_frame.f_locals,
# the newly bound arguments have the highest precedence
dict(zip(self._argnames, scrutine._args)),
{v: kwargs[k] for k, v in self._kwargnames.items()},
).items()
}
bound_tree = LTree.parse(self._expr).subs(context)
return strict(bound_tree.lcompile())
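# A hypothetical illustration of the precedence used in scrutinize above: with
# toolz.merge the right-most mapping shadows the earlier ones, so the newly
# bound arguments override f_locals, which override f_globals, which override
# builtins. The namespaces below are made up for the sketch.
from toolz import merge

builtins_ns = {'len': len, 'x': 'builtin'}
globals_ns = {'x': 'global'}
locals_ns = {'x': 'local'}
bound_args = {'x': 'argument'}

scope = merge(builtins_ns, globals_ns, locals_ns, bound_args)
assert scope['x'] == 'argument'
assert scope['len'] is len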
def run_example(example_name, environ):
"""
Run an example module from catalyst.examples.
"""
mod = EXAMPLE_MODULES[example_name]
register_calendar("YAHOO", get_calendar("NYSE"), force=True)
return run_algorithm(
initialize=getattr(mod, 'initialize', None),
handle_data=getattr(mod, 'handle_data', None),
before_trading_start=getattr(mod, 'before_trading_start', None),
analyze=getattr(mod, 'analyze', None),
bundle='test',
environ=environ,
# Provide a default capital base, but allow the test to override.
**merge({'capital_base': 1e7}, mod._test_args())
)
def prepare(query, connection=None, external=None):
connection = merge(_default, connection or {})
database = escape(connection['database'])
query = query.format(db=database)
params = {'query': query,
'user': connection['user'],
'password': connection['password']}
params = valfilter(lambda x: x, params)
files = {}
external = external or {}
for name, (structure, serialized) in external.items():
params['{}_format'.format(name)] = 'CSV'
params['{}_structure'.format(name)] = structure
files[name] = serialized
host = connection['host']
return host, params, files
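# A sketch of the connection handling in prepare above, assuming toolz provides
# merge and valfilter: the caller's settings override the module-level defaults,
# and valfilter drops falsy values (e.g. an empty password) before they are
# sent. The default values here are assumptions for illustration.
from toolz import merge, valfilter

_default = {'user': 'default', 'password': '', 'database': 'default'}
connection = {'user': 'reader'}

params = valfilter(bool, merge(_default, connection))
assert params == {'user': 'reader', 'database': 'default'}  # empty password dropped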
def apply(self, source_sentence, source_sentence_mask):
"""Creates bidirectional RNN source annotations.
Args:
source_sentence (Variable): Source sentence with words in
vector representation.
source_sentence_mask (Variable): Source mask
Returns:
Variable. source annotations
"""
# Time as first dimension
source_sentence = source_sentence.T
source_sentence_mask = source_sentence_mask.T
representation = self.bidir.apply(
merge(self.fwd_fork.apply(source_sentence, as_dict=True),
{'mask': source_sentence_mask}),
merge(self.back_fork.apply(source_sentence, as_dict=True),
{'mask': source_sentence_mask})
)
return representation, source_sentence_mask
def __init__(self, key, mapping=None, **kwargs):
self._map = {}
self._sorted_key_names = []
self._sort_key = key
self.update(merge(mapping or {}, kwargs))
def load_adjusted_array(self, columns, dates, assets, mask):
return merge(
self.get_loader(column).load_adjusted_array(
[column], dates, assets, mask
)
for column in columns
)
def test_ewm_stats(self, window_length):
def ewma_name(decay_rate):
return 'ewma_%s' % decay_rate
def ewmstd_name(decay_rate):
return 'ewmstd_%s' % decay_rate
decay_rates = [0.25, 0.5, 0.75]
ewmas = {
ewma_name(decay_rate): EWMA(
inputs=(USEquityPricing.close,),
window_length=window_length,
decay_rate=decay_rate,
)
for decay_rate in decay_rates
}
ewmstds = {
ewmstd_name(decay_rate): EWMSTD(
inputs=(USEquityPricing.close,),
window_length=window_length,
decay_rate=decay_rate,
)
for decay_rate in decay_rates
}
all_results = self.engine.run_pipeline(
Pipeline(columns=merge(ewmas, ewmstds)),
self.dates[window_length],
self.dates[-1],
)
for decay_rate in decay_rates:
ewma_result = all_results[ewma_name(decay_rate)].unstack()
ewma_expected = self.expected_ewma(window_length, decay_rate)
assert_frame_equal(ewma_result, ewma_expected)
ewmstd_result = all_results[ewmstd_name(decay_rate)].unstack()
ewmstd_expected = self.expected_ewmstd(window_length, decay_rate)
assert_frame_equal(ewmstd_result, ewmstd_expected)
def do(self, which_callback, *args):
iterations_done = self.main_loop.status['iterations_done']
if self.burnin <= iterations_done:
# Save the model here
iterations_done = self.main_loop.status['iterations_done']
filename = os.path.join(
self.saveto, 'params_iter{}.npz'.format(iterations_done))
s = signal.signal(signal.SIGINT, signal.SIG_IGN)
logger.info(" Incremental dump {}".format(filename))
params_to_save = []
for cg_name in self.main_loop.models.keys():
params_to_save.append(
self.main_loop.models[cg_name].get_param_values())
params_to_save = merge(params_to_save)
secure_numpy_save(params_to_save, filename)
if self.save_iter_state:
filename_is = os.path.join(
self.saveto,
'iterations_state_iter{}.pkl'.format(iterations_done))
logger.info(" Incremental dump {}".format(filename_is))
secure_pickle_dump(self.main_loop.iteration_state, filename_is)
if self.save_log:
filename_log = os.path.join(
self.saveto,
'log_iter{}'.format(iterations_done))
logger.info(" Incremental dump {}".format(filename_log))
secure_pickle_dump(self.main_loop.log, filename_log)
signal.signal(signal.SIGINT, s)
def dump_parameters(self, main_loop):
params_to_save = []
for model in main_loop.models.values():
params_to_save.append(model.get_param_values())
secure_numpy_save(merge(params_to_save),
self.path_to_parameters)
def get_params(self):
return merge(self.encoder.get_params(),
self.decoder.get_params())
def _save_model(self, bleu_score):
if self._is_valid_to_save(bleu_score):
model = ModelInfo(
bleu_score, self.saveto, self.enc_id, self.dec_id)
# Manage n-best model list first
if len(self.best_models) >= self.track_n_models:
old_model = self.best_models[0]
if old_model.path and os.path.isfile(old_model.path):
logger.info("Deleting old model %s" % old_model.path)
os.remove(old_model.path)
self.best_models.remove(old_model)
self.best_models.append(model)
self.best_models.sort(key=operator.attrgetter('bleu_score'))
# Save the model here
s = signal.signal(signal.SIGINT, signal.SIG_IGN)
logger.info("Saving new model {}".format(model.path))
params_to_save = []
for cg_name in self.main_loop.models.keys():
params_to_save.append(
self.main_loop.models[cg_name].get_param_values())
params_to_save = merge(params_to_save)
self._save_params(model, params_to_save)
self._save_bleu_scores()
signal.signal(signal.SIGINT, s)
def index(self):
"""Return dask Index instance"""
name = self._name + '-index'
dsk = {(name, i): (getattr, key, 'index')
for i, key in enumerate(self._keys())}
return Index(merge(dsk, self.dask), name,
self._meta.index, self.divisions)
def head(self, n=5, npartitions=1, compute=True):
""" First n rows of the dataset
Parameters
----------
n : int, optional
The number of rows to return. Default is 5.
npartitions : int, optional
Elements are only taken from the first ``npartitions``, with a
default of 1. If there are fewer than ``n`` rows in the first
``npartitions``, a warning is raised and whatever rows are found
are returned. Pass -1 to use all partitions.
compute : bool, optional
Whether to compute the result, default is True.
"""
if npartitions <= -1:
npartitions = self.npartitions
if npartitions > self.npartitions:
raise ValueError("only %d partitions, received "
"%d" % (self.npartitions, npartitions))
name = 'head-%d-%d-%s' % (npartitions, n, self._name)
if npartitions > 1:
name_p = 'head-partial-%d-%s' % (n, self._name)
dsk = {(name_p, i): (M.head, (self._name, i), n)
for i in range(npartitions)}
dsk[(name, 0)] = (M.head, (gd.concat, sorted(dsk)), n)
else:
dsk = {(name, 0): (M.head, (self._name, 0), n)}
res = new_dd_object(merge(self.dask, dsk), name, self._meta,
(self.divisions[0], self.divisions[npartitions]))
return res.compute() if compute else res
def concat(objs):
"""Concantenate dask gdf objects
Parameters
----------
objs : sequence of DataFrame, Series, Index
A sequence of objects to be concatenated.
"""
objs = [_daskify(x) for x in objs]
meta = gd.concat(_extract_meta(objs))
name = "concat-" + uuid4().hex
dsk = {}
divisions = [0]
base = 0
lastdiv = 0
for obj in objs:
for k, i in obj._keys():
dsk[name, base + i] = k, i
base += obj.npartitions
divisions.extend([d + lastdiv for d in obj.divisions[1:]])
lastdiv = obj.divisions[-1]
dasks = [o.dask for o in objs]
dsk = merge(dsk, *dasks)
return new_dd_object(dsk, name, meta, divisions)
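# concat above splats a list of per-object task graphs into merge, i.e.
# merge(dsk, *dasks). The same call shape with ordinary dicts, using made-up
# task keys and values:
from toolz import merge

dsk = {('concat-abc', 0): ('a', 0), ('concat-abc', 1): ('b', 0)}
dasks = [{('a', 0): 'task-a'}, {('b', 0): 'task-b'}]
graph = merge(dsk, *dasks)
assert len(graph) == 4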
def __getitem__(self, key):
if isinstance(key, str) and key in self.columns:
meta = self._meta[key]
name = 'getitem-%s' % tokenize(self, key)
dsk = {(name, i): (operator.getitem, (self._name, i), key)
for i in range(self.npartitions)}
return Series(merge(self.dask, dsk), name, meta, self.divisions)
raise NotImplementedError("Indexing with %r" % key)
def __init__(self, scheduler, name, user=getpass.getuser(),
master=os.getenv('MESOS_MASTER', 'zk://localhost:2181'),
failover_timeout=100, capabilities=None, principal=None, secret=None,
implicit_acknowledgements=True, handlers={}, loop=None):
self.loop = loop or IOLoop()
self.master = master
self.leading_master_seq = None
self.leading_master_info = None
self.scheduler = scheduler
self.framework = {
'user': user,
'name': name,
'capabilities': capabilities or [],
'failover_timeout': failover_timeout,
'hostname': socket.gethostname()
}
self.implicit_acknowledgements = implicit_acknowledgements
defaults = {Event.SUBSCRIBED: self.on_subscribed,
Event.OFFERS: self.on_offers,
Event.RESCIND: self.on_rescind,
Event.UPDATE: self.on_update,
Event.MESSAGE: self.on_message,
Event.RESCIND_INVERSE_OFFER: self.on_rescind_inverse,
Event.FAILURE: self.on_failure,
Event.ERROR: self.on_error,
Event.HEARTBEAT: self.on_heartbeat,
Event.OUTBOUND_SUCCESS: self.on_outbound_success,
Event.OUTBOUND_ERROR: self.on_outbound_error}
self.handlers = merge(defaults, handlers)
self.subscription = Subscription(self.framework, self.master,
'/api/v1/scheduler', self.handlers,
principal=principal,
secret=secret,
timeout=failover_timeout,
loop=self.loop)
def factor_load(start_date, end_date, factor_name, save_file=None, **kwargs):
"""
:param start_date: str, ???????????
:param end_date: str, ???????????
:param factor_name: str, ???????????
:param save_file: str, optional, ???????????? '*.csv' ?? '*.pkl'
:param kwargs: dict, optional
freq: str, optional, ???????? ??'M', 'W', 'S', 'Y'? ??enums.py - FreqType
tenor: str, optional, ???????? ?????????????????????????(??)? ????+FreqType? ?'3M'
sec_id, str/list, optional, ???????????
output_data_format: enum, optional, ??enums.py - FreqType
MULTI_INDEX_DF: multi-index DataFrame, index=[date, secID], value = factor
PIVOT_TABLE_DF: DataFrame, index=date, columns = secID
is_index: bool, optional, True: ???sec_id????????????????????????
False: ????sec_id?????
date_format: str, optional, ?????? ??'%Y-%m-%d'
:return: pd.DataFrame ????????
"""
if isinstance(factor_name, list):
kwargs = merge(kwargs, {'output_data_format': OutputFormat.MULTI_INDEX_DF})
factor_names = factor_name
else:
factor_names = [factor_name]
ret = pd.DataFrame()
for factor_name in factor_names:
LOGGER.info('Loading factor data {0}'.format(factor_name))
factor_loader = FactorLoader(start_date=start_date,
end_date=end_date,
factor_name=factor_name,
**kwargs)
factor_data = factor_loader.load_data()
LOGGER.info('factor data {0} is loaded '.format(factor_name))
ret = pd.concat([ret, factor_data], axis=1)
if kwargs.get('reset_col_names'):
ret.columns = factor_names
if save_file:
save_data_to_file(ret, save_file)
LOGGER.critical('Data saved in {0}'.format(save_file))
return ret
def __init__(self, volume_limit, eta=ROOT_SYMBOL_TO_ETA):
super(VolatilityVolumeShare, self).__init__()
self.volume_limit = volume_limit
# If 'eta' is a constant, use a dummy mapping to treat it as a
# dictionary that always returns the same value.
# NOTE: This dictionary does not handle unknown root symbols, so it may
# be worth revisiting this behavior.
if isinstance(eta, (int, float)):
self._eta = DummyMapping(float(eta))
else:
# Eta is a dictionary. If the user's dictionary does not provide a
# value for a certain contract, fall back on the pre-defined eta
# values per root symbol.
self._eta = merge(ROOT_SYMBOL_TO_ETA, eta)
def __init__(self,
cost,
exchange_fee,
min_trade_cost=DEFAULT_MINIMUM_COST_PER_FUTURE_TRADE):
# If 'cost' or 'exchange_fee' are constants, use a dummy mapping to
# treat them as dictionaries that always return the same value.
# NOTE: These dictionaries do not handle unknown root symbols, so it
# may be worth revisiting this behavior.
if isinstance(cost, (int, float)):
self._cost_per_contract = DummyMapping(float(cost))
else:
# Cost per contract is a dictionary. If the user's dictionary does
# not provide a commission cost for a certain contract, fall back
# on the pre-defined cost values per root symbol.
self._cost_per_contract = defaultdict(
lambda: DEFAULT_PER_CONTRACT_COST, **cost
)
if isinstance(exchange_fee, (int, float)):
self._exchange_fee = DummyMapping(float(exchange_fee))
else:
# Exchange fee is a dictionary. If the user's dictionary does not
# provide an exchange fee for a certain contract, fall back on the
# pre-defined exchange fees per root symbol.
self._exchange_fee = merge(
FUTURE_EXCHANGE_FEES_BY_SYMBOL, exchange_fee,
)
self.min_trade_cost = min_trade_cost or 0
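# Two fallback styles appear in the constructor above: defaultdict supplies a
# default for any missing key, while merge only fills in keys that exist in the
# predefined per-root-symbol mapping. A sketch of the difference, with made-up
# symbols and fee values:
from collections import defaultdict
from toolz import merge

FEES_BY_ROOT_SYMBOL = {'ES': 1.18, 'CL': 1.50}  # hypothetical values
user_fees = {'ES': 1.00}

merged = merge(FEES_BY_ROOT_SYMBOL, user_fees)
assert merged['ES'] == 1.00 and merged['CL'] == 1.50
# merged['GC'] would raise KeyError, whereas the defaultdict-based cost lookup
# falls back to a flat default for unknown symbols:
costs = defaultdict(lambda: 0.85, **user_fees)
assert costs['GC'] == 0.85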
def load_adjusted_array(self, columns, dates, sids, mask):
n, p = self.split_next_and_previous_event_columns(columns)
return merge(
self.load_next_events(n, dates, sids, mask),
self.load_previous_events(p, dates, sids, mask),
)
def json_repr(self):
return merge(self.to_map(), dict(__type__=self.__path__))
def ribosome_file_logging(name: str, file_kw: dict=dict()) -> None:
prefix_path = options.nvim_log_file.value | (lambda: amino.logging.log_dir() / 'nvim')
level = (
DDEBUG
if options.development and options.spec else
options.file_log_level.value | logging.DEBUG
)
logfile = Path(f'{prefix_path}_ribo_{name}_{os.getpid()}')
kw = merge(
file_kw,
dict(level=level, logfile=logfile)
)
return amino_root_file_logging(**kw)
def main():
global parser
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--output-html', help='Output result page HTML to file')
parser.add_argument('--saisies', dest='saisie_variables', metavar='nom=valeur', nargs='+', help='Variables saisies')
parser.add_argument('--year', default='2015', type=int,
help='Calculer les impôts de l\'année N sur les revenus de l\'année N-1')
args = parser.parse_args()
cgi_url = 'http://www3.finances.gouv.fr/cgi-bin/calc-{}.cgi'.format(args.year)
headers = {'User-Agent': 'Calculette-Impots-Python'}
saisie_variables = {} if args.saisie_variables is None else dict(iter_saisie_variables(args.saisie_variables))
default_saisie_variables = {
# '0DA': '1965',
# '1AJ': '15000',
'pre_situation_famille': 'C',
'pre_situation_residence': 'M',
# 'simplifie': '1',
}
data = merge(default_saisie_variables, saisie_variables)
response = requests.post(cgi_url, headers=headers, data=data)
if args.output_html is not None:
with open(args.output_html, 'w') as output_html_file:
output_html_file.write(re.sub(
pattern=r'=(.)/calcul_impot/2015/',
repl=r'=\1http://www3.finances.gouv.fr/calcul_impot/2015/',
string=response.text,
))
root_node = etree.fromstring(response.text, etree.HTMLParser())
results = list(iter_results(root_node))
print(json.dumps(results, ensure_ascii=False, indent=2, sort_keys=True))
return 0
def run_all(values, base, get=get_proc, num_workers=4):
full_dask = toolz.merge(val.dask for val in values)
full_keys = [val._key for val in values]
cache = {}
if exists("{}.cache".format(base["prefix"])):
with open("{}.cache".format(base["prefix"]), "r") as f:
cache = json.load(f)
full_dask.update(cache)
with ProgressBar(), NekCallback(base) as rprof:
res = get(full_dask, full_keys, cache=cache, num_workers=num_workers, optimize_graph=False)
return res
def params(self):
return merge(*self._params)
def _influxdb_writer_for(influxdb_client, measurement):
mes_dict = {"measurement": measurement}
def to_influxdf(*data_dicts):
merged_dicts = merge(mes_dict, *data_dicts)
logger.debug(merged_dicts)
if influxdb_client.write_points([merged_dicts]):
logger.debug("Success")
else:
logger.info("FAIL")
return to_influxdf
def _add_tags(tags, json_dict):
json_dict['tags'] = merge(json_dict['tags'], tags)
return json_dict
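# A quick usage sketch for the _add_tags defined above (it assumes merge is
# imported in that module): the extra tags are merged into the point's existing
# tags, and because they are the right-most argument to merge they win on
# collisions. Note that the input dict is modified in place and returned. The
# point below is made up for illustration.
point = {'measurement': 'cpu', 'tags': {'host': 'node-1'}}
tagged = _add_tags({'region': 'eu', 'host': 'node-2'}, point)
assert tagged['tags'] == {'host': 'node-2', 'region': 'eu'}
assert tagged is point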