def __init__(self, n_primes, n_composed, features_size, markov_order,
temperature=1.0, min_len_definitions=2, max_len_definitions=4):
"""
markov_order: integer at least 1 such that
p(x_t|x_t-1:x_1) = p(x_t|x_t-1:x_t-markov_order)
temperature: temperature for softmax
"""
self.mo = markov_order
self.np = n_primes
self.nc = n_composed
self.V = self.np + self.nc
self.T = temperature
self.min_len_def = min_len_definitions
self.max_len_def = max_len_definitions
self.features_size = features_size
# tokens are composed of a..z letters
alphabet = ''.join([chr(c) for c in range(97, 97+26)]) # str(a..z)
# tokens all have the same size tok_len
self.tok_len = int(np.log(self.V) / np.log(len(alphabet)) + 1)
# enumerate all the tokens
self.vocabulary = []
for i, tok in zip(range(self.V),
itertools.product(alphabet, repeat=self.tok_len)):
self.vocabulary.append(''.join(tok))
self.params = uniform(0,1,(self.mo * features_size, self.V))
self.features = uniform(0,1,(self.V,features_size))
self.dictionary = {}
for i in range(self.np, self.np+self.nc):
# sample len of def, sample def, store in dictionary
# then compute the features as a rescaled mean of the features
len_diff = self.max_len_def - self.min_len_def
len_def = np.random.choice(len_diff) + self.min_len_def
definition = np.random.choice(self.np, size=len_def, replace=False)
tok = self.vocabulary[i]
self.dictionary[tok] = [self.vocabulary[e] for e in definition]
#factor = np.random.beta(a=3, b=2.5) # closer to 1 than 0
#factor = np.random.beta(a=1, b=3) # closer to 0 than 1
factor = 1#1/(8*self.nc)
f = factor * np.mean([self.features[e] for e in definition], axis=0)
self.features[i] = f
self.initial_features = uniform(0,1,(self.mo, features_size))
# Source: generate_synthetic_data_alt.py (scraped web-page metadata removed)