Python prepare_data() example source code

# Assumes module-level imports: numpy as np, pearsonr/spearmanr from
# scipy.stats, and the project's utils module providing prepare_data().
def getCorrelation(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        else:
            X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = utils.prepare_data(seq1)
    x2,m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
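utils.prepare_data itself is not shown on this page. From the way it is called in these snippets (a list of word-index sequences in, a padded matrix plus a mask out), a minimal sketch could look like the code below; the dtypes and the zero-padding convention are assumptions, not the project's actual implementation.

import numpy as np

def prepare_data(list_of_seqs):
    # Pad variable-length index sequences into one (n_samples, maxlen) matrix
    # and build a parallel 0/1 mask marking the real (non-padding) positions.
    lengths = [len(s) for s in list_of_seqs]
    n_samples = len(list_of_seqs)
    maxlen = max(lengths)
    x = np.zeros((n_samples, maxlen), dtype='int32')
    x_mask = np.zeros((n_samples, maxlen), dtype='float32')
    for idx, seq in enumerate(list_of_seqs):
        x[idx, :lengths[idx]] = seq
        x_mask[idx, :lengths[idx]] = 1.
    return x, x_mask

The (x, mask) pair produced this way is what scoring_function and feedforward_function consume throughout the examples from this project.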
Source: evaluate.py, project: Learning-sentence-representation-with-guidance-of-human-attention (author: wangshaonan)
def getCorrelation(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = utils.prepare_data(seq1)
    x2,m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
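The two getCorrelation variants expect different column orders in the tab-separated input. A hypothetical call could look like the following; model, words and the file name are placeholders that would come from the project's training and vocabulary-loading code.

# First variant:  gold_score <TAB> sentence1 <TAB> sentence2
# Second variant: sentence1 <TAB> sentence2 <TAB> gold_score
pearson, spearman = getCorrelation(model, words, 'sts-test.txt')  # hypothetical file name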
Source: evaluate.py, project: Learning-sentence-representation-with-guidance-of-human-attention (author: wangshaonan)
def getAcc(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    ct = 0
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = i[0]
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        else:
            X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        ct += 1
        if ct % 100 == 0:
            x1,m1 = utils.prepare_data(seq1)
            x2,m2 = utils.prepare_data(seq2)
            scores = model.scoring_function(x1,x2,m1,m2)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
            seq2 = []
        golds.append(score)
    if len(seq1) > 0:
        x1,m1 = utils.prepare_data(seq1)
        x2,m2 = utils.prepare_data(seq2)
        scores = model.scoring_function(x1,x2,m1,m2)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return acc(preds,golds)
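getAcc buffers sentence pairs and flushes them through prepare_data and scoring_function every 100 lines, with one final flush for the remainder, so each mini-batch is only padded to its own longest sentence; golds keeps the raw label column for acc() to compare the predictions against.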
Source: evaluate.py, project: Learning-sentence-representation-with-guidance-of-human-attention (author: wangshaonan)
def getAcc_para(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    ct = 0
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = i[0]
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        else:
            X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        ct += 1
        if ct % 100 == 0:
            x1,m1 = utils.prepare_data(seq1)
            x2,m2 = utils.prepare_data(seq2)
            scores = model.scoring_function(x1,x2,m1,m2)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
            seq2 = []
        golds.append(score)
    if len(seq1) > 0:
        x1,m1 = utils.prepare_data(seq1)
        x2,m2 = utils.prepare_data(seq2)
        scores = model.scoring_function(x1,x2,m1,m2)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return acc_para(preds,golds)
Source: evaluate.py, project: Learning-sentence-representation-with-guidance-of-human-attention (author: wangshaonan)
def getAccSentiment(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    ct = 0
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; score = i[1]
        X1 = getSeq(p1,words)
        seq1.append(X1)
        ct += 1
        if ct % 100 == 0:
            x1,m1 = utils.prepare_data(seq1)
            scores = model.scoring_function(x1,m1)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
        golds.append(score)
    if len(seq1) > 0:
        x1,m1 = utils.prepare_data(seq1)
        scores = model.scoring_function(x1,m1)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return accSentiment(preds,golds)
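getAccSentiment applies the same batching pattern to single sentences: each input line is expected to hold a sentence and its label separated by a tab, and scoring_function here takes just one padded matrix and its mask.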
Source: ppdb_utils.py, project: Learning-sentence-representation-with-guidance-of-human-attention (author: wangshaonan)
def getpairs(model, batch, params):
    g1 = []
    g2 = []
    for i in batch:
        g1.append(i[0].embeddings)
        g2.append(i[1].embeddings)
    g1x, g1mask = utils.prepare_data(g1)
    g2x, g2mask = utils.prepare_data(g2)
    embg1 = model.feedforward_function(g1x, g1mask)
    embg2 = model.feedforward_function(g2x, g2mask)
    for idx, i in enumerate(batch):
        i[0].representation = embg1[idx, :]
        i[1].representation = embg2[idx, :]
    pairs = getPairsFast(batch, params.type)
    p1 = []
    p2 = []
    for i in pairs:
        p1.append(i[0].embeddings)
        p2.append(i[1].embeddings)
    p1x, p1mask = utils.prepare_data(p1)
    p2x, p2mask = utils.prepare_data(p2)
    return (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask)
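getpairs first embeds both sides of every gold pair in the batch and stores the representations on the batch items; getPairsFast then uses those representations to select additional pairs (presumably negative examples, with params.type choosing the selection strategy), and the returned tuple bundles the padded index matrices and masks for both the gold and the selected pairs.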
def evaluate(self, wopt, int_opt, X_test, base_model=None):
    # get the true class labels
    y_true = self.query(X_test)
    if X_test.shape[1] != self.num_features():
        X_test = self.encode(X_test)
    # predict classes using the optimized coefficients
    y_pred = predict_classes(X_test, wopt, int_opt, self.classes)
    """
    _, _, X, _, _ = utils.prepare_data(self.model_id, onehot=False)
    X = X.values
    for i in range(len(y_true)):
        if y_true[i] != y_pred[i]:
            print y_true[i], y_pred[i], X[i]
    """
    if base_model is not None:
        y_pred_base = base_model.predict(X_test)
        return accuracy_score(y_true, y_pred), \
            accuracy_score(y_true, y_pred_base)
    return accuracy_score(y_true, y_pred)
def gen_data(p, data, batch_size = 1):
    # generate data for the model
    # y in train data is a matrix (batch_size, seq_length)
    # y in test data is an array
    x = data['x'][p:p + batch_size]
    y = data['y'][p:p + batch_size]
    batch_data = {'x':x,'y':y}
    if data.has_key('t'):
        batch_data['t'] = data['t'][p:p + batch_size]
    ret = utils.prepare_data(batch_data, VOCAB_SIZE, one_hot=ONE_HOT, sigmoid_on=SIGMOID_ON)
    return ret
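Note that gen_data comes from a different codebase: its prepare_data takes a dict of mini-batch arrays plus a vocabulary size and encoding flags rather than a list of index sequences, and VOCAB_SIZE, ONE_HOT and SIGMOID_ON are module-level constants of that project (the has_key call also marks this as Python 2 code).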
def getAcc(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    ct = 0
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = i[2]
        X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        ct += 1
        if ct % 100 == 0:
            x1,m1 = utils.prepare_data(seq1)
            x2,m2 = utils.prepare_data(seq2)
            scores = model.scoring_function(x1,x2,m1,m2)
            scores = np.squeeze(scores)
            preds.extend(scores.tolist())
            seq1 = []
            seq2 = []
        golds.append(score)
    if len(seq1) > 0:
        x1,m1 = utils.prepare_data(seq1)
        x2,m2 = utils.prepare_data(seq2)
        scores = model.scoring_function(x1,x2,m1,m2)
        scores = np.squeeze(scores)
        preds.extend(scores.tolist())
    return acc(preds,golds)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('data', type=str, help='a dataset')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--incomplete', dest='incomplete',
                        action='store_true', help='allow incomplete queries')
    args = parser.parse_args()
    dataset = args.data
    seed = args.seed
    incomplete = args.incomplete
    verbose = args.verbose
    if verbose:
        level = logging.INFO
        logger = logging.getLogger()
        logger.setLevel(level)
        ch = logging.StreamHandler(sys.stderr)
        ch.setLevel(level)
        formatter = logging.Formatter('%(message)s')
        ch.setFormatter(formatter)
        logger.addHandler(ch)
    np.random.seed(seed)
    _, _, X, _, _ = utils.prepare_data(dataset, onehot=False, labelEncode=False)
    cat_idx = [i for i in range(len(X.columns))
               if isinstance(X.iloc[0][i], basestring)]
    cont_idx = range(X.shape[1])
    for i in cat_idx:
        cont_idx.remove(i)
    X = X[cat_idx + cont_idx].values
    ext = AWSRegressionExtractor(dataset, X.copy(), cat_idx,
                                 incomplete=incomplete)
    try:
        X_test = X[0:500]
        if ext.binning:
            r = -decimal.Decimal(str(ext.eps)).as_tuple().exponent
            for i, t in enumerate(ext.feature_types):
                if t == "NUMERIC":
                    X_test[:, i] = np.round(X_test[:, i].astype(np.float), r)
    except ValueError:
        X_test = None
    ext.run(args.data, X_test, 500, random_seed=seed,
            alphas=[1], methods=['passive'], baseline=False)
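In the evaluate and main snippets, prepare_data is yet another utility: a dataset loader that takes a dataset or model identifier plus encoding flags (onehot, labelEncode) and returns a tuple whose third element is a feature table accessed through .columns, .iloc and .values, i.e. a pandas-style DataFrame. The triple-quoted block inside evaluate is debugging code that has been disabled by turning it into a bare string literal, and the basestring check and list-returning range mark this code as Python 2 as well.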