def test_leak_in_structured_dtype_comparison(self):
    # gh-6250
    recordtype = np.dtype([('a', np.float64),
                           ('b', np.int32),
                           ('d', (np.str, 5))])

    # Simple case
    a = np.zeros(2, dtype=recordtype)
    for i in range(100):
        a == a
    assert_(sys.getrefcount(a) < 10)

    # The case in the bug report.
    before = sys.getrefcount(a)
    u, v = a[0], a[1]
    u == v
    del u, v
    gc.collect()
    after = sys.getrefcount(a)
    assert_equal(before, after)
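The leak check works because sys.getrefcount reports how many references an object currently has; a leaked reference per comparison would make the count climb. A minimal standalone illustration of that mechanism:

import sys

obj = object()
baseline = sys.getrefcount(obj)

refs = [obj for _ in range(100)]   # take 100 new references
assert sys.getrefcount(obj) == baseline + 100

del refs                           # release them again
assert sys.getrefcount(obj) == baseline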
def add_node_attribute(inFile, pedgraph, animal=1, atCol=4, atName="attr1"):
    """
    inFile   - pedigree as .txt file
    pedgraph - pedigree as a networkx graph object
    animal   - column for the animal ID
    atCol    - column for the attribute
    atName   - name for the attribute
    """
    ped_df = pd.read_table(inFile, header=None, delim_whitespace=True)
    dic_ped = dict(zip(ped_df[animal - 1], ped_df[atCol - 1]))
    correct_dic_ped = {str(k): int(v) for k, v in dic_ped.items()}
    for node, value in correct_dic_ped.items():
        # store under the name passed in atName (the original hardcoded "EBV" here)
        pedgraph.node[node][atName] = value
    return correct_dic_ped
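A usage sketch for the two pedigree helpers (the file name and column layout are assumptions for illustration, not from the original project):

import networkx as nx

# hypothetical pedigree.txt: whitespace-separated columns animal, sire, dam, ebv
pedgraph = nx.DiGraph()
pedgraph.add_nodes_from(['101', '102', '103'])
ebvs = add_node_attribute('pedigree.txt', pedgraph, animal=1, atCol=4, atName='EBV')
print(pedgraph.node['101']['EBV'])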
def add_ebv_attribute(inFile, pedgraph, animal=1, atCol=4, atName="attr1"):
    """
    Same as add_node_attribute, but stores the attribute with its sign flipped.
    inFile   - pedigree as .txt file
    pedgraph - pedigree as a networkx graph object
    animal   - column for the animal ID
    atCol    - column for the attribute
    atName   - name for the attribute
    """
    ped_df = pd.read_table(inFile, header=None, delim_whitespace=True)
    dic_ped = dict(zip(ped_df[animal - 1], ped_df[atCol - 1]))
    correct_dic_ped = {str(k): int(-v) for k, v in dic_ped.items()}
    for node, value in correct_dic_ped.items():
        # use the sign-flipped value, so the stored attribute matches the returned dict
        pedgraph.node[node]["EBV"] = value
    return correct_dic_ped
def __init__(self, filename='word2vec.pklz'):
    """
    Py Word2vec wrapper: loads a pickled word -> vector mapping.
    """
    super().__init__()
    self.name = 'word2vec'
    self.load(filename)
    self.vocab_cnt = len(self)
    self.dims = self[list(self.keys())[0]].shape[0]
    print('vocabulary size: ' + str(self.vocab_cnt))
    print('vector dimensions: ' + str(self.dims))
    self.word2idx = {w: i for i, w in enumerate(self.keys())}
    self.idx2word = {i: w for i, w in enumerate(self.keys())}
    self._matrix = np.array(list(self.values()))
    print(self._matrix.shape)
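get_antonyms below calls find_nearest_word, which is not part of this snippet. A minimal cosine-similarity sketch of what such a method could look like, built on the _matrix and idx2word attributes above (an assumption, not the project's actual implementation):

def find_nearest_word(self, vector, topk=10):
    # cosine similarity between the query vector and every row of the embedding matrix
    norms = np.linalg.norm(self._matrix, axis=1) * np.linalg.norm(vector)
    sims = self._matrix.dot(vector) / np.maximum(norms, 1e-12)
    best = np.argsort(-sims)[:topk]
    return [(self.idx2word[i], float(sims[i])) for i in best]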
def get_antonyms(self, wordA: str, topk: int = 10, ispositive: bool = True):
    # seed antonym pairs; the original Chinese words were lost to an encoding error
    seed = [['??', '??'], ['??', '??'], ['??', '??'], ['??', '??'], ['??', '??']]
    proposal = {}
    for pair in seed:
        if ispositive:
            result = self.analogy(pair[0], pair[1], wordA, topk)
        else:
            result = self.analogy(pair[1], pair[0], wordA, topk)
        # debug: words nearest the midpoint of the seed pair
        print(self.find_nearest_word((self[pair[0]] + self[pair[1]]) / 2, 3))
        for item in result:
            # count dimensions where the candidate and wordA have opposite signs
            term_products = np.argwhere(self[wordA] * self[item[0]] < 0)
            if len(term_products) >= self.dims / 4:
                if item[0] not in proposal:
                    proposal[item[0]] = item[1]
                elif item[1] > proposal[item[0]]:
                    proposal[item[0]] += item[1]
    for k, v in proposal.items():
        proposal[k] = v / len(seed)
    sortitems = sorted(proposal.items(), key=lambda d: d[1], reverse=True)
    return [sortitems[i] for i in range(min(topk, len(sortitems)))]
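The method leans on word-vector analogy arithmetic: analogy(a, b, c) finds the word nearest to vec(b) - vec(a) + vec(c). A toy illustration with made-up 2-D vectors (values chosen only so the geometry works out):

import numpy as np

vectors = {
    'hot':  np.array([ 1.0,  0.2]),
    'cold': np.array([-1.0,  0.2]),
    'warm': np.array([ 0.8,  0.25]),
    'fast': np.array([ 0.9, -0.3]),
    'slow': np.array([-0.9, -0.3]),
}

def analogy(a, b, c):
    # word nearest to vec(b) - vec(a) + vec(c), excluding the query words
    query = vectors[b] - vectors[a] + vectors[c]
    candidates = [w for w in vectors if w not in (a, b, c)]
    return max(candidates,
               key=lambda w: vectors[w].dot(query)
               / (np.linalg.norm(vectors[w]) * np.linalg.norm(query)))

print(analogy('hot', 'cold', 'fast'))  # 'slow': the antonym direction transfers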
def __init__(self):
    # create a ./data working directory if it does not exist yet
    current = os.getcwd()
    folder = os.path.join(current, 'data')
    if not os.path.exists(folder):
        os.mkdir(folder)
    os.chdir(folder)
    # basics of all A-share stocks, read from a cached CSV
    # (ts.get_stock_basics() would fetch them live from tushare)
    # df0 = ts.get_stock_basics()
    df0 = pd.read_csv('bases.csv', dtype={'code': np.str})
    self.bases = df0.sort_values('timeToMarket', ascending=False)
    # recently listed stocks: first traded between 2017-01-01 and 2017-04-01
    self.cxg = self.bases[(self.bases['timeToMarket'] > 20170101) &
                          (self.bases['timeToMarket'] < 20170401)]
    self.codes = self.cxg['code'].values
def getBigDeal(self, code, vol):
    df = ts.get_today_ticks(code)
    t = df[df['volume'] > vol]
    s = df[df['amount'] > 100000000]
    print('\n')
    if t.size != 0:
        print("Big volume:")
        print(self.bases[self.bases['code'] == str(code)]['name'].values[0])
        print(t)
    if s.size != 0:
        print("Big amount:")
        print(self.bases[self.bases['code'] == str(code)]['name'].values[0])
        print(s)
    r = df[df['volume'] > vol * 10]
    if r.size != 0:
        print("Super volume:")
        print(self.bases[self.bases['code'] == str(code)]['name'].values[0])
        print(r)
def years(self):
    df_list = []
    k = [str(i) for i in range(1, 13)]
    print(k)
    j = [i for i in range(1, 13)]
    result = []
    for i in range(1, 13):
        filename = '2016-%s.xls' % str(i).zfill(2)
        # the dtype key and the three fee columns are Chinese names,
        # lost to an encoding error
        t = pd.read_table(filename, encoding='gbk', dtype={u'????': np.str})
        fee = t[u'???'].sum() + t[u'???'].sum() + t[u'????'].sum()
        print(i, " fee: ", fee)
        df_list.append(t)
        result.append(fee)
    df = pd.concat(df_list, keys=k)
    # df.to_excel('2016_delivery_order.xls')
    self.caculation(df)
    plt.plot(j, result)
    plt.show()
def getTotal():
    path = os.path.join(os.getcwd(), 'data')
    os.chdir(path)
    all = pd.read_csv('bases.csv', dtype={'code': np.str})
    all_code = all['code'].values
    lists = []
    for i in all_code:
        df = ts.get_k_data(i, start='2017-07-17', end='2017-07-17')
        lists.append(df)
    # concatenate the per-stock frames (pd.DataFrame(lists) would nest them)
    all_df = pd.concat(lists)
    print(all_df)
    all_df.to_csv('2017-all.csv', encoding='gbk')
    all_df.to_excel('2017-excel.xls')
def add_code_redis():
    rds = redis.StrictRedis(REDIS_HOST, 6379, db=0)
    rds_1 = redis.StrictRedis(REDIS_HOST, 6379, db=1)
    df = ts.get_stock_basics()
    df = df.reset_index()
    # clear any stale data before repopulating
    if rds.dbsize() != 0:
        rds.flushdb()
    if rds_1.dbsize() != 0:
        rds_1.flushdb()
    for i in range(len(df)):
        code, name, timeToMarket = df.loc[i]['code'], df.loc[i]['name'], df.loc[i]['timeToMarket']
        d = dict({code: ':'.join([name, str(timeToMarket)])})
        rds.set(code, name)        # db 0: code -> name
        rds_1.lpush('codes', d)    # db 1: list of {code: "name:timeToMarket"} entries
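A hedged sketch of reading the stored data back (REDIS_HOST and the sample code '600848' are placeholders; the key layout follows the function above):

import redis

rds = redis.StrictRedis(REDIS_HOST, 6379, db=0, decode_responses=True)
print(rds.get('600848'))            # name stored under that code, if present

rds_1 = redis.StrictRedis(REDIS_HOST, 6379, db=1, decode_responses=True)
print(rds_1.lrange('codes', 0, 4))  # first few entries pushed to the list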
def read_cufflinks(sample_path, isoforms=False):
    ''' Function for reading a Cufflinks quantification result.

    Returns
    -------
    A pandas.Series with the expression values in the sample.
    '''
    if isoforms:
        quant_file = sample_path + '/isoforms.fpkm_tracking'
    else:
        quant_file = sample_path + '/genes.fpkm_tracking'
    df = pd.read_table(quant_file, engine='c',
                       usecols=['tracking_id', 'FPKM'],
                       index_col=0,
                       dtype={'tracking_id': np.str, 'FPKM': np.float64})
    df['tracking_id'] = df.index
    df = df.groupby('tracking_id').sum()
    df['TPM'] = df['FPKM'] / df['FPKM'].sum() * 1e6
    df = df.rename(columns={'tracking_id': 'target_id'})
    return df['TPM']
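Usage is one call per Cufflinks sample directory (the path is a placeholder):

tpm = read_cufflinks('cufflinks_output/sample_01')  # hypothetical sample directory
print(tpm.head())                                   # TPM values indexed by tracking_id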
def tensor2state(tensor_frd, tensor_emy):
    '''
    Transform two one-hot piece tensors into a board state.
    tensor_frd, tensor_emy : ndarray [9, 10, 16]
    Returns state : ndarray [10, 9] of single characters
    '''
    assert tensor_frd.shape == tensor_emy.shape
    state = np.zeros((10, 9), dtype=np.str)
    chessfrdplayer = 'KAABBNNRRCCPPPPP'   # friendly pieces, one letter per layer
    chessemyplayer = 'kaabbnnrrccppppp'   # enemy pieces
    for i in range(tensor_frd.shape[0]):
        for j in range(tensor_frd.shape[1]):
            if ~(tensor_frd[i][j] == 0).all():
                layer = np.argmax(tensor_frd[i][j])
                state[j][i] = chessfrdplayer[layer]
            elif ~(tensor_emy[i][j] == 0).all():
                layer = np.argmax(tensor_emy[i][j])
                state[j][i] = chessemyplayer[layer]
            else:
                state[j][i] = ' '
    return state
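A quick self-check against the function's contract (inputs are one-hot along the last axis; note the [file][rank] input vs. [rank][file] output indexing):

import numpy as np

frd = np.zeros((9, 10, 16))
emy = np.zeros((9, 10, 16))
frd[4][0][0] = 1   # friendly king 'K' at file 4, rank 0
emy[4][9][0] = 1   # enemy king 'k' at file 4, rank 9

board = tensor2state(frd, emy)
print(board[0][4], board[9][4])  # K k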
def _get_value(self, var: str):
    """
    Utility method to return the value of the specified variable for this instance in the backing xarray data set.

    Parameters
    ----------
    var: str
        Name of the variable. There should be no reason to pass a str directly. Instead, the names defined in the
        _DataVar class should be used.

    Returns
    -------
    depending on variable
        The value of the specified variable for this instance
    """
    return self._data[var][dict(instance=self._instance)]
def contains(self,
             filename: str,
             chunk_nr: int) -> bool:
    """
    Check whether this data set contains an instance with the specified filename and chunk number.

    Parameters
    ----------
    filename: str
        The filename of the instance
    chunk_nr: int
        The chunk number of the instance

    Returns
    -------
    bool
        True, if this data set contains an instance with the specified filename and chunk number, False otherwise
    """
    if filename not in self._data[_DataVar.FILENAME].values:
        return False
    instances_with_filename = self._data.where(self._data[_DataVar.FILENAME] == filename)
    return chunk_nr in instances_with_filename[_DataVar.CHUNK_NR].values
def labels_nominal(self) -> np.ndarray:
    """
    Returns the nominal labels of all instances in this data set as a NumPy array.
    The order of labels in the returned array matches the order in which instances are stored in this data set.

    Returns
    -------
    numpy.ndarray
        The nominal labels of the instances in this data set

    Raises
    ------
    AttributeError
        If the data set is not fully labeled
    """
    if not self.is_fully_labeled:
        raise AttributeError("data set does not have label information")
    return self._data[_DataVar.LABEL_NOMINAL].values.astype(np.str)
def save(self, path: Path):
    """
    Writes this data set to the specified path.
    Any directories in the path that do not exist are automatically created.

    Parameters
    ----------
    path: pathlib.Path
    """
    if not path.parent.exists():
        path.parent.mkdir(parents=True)
    self.log.info("writing data set as netCDF4 to %s", path)
    self._data.to_netcdf(path=str(path),
                         engine="netcdf4",
                         format="NETCDF4")
test_chamber_of_deputies_dataset.py (project: serenata-toolbox, author: datasciencebr)
def _read_xz(self, filepath):
    dtype = {
        'applicant_id': np.str,
        'batch_number': np.str,
        'cnpj_cpf': np.str,
        'congressperson_document': np.str,
        'congressperson_id': np.str,
        'document_id': np.str,
        'document_number': np.str,
        'document_type': np.str,
        'leg_of_the_trip': np.str,
        'passenger': np.str,
        'reimbursement_number': np.str,
        'subquota_group_description': np.str,
        'subquota_group_id': np.str,
        'subquota_number': np.str,
        'term_id': np.str,
    }
    return pd.read_csv(filepath, dtype=dtype)
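Pinning these ID-like columns to str stops pandas from parsing them as integers, which would silently drop leading zeros, e.g. in cnpj_cpf. A small demonstration with made-up CSV content:

import io
import pandas as pd

raw = 'cnpj_cpf\n00123456789\n'
print(pd.read_csv(io.StringIO(raw))['cnpj_cpf'][0])                           # 123456789
print(pd.read_csv(io.StringIO(raw), dtype={'cnpj_cpf': str})['cnpj_cpf'][0])  # 00123456789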
def read_csv(self, name):
    filepath = os.path.join(self.path, name)
    log.info('Loading {}…'.format(name))
    dtype = {
        'applicant_id': np.str,
        'batch_number': np.str,
        'cnpj_cpf': np.str,
        'congressperson_document': np.str,
        'congressperson_id': np.str,
        'document_id': np.str,
        'document_number': np.str,
        'document_type': np.str,
        'leg_of_the_trip': np.str,
        'passenger': np.str,
        'reimbursement_number': np.str,
        'subquota_group_description': np.str,
        'subquota_group_id': np.str,
        'subquota_number': np.str,
        'term_id': np.str,
    }
    return pd.read_csv(filepath, dtype=dtype)
def pcaCreate(image_files, dir, name_num, dir_list):
    image_list = []
    new_file_name = dir
    save_dir = dir_list + new_file_name
    save_dir_tt = save_dir + "\\"
    for image_file in image_files:
        image_list.append(misc.imread(image_file))
    for image in image_list:
        img = np.asarray(image, dtype='float32')
        img = img / 255.
        img_size = img.size // 3          # integer pixel count per channel
        img1 = img.reshape(img_size, 3)
        img1 = np.transpose(img1)
        # covariance of the three colour channels over all pixels
        img_cov = np.cov([img1[0], img1[1], img1[2]])
        lamda, p = np.linalg.eig(img_cov)
        p = np.transpose(p)
        # random per-channel perturbation along the principal components
        alpha1 = random.normalvariate(0, 0.3)
        alpha2 = random.normalvariate(0, 0.3)
        alpha3 = random.normalvariate(0, 0.3)
        v = np.transpose((alpha1 * lamda[0], alpha2 * lamda[1], alpha3 * lamda[2]))
        add_num = np.dot(p, v)
        img2 = np.array([img[:, :, 0] + add_num[0],
                         img[:, :, 1] + add_num[1],
                         img[:, :, 2] + add_num[2]])
        img2 = np.swapaxes(img2, 0, 2)
        img2 = np.swapaxes(img2, 0, 1)
        misc.imsave(save_dir_tt + np.str(name_num) + '.jpg', img2)
        name_num += 1
    return image_list
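This is the PCA colour augmentation popularised by the AlexNet paper: each image is perturbed along the eigenvectors of its RGB covariance matrix, scaled by the eigenvalues and a random Gaussian factor. A hedged usage line (folder names are placeholders; the Windows-style '\\' separator is kept from the original):

import glob

files = glob.glob('raw_images/*.jpg')  # hypothetical input images
pcaCreate(files, 'augmented', name_num=0, dir_list='C:\\data\\')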
def dataset(self):
    path = self.update_datasets()
    self._dataset = pd.read_csv(path, dtype={'cnpj_cpf': np.str}, encoding='utf-8')
    self.prepare_dataset()
    return self._dataset
def setUp(self):
    self.dataset = pd.read_csv('rosie/core/tests/fixtures/invalid_cnpj_cpf_classifier.csv',
                               dtype={'recipient_id': np.str})
    self.subject = InvalidCnpjCpfClassifier()
test_monthly_subquota_limit_classifier.py (project: rosie, author: datasciencebr)
def setUp(self):
    self.full_dataset = pd.read_csv(
        self.MONTHLY_SUBQUOTA_LIMIT_FIXTURE_FILE, dtype={'subquota_number': np.str})
    self.dataset = self.full_dataset[
        ['applicant_id', 'subquota_number', 'issue_date', 'year', 'month', 'net_value']]
    self.test_result_dataset = self.full_dataset[['expected_prediction', 'test_case_description']]
    self.subject = MonthlySubquotaLimitClassifier()
    self.subject.fit_transform(self.dataset)
    self.prediction = self.subject.predict(self.dataset)
test_meal_price_outlier_classifier.py (project: rosie, author: datasciencebr)
def setUp(self):
    self.dataset = pd.read_csv('rosie/chamber_of_deputies/tests/fixtures/meal_price_outlier_classifier.csv',
                               dtype={'recipient_id': np.str})
    self.subject = MealPriceOutlierClassifier()
    self.subject.fit(self.dataset)