def exportGenre(df, genre):
print ("Now exporting ", genre)
pd.set_option('display.width', 120)
df_genre = df[df['genre'] == genre]
print(df_genre.shape)
df_sample = df_genre.ix[np.random.choice(df_genre.index, 10000, replace=False)]
#print(df_sample)
print(df_sample.shape)
with open("lyrics/" + genre + ".txt", "a") as f:
for index, row in df_sample.iterrows():
f.write("<S>\n" + row['lyrics'] + "\n<E>\n")
# Collected example snippets demonstrating pandas set_option() usage
def print_full(x):
    """Print *x* in full, temporarily lifting pandas' row-display limit.

    Parameters
    ----------
    x : sized object (e.g. DataFrame or Series)
        Object to print; ``len(x)`` becomes the temporary max_rows value.
    """
    pd.set_option('display.max_rows', len(x))
    try:
        print(x)
    finally:
        # Restore the default even if printing raises, so the global
        # display option never leaks out of this helper.
        pd.reset_option('display.max_rows')
def result():
    """Show the profit/loss report with an expanded row-display limit."""
    pd.set_option('display.max_rows', 1000)
    profit_to_loss()
def show_data(x):
    """Print every tick recorded today for one instrument, untruncated.

    Parameters
    ----------
    x : int
        Instrument token used to filter the tick dump.

    NOTE(review): read_list/get_time are project helpers not visible here;
    presumably they load today's tick file with the listed columns.
    """
    df = read_list("Tick/" + get_time(),
                   ['last_price', 'volume', 'bp1', 'bo1', 'bq1', 'ap1', 'ao1', 'aq1',
                    'bp2', 'bo2', 'bq2', 'ap2', 'ao2', 'aq2', 'instrument_token', 'timestamp'])
    df = df.query('instrument_token == ' + str(x))
    # Show every matching row rather than pandas' truncated default view.
    pd.set_option('display.max_rows', len(df))
    # print-as-function form works on both Python 2 and 3 for one argument;
    # the original `print df` statement is a syntax error under Python 3.
    print(df)
def download_data(quote, day=0):
    """Download minute bars for an NSE symbol from the Google Finance CSV API.

    Parameters
    ----------
    quote : str
        NSE ticker symbol.
    day : int, optional
        Extra days of history; the request asks for day+1 days.

    Returns
    -------
    pandas.DataFrame
        OHLCV frame indexed by DATE. For multi-day requests the frame is
        truncated at the row where the next day's readings begin.
    """
    days = day + 1
    url1 = 'http://www.google.com/finance/getprices?q='
    url2 = '&x=NSE&i=60&p='
    url3 = 'd&f=d,c,o,h,l,v&df=cpct&auto=1&ts=1266701290218'
    # Not using the ts=1266701290218 parameter, if something goes wrong, do try it
    df = pd.read_csv(url1 + quote + url2 + str(days) + url3, header=4, parse_dates=True,
                     skiprows=[5, 6, 7])
    pd.set_option('display.max_rows', 100)
    if days > 1:
        i = 0
        for i in range(2, len(df)):
            # Rows that start a new day carry an 'a' prefix on the timestamp.
            if str(df.iat[i, 0]).startswith('a'):
                df.iat[i, 0] = df.iat[i, 0][1:]
            try:
                # Within a day the offsets grow; a negative delta marks the
                # wrap into the next day's readings.
                if int(df.iat[i, 0]) - int(df.iat[i - 2, 0]) < 0:
                    break
            except Exception:
                # Non-numeric timestamp cell: dump the frame and keep going
                # (was a bare except; narrowed to Exception so SystemExit/
                # KeyboardInterrupt are no longer swallowed).
                print(df)
                continue
        # .ix was removed from pandas; .loc keeps the old label-based,
        # end-inclusive slice behaviour on this integer index.
        df = df.loc[0:i - 2]
    df.columns = ['DATE', 'CLOSE', 'HIGH', 'LOW', 'OPEN', 'VOLUME']
    df = df.set_index('DATE')
    return df
def main():
    """Run Eskapade

    Top-level control function for an Eskapade run started from the
    command line. Arguments specified by the user are parsed and
    converted to settings in the configuration object. Optionally, an
    interactive IPython session is started when the run is finished.
    """
    # parse the command line
    cli_args = create_arg_parser().parse_args()

    # build the settings object: restored from a pickled file, or fresh
    if cli_args.unpickle_config:
        pickled_path = cli_args.config_files.pop(0)
        settings = ConfigObject.import_from_file(pickled_path)
        del cli_args.unpickle_config
    else:
        settings = ConfigObject()

    # register configuration macros and user options on the settings
    settings.add_macros(cli_args.config_files)
    settings.set_user_opts(cli_args)

    # perform the actual run
    core.execution.run_eskapade(settings)

    # drop into an interactive shell when requested (--interactive)
    if settings.get('interactive'):
        # fetch process-manager services: config object and data store
        proc_mgr = ProcessManager()
        settings = proc_mgr.service(ConfigObject)
        ds = proc_mgr.service(DataStore)

        # widen pandas display for comfortable inspection
        pd.set_option('display.width', 120)
        pd.set_option('display.max_columns', 50)

        log = logging.getLogger(__name__)
        log.info("Continuing interactive session ... press Ctrl+d to exit.\n")
        IPython.embed()
def main():
    """Summarise headline-generation quality with per-sentence BLEU scores.

    Reads test sentences, reference headlines and predicted headlines from
    fixed dataset paths, computes BLEU via getBLEUscore(), prints the
    average, and writes the 100 best predictions (by BLEU) to BLEU.txt.
    """
    desired_width = 600
    pd.set_option('display.width', desired_width)
    # sentence / true headline / predicted headline paths
    sentence_path = './dataset/test_enc.txt'
    true_headline_path = "./dataset/test_dec.txt"
    predicted_headline_path = "./output/predicted_test_headline.txt"
    # number of lines to read from each file
    number_of_lines_read = 400
    # The with-statements close each file on exit; the original code also
    # called close() redundantly -- and for the sentence file it closed ft
    # (already closed) instead of f. Those calls are removed.
    with open(true_headline_path) as ft:
        print("reading actual headlines...")
        true_headline = [next(ft).strip() for _ in range(number_of_lines_read)]
    with open(predicted_headline_path) as fp:
        print("reading predicted headlines...")
        predicted_headline = [next(fp).strip() for _ in range(number_of_lines_read)]
    with open(sentence_path) as f:
        print("reading sentences...")
        sentence = [next(f).strip() for _ in range(number_of_lines_read)]
    BLEUscore, avgBLEUscore = getBLEUscore(true_headline, predicted_headline)
    print("average BLEU score: %f" % avgBLEUscore)
    summary = list(zip(BLEUscore, predicted_headline, true_headline, sentence))
    df = pd.DataFrame(data=summary, columns=['BLEU score', 'Predicted headline', 'True headline', 'article'])
    df_sortBLEU = df.sort_values('BLEU score', ascending=False)
    # Store the top 100 predicted headlines in terms of BLEU score
    output_file = 'BLEU.txt'
    df_sortBLEU.head(100).to_csv(output_file, sep='\n', index=False,
                                 line_terminator='\n-------------------------------------------------\n')
    print("Finished creating results summary in %s!" % output_file)
def create_time_table(df, df_tsel, htmlname, col, vd, cfg):
    """Render df_tsel as an HTML table (with preview photos) beside a video.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least 'time' and 'number' columns, groupable by *col*.
    df_tsel : pandas.DataFrame
        Selection with 'person', 'start', 'end' columns; 'person' has the
        form '<name>_<groupkey>'. Mutated in place (columns added/renamed).
    htmlname : str
        Output HTML file path.
    col : str
        Column of *df* to group by.
    vd : object
        Provides photo_dir, the directory of '<number>.png' previews.
    cfg : object
        Provides videoName, the mp4 source for the embedded player.
    """
    grouped = df.groupby(col)
    # accurate start/end times and preview photos resolved per selection row
    accstarts = []
    accends = []
    photos = []
    for index, row in df_tsel.iterrows():
        person = row['person']
        i = person.split('_')[1]
        start = int(row['start'])
        end = int(row['end'])
        group = grouped.get_group(int(i))
        # snap start/end to actually recorded times within a 30-unit window
        accstart = group.query('abs(time-%f)<=30' % start).time.min()
        accend = group.query('abs(time-%f)<=30' % end).time.max()
        accstarts.append(accstart)
        accends.append(accend)
        num_photo = group.query('abs(time-%f)<=30' % start).number.tolist()[1]
        photo = '<img alt="not found" src="%s/%d.png" class="imgshow" onclick="goto(%d)"/>' % (vd.photo_dir, num_photo, accstart)
        photos.append(photo)
    df_tsel['photo'] = photos
    df_tsel['accstarts'] = format_time(np.array(accstarts))
    df_tsel['accends'] = format_time(np.array(accends))
    df_tsel = df_tsel[['person', 'accstarts', 'accends', 'photo']]
    df_tsel = df_tsel.sort_values('accstarts')
    df_tsel.columns = ['person', 'start', 'end', 'photo']
    # Assemble the HTML page fragments
    header = '<!DOCTYPE html> \n <html> \n <head> \n'
    css = '<link rel="stylesheet" href="styles.css"> <link rel="stylesheet" href="table.css"> \n'
    js = '<script src="/Users/chiachun/Exp/tagly4/demo/pvideo.js"> </script> \n'
    header2 = '</head> \n <body> '
    lvideo1 = ' <div style="float:left;margin-right:15px;"> <video id="Video1" height="400" controls> '
    lvideo2 = '<source src="%s" type="video/mp4"> </video> </div> \n' % cfg.videoName
    div1 = '<div style="overflow-x:auto;">\n'
    div2 = '</div> </body> </html>'
    # None (rather than the removed -1 sentinel) disables column truncation
    pd.set_option('display.max_colwidth', None)
    # with-statement guarantees the file is closed even if to_html raises
    with open(htmlname, 'w') as fout:
        fout.write(header)
        fout.write(css)
        fout.write(js)
        fout.write(header2)
        fout.write(lvideo1)
        fout.write(lvideo2)
        fout.write(div1)
        fout.write(df_tsel.to_html(escape=False, index=False))
        fout.write(div2)
def run(self):
    """Compute 5/10/20/30/60-day moving averages per stock and store them.

    Reads the stock list for self.args[2] (a YYYYMMDD dateline), loads each
    stock's trades since 20150101, applies the chuquan (ex-rights)
    adjustment, derives MA columns and inserts one s_stock_average row for
    the given day per stock.
    """
    print(self.args)
    day = self.args[2]
    pandas.set_option('display.width', 200)
    d2 = self.mysql.getRecord("select s_code from s_stock_list where dateline=%s" % day)
    for row in d2:
        s_code = row['s_code']
        self._chQ = self.getChuQuan(s_code)
        sql_data = "select s_code,code,dateline,chg_m,chg,open,close,high,low,last_close,name FROM s_stock_trade WHERE s_code ='%s' and dateline >20150101 " % s_code
        print(sql_data)
        tmpdf2 = pandas.read_sql(sql_data, self.mysql.db)
        tmpdf = tmpdf2.apply(self.format_chuquan_hanlder, axis=1)
        # NOTE(review): the result of sort_values is discarded, so this line
        # has no effect. Kept as-is because the MA columns below depend on
        # the current row order -- confirm intent before assigning it.
        tmpdf.sort_values(by=('dateline'), ascending=False)
        ma_list = [5, 10, 20, 30, 60]
        for ma in ma_list:
            # pandas.rolling_mean() was removed; .rolling(n).mean() with the
            # default min_periods is the exact equivalent.
            tmpdf['MA_' + str(ma)] = tmpdf['close'].rolling(ma).mean()
        last5 = tmpdf.tail(60)
        for i5 in range(0, len(last5)):
            if str(last5.iloc[i5].dateline) != day:
                continue
            word = s_code[2:] + str(last5.iloc[i5].dateline)
            # rows whose short MAs are still NaN have too little history
            if math.isnan(last5.iloc[i5].MA_5):
                break
            if math.isnan(last5.iloc[i5].MA_10):
                break
            _m60 = last5.iloc[i5].MA_60
            _m60 = 0 if math.isnan(_m60) else round(_m60, 2)
            _m30 = last5.iloc[i5].MA_30
            _m30 = 0 if math.isnan(_m30) else round(_m30, 2)
            item = {}
            item['s_code'] = s_code
            item['dateline'] = last5.iloc[i5].dateline
            # md5 requires bytes on Python 3; encode() gives the same digest
            # for the ASCII codes used here.
            item['hash'] = hashlib.md5(word.encode()).hexdigest()
            item['ma5'] = round(last5.iloc[i5].MA_5, 2)
            item['ma10'] = round(last5.iloc[i5].MA_10, 2)
            item['ma20'] = round(last5.iloc[i5].MA_20, 2)
            item['ma30'] = _m30
            item['ma60'] = _m60
            self.mysql.dbInsert('s_stock_average', item)
def min_data(self):
    """Inspect one stock's intraday ticks resampled to 1-minute bars.

    Debug/exploration helper: loads the 20160607 runtime ticks for
    sz000048, resamples to per-minute last quotes, prints them and exits
    via sys.exit(). Everything after sys.exit() is an earlier manual
    per-minute bucketing experiment and is unreachable; it is kept for
    reference.
    """
    sql_data = "select * FROM s_stock_runtime WHERE dateline =20160607 and s_code='sz000048' "
    tmpdf = pandas.read_sql(sql_data, self.mysql.db)
    pandas.set_option('display.width', 400)
    # use the date string as the index so resample() can bucket by time
    tmpdf.set_index('date_str', inplace=True)
    # resample(how=...) was removed from pandas; .last() is the equivalent.
    # The rule could also be e.g. 'W', 'M', 'Q', '5min' or '12D'.
    period_stock_data = tmpdf.resample('1Min').last()
    # pandas.rolling_mean() was removed; .rolling(1).mean() is equivalent
    period_stock_data['MA_1'] = period_stock_data['B_1_price'].rolling(1).mean()
    print(period_stock_data)
    sys.exit()
    # ---- unreachable below: manual per-minute bucketing experiment ----
    df = pandas.DataFrame(columns=('k', 'v'))
    data = {}
    j = 0
    for i in range(len(tmpdf)):
        _min = tmpdf.iloc[i].min_sec
        # ignore post-close ticks once the 150000 bucket exists
        if _min > 150000 and '150000' in data.keys():
            continue
        # drop the seconds digits and rebuild an HHMM00 bucket key
        _min = str(_min)
        _min = _min[0:-2]
        _min_str = "%s00" % _min
        if _min_str not in data.keys():
            j += 1
            data[_min_str] = {'v': tmpdf.iloc[i].B_1_price}
            df.loc[j] = {'k': _min_str, 'v': tmpdf.iloc[i].B_1_price}
    print(df)
def init(self, setting):
    """Load the trading window described by *setting* into DataFrames.

    Parameters
    ----------
    setting : dict
        Recognised keys: 'start'/'end' (YYYYMMDD datelines; at least one
        required or the process exits), 'limit' (opening days to look
        back, default 100), 'universe' (stock-code filter) and
        'is_open_chuquan' (apply the ex-rights adjustment when truthy).

    Side effects: populates self.today, self.lastDay, self.yestoday,
    self.df, self.todayDF and self.yestodayDF.
    """
    limit = 100
    if 'limit' in setting.keys():
        limit = setting['limit']
    _where = []
    s_keys_list = setting.keys()
    if 'start' not in s_keys_list and 'end' not in s_keys_list:
        print(u"StartTime OR EndTime is Error")
        sys.exit()
    _today = self.tools.d_date('%Y%m%d')
    # default missing bounds: end -> today, start -> end (single day)
    if 'end' not in setting.keys():
        setting['end'] = _today
    if 'start' not in setting.keys():
        setting['start'] = setting['end']
    if setting['start'] == setting['end']:
        _where.append(" dateline = %s" % setting['end'])
    else:
        _where.append(" dateline <= %s" % setting['end'])
        _where.append(" dateline >= %s" % setting['start'])
    if 'universe' in setting.keys():
        s_codes = " s_code in(%s)" % self.___set_universe(setting['universe'])
        _where.append(s_codes)
    _wheres = ' AND '.join(_where)
    # NOTE(review): the '?' runs in this message are mojibake from the
    # original source encoding; the bytes are kept unchanged.
    print(u"=======????===%s====" % setting['end'])
    date_sql = "select dateline FROM s_opening_day WHERE dateline <=%s order by dateline desc limit %s" % (setting['end'], limit)
    print(date_sql)
    temp = self.mysql.getRecord(date_sql)
    self.today = _today
    # most recent opening day, and the one before it
    self.lastDay = temp[0]['dateline']
    self.yestoday = temp[1]['dateline']
    pandas.set_option('display.width', 200)
    sql_data = "select s_code,code,dateline,chg_m,chg,open,close,high,low,last_close,name,amount,run_market FROM s_stock_trade WHERE %s " % _wheres
    tmpdf = pandas.read_sql(sql_data, self.mysql.db)
    # optionally apply the chuquan (ex-rights) price adjustment
    if ('is_open_chuquan' in setting.keys()) and setting['is_open_chuquan']:
        self._chQ = self.getChuQuan()
        self.df = tmpdf.apply(self.format_chuquan_hanlder, axis=1)
    else:
        self.df = tmpdf
    self.todayDF = self.df[self.df.dateline == int(self.lastDay)]
    self.yestodayDF = self.df[self.df.dateline == int(self.yestoday)]
    print("========init Days & init stock trader Done.")
def main():
    """Fetch daily history for a few SSE tickers with tushare and plot them.

    NOTE(review): each downloaded frame is injected into the module
    namespace via globals(), so `zsyh` below refers to the frame fetched
    for code 600036. The original inline comments were mojibake; they have
    been replaced with best-effort English notes.
    """
    # pinyin key -> exchange code; frames are bound to module-level names
    stock_list = {"zsyh":"600036","jsyh":"601939","szzs":"000001","pfyh":"600000","msyh":"600061"}
    for stock, code in stock_list.items():
        globals()[stock] = tsh.get_hist_data(code,start="2015-01-01",end="2016-04-16")
        # code: ticker to fetch; start/end: date range (original note garbled)
    make_end_line()
    print(zsyh.head())
    make_end_line()
    print(zsyh.columns)
    make_end_line()
    """
    Columns returned by tushare get_hist_data (original notes were garbled):
    date, open, high, close, low, volume, price_change, p_change,
    ma5/ma10/ma20 (price moving averages), v_ma5/v_ma10/v_ma20
    (volume moving averages), turnover.
    """
    print(zsyh.describe())
    make_end_line()
    print(zsyh.info())
    make_end_line()
    # line plot of the closing price
    plt.show(zsyh["close"].plot(figsize=(12,8)))
    #pd.set_option("display.float_format", lambda x: "%10.3f" % x)
    plt.show(zsyh["volume"].plot(figsize=(12,8)))
    # close + moving averages, one subplot each, then overlaid
    zsyh[["close","ma5","ma10","ma20"]].plot(subplots = True)
    plt.show()
    plt.show(zsyh[["close","ma5","ma10","ma20"]].plot(figsize=(12,8),linewidth=2))
    plt.show(zsyh["p_change"].plot())
    plt.show(zsyh["p_change"].plot(figsize=(10,4),legend=True,linestyle="--",marker="o"))
    # distribution of daily percentage change
    plt.show(zsyh["p_change"].hist(bins=20))
    plt.show(zsyh["p_change"].plot.kde())
    # kernel density estimation of p_change, via seaborn as well
    plt.show(sns.kdeplot(zsyh["p_change"].dropna()))
    plt.show(sns.distplot(zsyh["p_change"].dropna()))
def explain_group(parent):
    """Scheduler explain CLI group.

    Registers an `explain` click group (with `queue` and `placement`
    subcommands) on *parent*. Registration happens via the decorators;
    nothing is returned.
    """
    def _print_frame(df):
        """Prints dataframe."""
        # Lift pandas display limits so the report is never truncated.
        if not df.empty:
            pd.set_option('display.max_rows', None)
            pd.set_option('float_format', lambda f: '%f' % f)
            pd.set_option('expand_frame_repr', False)
            print(df.to_string(index=False))

    @parent.group()
    def explain():
        """Explain scheduler internals"""
        pass

    @explain.command()
    @click.option('--instance', help='Application instance')
    @click.option('--partition', help='Cell partition', default='_default')
    @cli.admin.ON_EXCEPTIONS
    def queue(instance, partition):
        """Explain the application queue"""
        cell_master = make_readonly_master()
        frame = reports.explain_queue(cell_master.cell,
                                      partition,
                                      pattern=instance)
        _print_frame(frame)

    @explain.command()
    @click.argument('instance')
    @click.option('--mode', help='Tree traversal method',
                  type=click.Choice(reports.WALKS.keys()), default='default')
    @cli.admin.ON_EXCEPTIONS
    def placement(instance, mode):
        """Explain application placement"""
        cell_master = make_readonly_master()
        if instance not in cell_master.cell.apps:
            cli.bad_exit('Instance not found.')
        app = cell_master.cell.apps[instance]
        if app.server:
            # NOTE(review): 'Instace' typo is in the original user-facing
            # message; left unchanged here.
            cli.bad_exit('Instace already placed on %s' % app.server)
        frame = reports.explain_placement(cell_master.cell, app, mode)
        _print_frame(frame)

    # The commands are already registered on the click group by the
    # decorators above; delete the local names to mark them as
    # intentionally unused.
    del queue
    del placement