def write_data(self, file_descr):
    """
    Use DataFrame.to_excel to write into file_descr (a filename); if the file
    already exists, open it first so its existing sheets are preserved.
    """
    if os.path.isfile(file_descr):
        print(file_descr, 'exists')
        # Keep the existing data by loading the workbook into the writer
        book = load_workbook(file_descr)
        writer = pd.ExcelWriter(file_descr, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        self.data_df.to_excel(writer, sheet_name='Mongo_Schema', index=True,
                              float_format='%.2f')
        writer.save()
    else:
        self.data_df.to_excel(file_descr, sheet_name='Mongo_Schema', index=True,
                              float_format='%.2f')
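The writer.book / writer.sheets assignments above rely on older pandas internals that were removed in pandas 1.5. Below is a minimal sketch of the same append-to-an-existing-workbook idea using the documented mode='a' / if_sheet_exists options (pandas 1.3+ with the openpyxl engine); append_sheet, data_df and path are illustrative names, not part of the original code:

import os
import pandas as pd

def append_sheet(data_df, path, sheet_name='Mongo_Schema'):
    """Write data_df to sheet_name while keeping any other sheets already in the file."""
    if os.path.isfile(path):
        # mode='a' opens the existing workbook; if_sheet_exists='replace'
        # rewrites only this sheet and leaves the others untouched
        with pd.ExcelWriter(path, engine='openpyxl', mode='a',
                            if_sheet_exists='replace') as writer:
            data_df.to_excel(writer, sheet_name=sheet_name, index=True,
                             float_format='%.2f')
    else:
        data_df.to_excel(path, sheet_name=sheet_name, index=True,
                         float_format='%.2f')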
def write_excel(filename, **kwargs):
    """Write data tables to an Excel file, using kwarg names as sheet names.

    Parameters
    ----------
    filename : str
        The filename to write to.
    kwargs : dict
        Mapping from sheet names to data.
    """
    writer = pd.ExcelWriter(filename)
    for sheet_name, obj in kwargs.items():
        if isinstance(obj, dict):
            obj = _params_dict_to_dataframe(obj)
        if isinstance(obj, pd.DataFrame):
            obj.to_excel(writer, sheet_name=sheet_name)
    writer.save()
    writer.close()
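A hypothetical call, assuming results_df is a DataFrame and that the params dict is the kind of mapping _params_dict_to_dataframe expects; each keyword becomes one sheet of the workbook:

write_excel('run_summary.xlsx',
            results=results_df,                    # DataFrame, written as-is
            params={'alpha': 0.1, 'n_iter': 100})  # dict, converted to a frame first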
def write_frame(f, excel_writer, to_excel_args=None):
    """
    Write a pandas DataFrame to Excel by calling to_excel, returning an XLMap that
    can be used to determine the position of parts of f using pandas indexing.

    Parameters
    ----------
    f : DataFrame
        Frame to write to Excel
    excel_writer : str or ExcelWriter
        Path or existing ExcelWriter to use to write the frame
    to_excel_args : dict
        Additional arguments to pass to DataFrame.to_excel, see docs for DataFrame.to_excel

    Returns
    -------
    XLMap :
        Mapping that corresponds to the position in the spreadsheet the frame was written to.
    """
    xlf = XLDataFrame(f)
    # guard against the default None so the **-expansion does not fail
    return xlf.to_excel(excel_writer, **(to_excel_args or {}))
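A sketch of how this might be called, assuming XLDataFrame and XLMap come from the surrounding library; only the signature shown above is relied on here:

xlmap = write_frame(df, 'report.xlsx',
                    to_excel_args={'sheet_name': 'data', 'index': False})
# xlmap can then be queried with pandas-style indexing to locate the written
# cells, e.g. when adding charts or formatting on top of the frame.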
def writeExcelOutput(self, table):
    if isinstance(table, QTabWidget):
        filename = table.currentWidget().model()._filename
        writer = pd.ExcelWriter(filename, engine='xlsxwriter')
        for i in range(table.count()):
            data = table.widget(i)
            sheetname = table.tabText(i)
            data.model()._df.to_excel(writer, sheet_name=sheetname, index=False)
            data.model()._dirty = False
        writer.save()
    if isinstance(table, QTableView):
        filename = table.model()._filename
        writer = pd.ExcelWriter(filename, engine='xlsxwriter')
        table.model()._df.to_excel(writer, sheet_name='Sheet 1', index=False)
        table.model()._dirty = False
        writer.save()
def export_pingan_trust16():
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    writer = pd.ExcelWriter('time_searies.xlsx')
    for company in issuers['name']:
        print(company)
        query = db.pingan_total.find({"name": company}, {'_id': 0, 'name': 0}).sort("rptDate", 1)
        data = pd.DataFrame(list(query))
        data = data[['??', '??', '??', '??', '??', '??', '??', '??', 'rptDate']]
        data = data.rename(columns={"??": "???????", "??": "????", "??": "?????", "??": "???????"})
        data.to_excel(writer, sheet_name=company)
    writer.save()
#db = client.companies
#collection = db.total_nums
#insert_record = json.loads(result.to_json(orient='records'))
#ret = db.total_nums.insert_many(insert_record)
# ????
#collection = db.
#df = sued_in_arrears()
#insert_record = json.loads(df.to_json(orient='records'))
#ret = db.collection.insert_many(insert_record)
def gen_decile_table(model_top_precision, valid_df):
    """ Creates the decile table """
    valid_top_precision_df = valid_df[valid_df.model_id == model_top_precision]
    valid_dec_sort_df = valid_top_precision_df.sort_values('y_pred_proba', ascending=False)
    total_breaks = valid_dec_sort_df.y_true.sum()
    total_blocks = len(valid_dec_sort_df)
    dec_breaks = total_breaks / 10
    # integer division so the slice bounds below are valid integer indices
    dec_blocks = total_blocks // 10
    decile_df = pd.DataFrame(columns=('model_id', 'Decile', 'No_of_blocks', 'risk_mul', 'Actual_breaks',
                                      'Precision_in_decile', 'Recall_overall', 'Lift_above_random'))
    for i in range(10):
        break_sum = valid_dec_sort_df.y_true[i*dec_blocks:(i+1)*dec_blocks].sum()
        risk_mul = valid_dec_sort_df.y_pred_proba[i*dec_blocks:(i+1)*dec_blocks].sum()
        lift = break_sum / dec_breaks
        conversion = break_sum * 100 / dec_blocks
        recall = break_sum * 100 / total_breaks
        decile_df.loc[len(decile_df)] = [model_top_precision, i+1, dec_blocks, risk_mul, break_sum, conversion, recall, lift]
    decile_df.loc[len(decile_df)] = ['-', 'Total', total_blocks, '_', total_breaks, total_breaks/total_blocks, '-', '-']
    writer = pd.ExcelWriter('decile_table.xlsx', engine='xlsxwriter')
    decile_df.to_excel(writer, sheet_name='Sheet1')
    writer.save()  # without this the workbook is never written to disk
def save_performance(self, *args):
    w = pd.ExcelWriter("performance&%s.xls" % datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))

    def iter_save(dict_like, name=None):
        for key, data in dict_like.items():
            table = key if not name else name + "_" + key
            if isinstance(data, dict):
                iter_save(data, key)
                continue
            elif isinstance(data, pd.Series):
                data = pd.DataFrame(data)
            try:
                data.to_excel(w, table)
            except Exception as e:
                print(e)
                print("%s can not be saved as .xls file" % table)
                print(data)

    iter_save(self.output(*args))
    w.save()
def merge_gender_data(**kwargs):
    pni = kwargs[fpn.PN_INPUT]
    # get index from source data dict
    df = pd.DataFrame(index=pni.data_dict.keys())
    for k, v in kwargs.items():
        if k not in fpn.PIPE_NODE_KWARGS:
            for gender in ('M', 'F'):
                df[k + '_' + gender] = v[gender]
    return df
#@fpn.pipe_node
#def write_xlsx(**kwargs):
#pni = kwargs[fpn.PN_INPUT]
#xlsx_fp = os.path.join(pni.output_dir, 'output.xlsx')
#xlsx = pd.ExcelWriter(xlsx_fp)
#for k, df in pni.store_items():
#df.to_excel(xlsx, k)
#xlsx.save()
#return xlsx_fp
def approach_pipe_4b():
    a = (PN4.name_count_per_year(lambda n: n.lower().startswith('lesl'))
         | PN4.percent | fpn.store('lesl'))
    b = (PN4.name_count_per_year(lambda n: n.lower().startswith('dana'))
         | PN4.percent | fpn.store('dana'))
    f = (PN4.merge_gender_data(lesl=a, dana=b)
         | PN4.year_range(1920, 2000)
         | fpn.store('merged') * 100
         | PN4.plot('gender.png')
         | PN4.open_plot)
    pni = PN4.PNI('/tmp')
    f[pni]
    xlsx_fp = os.path.join(pni.output_dir, 'output.xlsx')
    xlsx = pd.ExcelWriter(xlsx_fp)
    for k, df in pni.store_items():
        df.to_excel(xlsx, k)
    xlsx.save()
    os.system('libreoffice --calc ' + xlsx_fp)
def GenerateXLSX(X, which=1):
    columns = []
    # generate column names
    for dimension in ['White', 'Black', 'Player', 'Opponent', 'Empty']:
        for i in range(1, 9):
            for char in 'abcdefgh':
                position = 'Position: ' + char + str(i) + ' (' + dimension + ')'
                columns.append(position)
    columns.append('White\'s Move Preceded This State')
    columns.append('Outcome')
    frame = pd.DataFrame(X, columns=columns)
    if which == 1:
        writer = pd.ExcelWriter(r'/Users/TeofiloZosa/PycharmProjects/BreakthroughANN/value_net_rank_binary/NPDataSets/WBPOE/UnshuffledBinaryFeaturePlanesDataset1.xlsx', engine='xlsxwriter')
    else:
        writer = pd.ExcelWriter(r'/Users/TeofiloZosa/PycharmProjects/BreakthroughANN/value_net_rank_binary/NPDataSets/WBPOE/UnshuffledBinaryFeaturePlanesDataset2.xlsx', engine='xlsxwriter')
    frame.to_excel(writer, 'Sheet1')
    writer.save()
def parse_statistics(logfile):
    xl = pd.ExcelFile(logfile)
    df = xl.parse("Sheet")
    df = df.sort_values(by='Line Numbers')
    writer = pd.ExcelWriter(logfile)
    df.to_excel(writer, sheet_name='Sheet', index=False)
    writer.save()
    wb = openpyxl.load_workbook(logfile)
    ws = wb.active
    row_count = ws.max_row
    column_count = ws.max_column
    chart = ScatterChart()
    chart.title = "Time upload domain names"
    chart.style = 13
    chart.x_axis.title = "Line numbers"
    chart.y_axis.title = "Time, sec"
    xvalues = Reference(ws, min_col=1, min_row=2, max_row=row_count)
    color_choice = ['3F888F', 'D24D57']
    for i in range(2, column_count + 1):
        values = Reference(ws, min_col=i, min_row=1, max_row=row_count)
        series = Series(values, xvalues, title_from_data=True)
        series.marker.symbol = "diamond"
        series.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.line.solidFill = color_choice[i-2]
        series.marker.graphicalProperties.solidFill = color_choice[i-2]
        series.graphicalProperties.line.width = 20000
        chart.series.append(series)
    chart.legend.legendPos = 'b'
    ws.add_chart(chart)
    wb.save(logfile)
Source: markov_stock_analysis v2-4.py (project markov_stock_analysis, author nb5hd)
def compare_securities_2x2(sec_list, weeks, thresh=0.0):
    """
    Writes an Excel sheet with each security's name, this week's percentage change, the mean of
    next week's predicted percentage change, and the standard deviation of next week's predicted
    percentage change.

    :param sec_list: <list> with all the security names
    :param weeks: <int> Number of weeks since the most recent recorded date (cannot use years/months
        because months and years have varying numbers of days; NumPy requires fixed-size units for
        datetime arithmetic)
    :param thresh: <float> divides percentage changes into two categories (>= and <); applies to each security
    """
    sec_dict = {}
    for name in sec_list:
        sec_info = predict_percentage_change(name, weeks=weeks, threshold=thresh)
        sec_dict[name] = sec_info
    sec_df = pd.DataFrame(sec_dict).transpose()
    sec_df.columns = ['Last % Change', "Mean Predicted % Change",
                      "Standard Deviation Predicted % Change"]
    sec_df = sec_df.sort_values(by=["Mean Predicted % Change"], ascending=True)
    writer = pd.ExcelWriter('output.xlsx')
    sec_df.to_excel(writer, 'Sheet1')
    writer.save()
#compare_securities_2x2(["BAC", "AAPL", "GOOG", "T"], weeks=26, thresh=2.0)
Source: generate_stock_report.py (project chinese-stock-Financial-Index, author lfh2016)
def save_xls(self, dframe):  # write the frame as a sheet of this object's Excel workbook
    xls_path = os.path.join(current_folder, self.name + '.xlsx')
    if os.path.exists(xls_path):  # the Excel file already exists: keep its sheets
        book = load_workbook(xls_path)
        writer = pd.ExcelWriter(xls_path, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        dframe.to_excel(writer, self.name)
        writer.save()
    else:  # the file does not exist yet: create it
        writer = ExcelWriter(xls_path)
        dframe.to_excel(writer, self.name)
        writer.save()
def anon_pay_table(case,
                   proportional=True,
                   mult=1.0):
    '''Anonymize the "rates" worksheet of the "pay_tables.xlsx" input file.

    The rates may be proportionally adjusted (larger or smaller) or
    disproportionally adjusted with a fixed algorithm.

    A copy of the original Excel file is saved as "pay_tables_orig.xlsx".
    All modifications are inplace.

    inputs
        case (string)
            the case name
        proportional (boolean)
            if True, use the mult input to increase or decrease all of the
            "rates" worksheet pay data proportionally.  If False, use a fixed
            algorithm to disproportionally adjust the pay rates.
        mult (integer or float)
            if the proportional input is True, multiply all pay rate values
            by this input value
    '''
    inplace = True
    path, d = copy_excel_file(case, 'pay_tables', return_path_and_df=True)
    df = d['rates']
    anon_pay(df,
             proportional=proportional,
             mult=mult,
             inplace=inplace)
    d['rates'] = df
    with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
        for ws_name, df_sheet in d.items():
            df_sheet.to_excel(writer, sheet_name=ws_name)
    print('\nanon_pay_table routine complete')
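For illustration only, a hypothetical call that shrinks every pay rate to 85% of its original value for a case named 'sample_case':

anon_pay_table('sample_case', proportional=True, mult=0.85)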
def save_excel():
    # calling DataFrame.to_excel(filename) creates/overwrites the whole file each time
    df = ts.get_today_all()
    df.to_excel('1.xls', sheet_name='all_stock')
    df2 = ts.get_hist_data('300333')
    df2.to_excel('1.xls', sheet_name='basic_info')  # this overwrites the file written above
    # to keep several sheets in one workbook, write through a single ExcelWriter instead
    out = pd.ExcelWriter("2.xls")
    df.to_excel(out, sheet_name='all_stock')
    df2.to_excel(out, sheet_name='basic_info')
    out.save()
def report(simulation, filename='report.xls'):
    """ This function generates a report as an Excel sheet.

    simulation    the simulation that should be exported to Excel
    filename      filename of the Excel file
    """
    writer = pd.ExcelWriter(filename)
    for account in simulation.accounts:
        df = account.report.as_df()
        df.to_excel(writer, sheet_name=account.name)
    writer.save()
def to_excel(self, filename='myfile.xlsx'):
    """Export information to an Excel file.

    Kargs:
        filename: string
            Name of the Excel file, e.g. filename='myfile.xlsx'
    """
    writer = ExcelWriter(filename)
    self.clfinfo.to_excel(writer, 'Classifier')
    self.statinfo.to_excel(writer, 'Statistics')
    try:
        self.featinfo.to_excel(writer, 'Features')
    except Exception:
        warn('Information about features has been ignored. Run fit()')
    writer.save()
def close(self, format='csv'):
    import pandas as pd
    if format == 'csv':
        pd.DataFrame.from_dict(self._data, orient='index').to_csv(self._file)
    elif format == 'json':
        pd.DataFrame.from_dict(self._data, orient='index').to_json(self._file)
    else:
        writer = pd.ExcelWriter(self._file)
        pd.DataFrame.from_dict(self._data, orient='index').to_excel(writer)
        writer.save()
def main(workspace, path, name):
    table_name = ['accessPoints', 'ProbeRequests', 'ProbeRequests', 'EAP', 'Hidden_SSID', 'inscope_accessPoints', 'inscope_ProbeRequests', 'inscope_ProbeResponses']
    sheet_name = ['AccessPoints', 'ProbeRequests', 'ProbeRequests', 'EAP', 'Hidden_SSID', 'Inscope_AccessPoints', 'Inscope_ProbeRequests', 'Inscope_ProbeResponses']
    ws = workspace
    q = queries()
    ws1 = q.db_connect(ws)
    writer = dp.ExcelWriter(path+name+'.xlsx', engine='xlsxwriter')
    print "Exporting: "+path+name+'.xlsx'
    # enumerate keeps table_name and sheet_name aligned even when a table is
    # skipped, and avoids the off-by-one index in the status messages
    for j, tbn in enumerate(table_name):
        try:
            td = dp.read_sql('select * from ' + tbn, ws1)
            if td.empty:
                print colors.RD + "[-]" + colors.NRM + " Skipping: " + sheet_name[j] + ". No Data in table."
            else:
                td.to_excel(writer, sheet_name=sheet_name[j], index=False)
                print colors.GRN + "[+]" + colors.NRM + " Exporting: " + sheet_name[j] + "."
        except ValueError:
            continue
        except pandas.io.sql.DatabaseError:
            continue
    writer.save()
    print "Export Completed"
Source: abstract_data_project.py (project the-magical-csv-merge-machine, author entrepreneur-interet-general)
def to_xls(self, module_name, file_name):
    '''
    Takes the file specified by module and file names and writes an xls in
    the same directory with the same name (changing the file extension).

    Columns of the original file will be written to the first sheet.
    Columns containing "__" will be written to the second sheet.

    Use for download only!

    INPUT:
        - module_name:
        - file_name:
    '''
    raise DeprecationWarning('Excel download currently not supported due '
                             'to potential memory issues with large files')
    file_path = self.path_to(module_name, file_name)
    assert file_name[-4:] == '.csv'
    new_file_name = file_name[:-4] + '.xlsx'
    new_file_path = self.path_to(module_name, new_file_name)
    tab = pd.read_csv(file_path, encoding='utf-8', dtype=str)
    columns_og = [x for x in tab.columns if '__' not in x]
    columns_new = [x for x in tab.columns if '__' in x]
    writer = pd.ExcelWriter(new_file_path)
    tab[columns_og].to_excel(writer, 'original_file', index=False)
    tab[columns_new].to_excel(writer, 'normalization', index=False)
    writer.save()
    return new_file_name
def export_to(self, file_path, batchsize=1000):
    self.xls_writer = ExcelWriter(file_path)
    # get record count
    record_count = self._query_mongo(count=True)
    # query in batches and for each batch create an XLSDataFrameWriter and
    # write to existing xls_writer object
    start = 0
    header = True
    while start < record_count:
        cursor = self._query_mongo(self.filter_query, start=start,
                                   limit=batchsize)
        data = self._format_for_dataframe(cursor)
        # write all cursor's data to their respective sheets
        for section_name, section in self.sections.iteritems():
            records = data[section_name]
            # TODO: currently ignoring nested repeats
            # so ignore sections that have 0 records
            if len(records) > 0:
                # use a different group delimiter if needed
                columns = section["columns"]
                if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                    columns = [self.group_delimiter.join(col.split("/"))
                               for col in columns]
                columns = columns + self.EXTRA_COLUMNS
                writer = XLSDataFrameWriter(records, columns)
                writer.write_to_excel(self.xls_writer, section_name,
                                      header=header, index=False)
        header = False
        # increment counter(s)
        start += batchsize
        time.sleep(0.1)
    self.xls_writer.save()
def setUpClass(cls):
    cls.f = test_frame
    cls.writer = pd.ExcelWriter(path_for('charts', cls.__name__), engine=cls.to_excel_args['engine'])
    cls.xlmap = cls.f.to_excel(cls.writer, **cls.to_excel_args)
    cls.workbook = cls.xlmap.writer.book
    return cls
def export_pingan_trust16():
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    writer = pd.ExcelWriter('time_searies_all.xlsx')
    for company in issuers['name']:
        print(company)
        query = db.pingan_total.find({"name": company}, {'_id': 0, 'name': 0}).sort("rptDate", 1)
        data = pd.DataFrame(list(query))
        data = data[['??', '??', '??', '??', '??', '??', '??', '??', 'rptDate']]
        data = data.rename(columns={"??": "???????", "??": "????", "??": "?????", "??": "???????"})
        data.to_excel(writer, sheet_name=company)
    writer.save()
def process():
    df = pd.read_excel(DATA_FILE, sheetname=[1], header=0, index_col=0, convert_float=False)[1]
    # df0 = df[1].fillna(0)
    # df = df.head(100)
    # resultList = np.zeros(df.index.size)
    resultList = []
    idList = []
    for i in range(df.index.size):
        for nan in range(1, 40):
            if math.isnan(df.values[i][nan]):
                # print(df[1].values[i])
                df.values[i][40] = np.nan
                break
        if nan < 39:  # for nan results
            idList.append(df.index[i])
            resultList.append([df.values[i][0], 0])
            print("%s:%s" % (df.index[i], df.values[i][0]))
            continue
        ID = df.index[i]
        df.values[i][0]
        data = df.values[i][1:40].reshape(13, 3)
        # handle nulls
        # print(data)
        industryScore = calcIndustry(ID, data)
        trendScore = calcTrend(ID, data)
        fluncScore = calcFluctuation(ID, data)
        a = np.append(industryScore, trendScore)
        b = np.append(a, fluncScore)
        idList.append(ID)
        resultList.append([df.values[i][0], calcTotal(b)])
        # print("%s:%f"%(ID,resultList[i]))
    resultdf = pd.DataFrame(resultList, idList, columns=['NAME', 'Score'])
    with pd.ExcelWriter('result.xls') as writer:
        resultdf.to_excel(writer, sheet_name=str(0))
def tasks_to_spreadsheet(tasks, filepath):
    import pandas
    df_tasks = pandas.DataFrame.from_records([
        task.to_dict()
        for task in tasks
    ])
    resources = set(resource for task in tasks for resource in task.resources)
    df_resources = pandas.DataFrame.from_records([
        resource.to_dict()
        for resource in resources
    ])
    with pandas.ExcelWriter(filepath, engine='xlsxwriter') as writer:
        df_tasks.to_excel(writer, sheet_name='tasks', index=False)
        df_resources.to_excel(writer, sheet_name='resources', index=False)
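Because the ExcelWriter is used as a context manager here, the workbook is saved automatically on exit, so no explicit writer.save() is needed. Reading both sheets back is a one-liner ('plan.xlsx' is an illustrative path):

import pandas as pd

sheets = pd.read_excel('plan.xlsx', sheet_name=['tasks', 'resources'])
tasks_df, resources_df = sheets['tasks'], sheets['resources']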
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputdir', help='Input directory to be converted', required=True)
    parser.add_argument('--outdir', help='Output directory', required=True)
    parser.add_argument('--cols', help='Columns to include', default=DEFAULT_COLUMNS)
    parser.add_argument('--sortby', help='Row that is going to be used for sorting', default='prediced_score')
    parser.add_argument('--ascending', help='Sort in ascending order (def. False)', default=False)
    parser.add_argument('--newcols', help='Columns to be added', default=['LABEL'])
    args = parser.parse_args(argv)
    # Get a list of files to be converted
    filenames = glob.glob('data/output/*.csv')
    for filename in filenames:
        # Skip existing files
        outputfile = os.path.join(args.outdir,
                                  '.'.join(os.path.basename(filename).split('.')[:1]) + '.xls')
        if os.path.exists(outputfile):
            continue
        df = pd.read_csv(filename)
        # Sort the data
        df.sort(args.sortby, ascending=args.ascending, inplace=True)
        # Drop columns that we don't need
        selected_cols = args.cols.split(' ')
        for col in df.columns.tolist():
            if selected_cols.count(col) == 0:
                df.drop(col, axis=1, inplace=True)
        # Add new cols
        for newcol in args.newcols:
            df[newcol] = ''
        # Store file
        outputfile = os.path.join(args.outdir,
                                  '.'.join(os.path.basename(filename).split('.')[:1]) + '.xls')
        if not os.path.exists(os.path.dirname(outputfile)):
            os.makedirs(os.path.dirname(outputfile))
        writer = pd.ExcelWriter(outputfile, engine='xlsxwriter')
        df.to_excel(writer, sheet_name='Sheet1')
        writer.save()
        print('Wrote a new excel file: %s' % outputfile)
def _save_origin(self, path):
    # if not self.initialized:
    #     raise ValueError("trader not initialized, no data to perform")
    writer = ExcelWriter(path, encoding="utf-8")
    pd.DataFrame(self.performance.equity).to_excel(writer, "??")
    self.performance.order_details.to_excel(writer, "??")
    writer.save()
def export(df, table, export_type='excel', target_path=None, if_exists='replace', suffix=None):
    if target_path:
        if export_type == 'excel' and not suffix:
            suffix = 'xlsx'
        target_file = os.path.join(target_path,
                                   table + '-' + str(datetime.date.today())) + '.' + str(suffix or export_type)
        if if_exists == 'replace' and os.path.exists(target_file):
            os.remove(target_file)
        export_io = target_file
    else:
        export_io = BytesIO()
    if export_type == 'excel':
        writer = pd.ExcelWriter(export_io, engine='xlsxwriter')
        df.to_excel(writer, index=False)
        writer.save()
    elif export_type == 'csv':
        export_io = BytesIO(df.to_csv(target_path, index=False, chunksize=4096).encode())
    elif export_type == 'json':
        export_io = BytesIO(df.to_json(target_path, orient='records').encode())
    elif export_type == 'pickle':
        pkl.dump(df, export_io, protocol=pkl.HIGHEST_PROTOCOL)
    else:
        raise NotImplementedError("export type {} is not supported".format(export_type))
    return export_io, table + '.' + str(suffix or export_type)
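A hedged usage sketch based only on the signature above: with a target_path the function writes a dated file and returns its path, while without one it returns an in-memory BytesIO payload (e.g. for streaming to a client); prices_df is an illustrative DataFrame:

# write prices-<today>.xlsx into /tmp and get back (path, attachment_name)
path, name = export(prices_df, 'prices', export_type='excel', target_path='/tmp')

# no target_path: get back (BytesIO, attachment_name) instead of a file on disk
buffer, name = export(prices_df, 'prices', export_type='excel')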
def write(self, if_exists: str = None):
    # from pyexcelerate import Workbook
    self._logger.info("Writing rows to storage")
    # wb = Workbook()
    # wb.new_sheet("Sheet1", data=self.data)
    # wb.save("{}.xlsx".format(os.path.join(self._db_dir, self.table)))
    writer = pd.ExcelWriter("{}.xlsx".format(os.path.join(self._db_dir, self.table)),
                            engine='xlsxwriter')
    self.data.to_excel(writer)
    writer.save()
    self._logger.info("Finished writing rows to storage")