def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

    self.log.info("Dumping MySQL query results to local file")
    conn = mysql.get_conn()
    cursor = conn.cursor()
    cursor.execute(self.sql)
    with NamedTemporaryFile("wb") as f:
        csv_writer = csv.writer(f, delimiter=self.delimiter, encoding="utf-8")
        field_dict = OrderedDict()
        for field in cursor.description:
            field_dict[field[0]] = self.type_map(field[1])
        csv_writer.writerows(cursor)
        f.flush()
        cursor.close()
        conn.close()

        self.log.info("Loading file into Hive")
        hive.load_file(
            f.name,
            self.hive_table,
            field_dict=field_dict,
            create=self.create,
            partition=self.partition,
            delimiter=self.delimiter,
            recreate=self.recreate,
            tblproperties=self.tblproperties)
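The snippet above (apparently Airflow's MySQL-to-Hive transfer operator) streams a MySQL cursor through unicodecsv into a temporary file, then hands the file to the Hive CLI hook. The core move is that unicodecsv writes unicode rows to a binary file handle and encodes each cell itself; a minimal self-contained sketch of that pattern (file, rows, and delimiter are made up for illustration):

import unicodecsv as csv
from tempfile import NamedTemporaryFile

# unicodecsv expects a byte stream and handles the encoding per cell
with NamedTemporaryFile("wb", delete=False) as f:
    writer = csv.writer(f, delimiter="\t", encoding="utf-8")
    writer.writerow([u"id", u"name"])
    writer.writerows([(1, u"caf\xe9"), (2, u"na\xefve")])
    f.flush()
    print(f.name)  # path of the dump, e.g. to pass to a loader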
def to_csv(
        self,
        hql,
        csv_filepath,
        schema='default',
        delimiter=',',
        lineterminator='\r\n',
        output_header=True,
        fetch_size=1000):
    schema = schema or 'default'
    with self.get_conn(schema) as conn:
        with conn.cursor() as cur:
            self.log.info("Running query: %s", hql)
            cur.execute(hql)
            with open(csv_filepath, 'wb') as f:
                writer = csv.writer(f,
                                    delimiter=delimiter,
                                    lineterminator=lineterminator,
                                    encoding='utf-8')
                if output_header:
                    writer.writerow([c[0] for c in cur.description])
                i = 0
                # Fetch in batches so large result sets never sit wholly in memory
                while True:
                    rows = [row for row in cur.fetchmany(fetch_size) if row]
                    if not rows:
                        break
                    writer.writerows(rows)
                    i += len(rows)
                    self.log.info("Written %s rows so far.", i)
                self.log.info("Done. Loaded a total of %s rows.", i)
def save_frames(self, dir_name):
    '''
    Save generated frames to file.
    '''
    with open(dir_name + os.sep + 'frames' + '.csv', 'wb') as f:
        # Manually encode a BOM, utf-8-sig didn't work with unicodecsv
        f.write(u'\ufeff'.encode('utf8'))
        csv_writer = csv.writer(f, delimiter='\t', encoding='utf-8')
        for i, frame in enumerate(self.frames):
            # Use a distinct loop variable so the open file `f` is not shadowed
            csv_writer.writerow([self.keyword_list.keywords[i][0]] +
                                [item[0] for item in frame])
            csv_writer.writerow([self.keyword_list.keywords[i][1]] +
                                [str(item[1]) for item in frame])
def save_keywords(self, dir_name):
    '''
    Save generated keywords to file.
    '''
    with open(dir_name + os.sep + 'keywords' + '.csv', 'wb') as f:
        # Manually encode a BOM, utf-8-sig didn't work with unicodecsv
        f.write(u'\ufeff'.encode('utf8'))
        csv_writer = csv.writer(f, delimiter='\t', encoding='utf-8')
        for k in self.keywords:
            csv_writer.writerow([k[0], str(k[1])])
def save_topics(self, dir_name):
    '''
    Save generated topics to file.
    '''
    with open(dir_name + os.sep + 'topics' + '.csv', 'wb') as f:
        # Manually encode a BOM, utf-8-sig didn't work with unicodecsv
        f.write(u'\ufeff'.encode('utf8'))
        csv_writer = csv.writer(f, delimiter='\t', encoding='utf-8')
        for topic in self.topics:
            csv_writer.writerow([t[1] for t in topic])
            csv_writer.writerow([str(t[0]) for t in topic])
def download_requests_data(self):
    '''
    Creates a CSV or JSON file from all of the requested data.
    :returns: JSON or CSV file contents
    '''
    file_format = request.query_string
    requests = \
        _get_action('requestdata_request_list_for_sysadmin', {})
    s = StringIO()

    if 'json' in file_format.lower():
        response.headerlist = \
            [('Content-Type', 'application/json'),
             ('Content-Disposition',
              'attachment;filename="data_requests.json"')]
        json.dump(requests, s, indent=4)
        return s.getvalue()

    if 'csv' in file_format.lower():
        response.headerlist = \
            [('Content-Type', 'text/csv'),
             ('Content-Disposition',
              'attachment;filename="data_requests.csv"')]
        writer = csv.writer(s, encoding='utf-8')
        # Write the header once, from the keys of the first record
        header = True
        for k in requests:
            if header:
                writer.writerow(k.keys())
                header = False
            writer.writerow(k.values())
        return s.getvalue()
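Writing k.keys() once and then k.values() per record assumes every record dict shares the same key order. A DictWriter-based variant makes the field order explicit; this is a hedged alternative sketch, not the extension's code:

import unicodecsv

def requests_to_csv(records, stream):
    # Hypothetical helper: lock field order to the first record's keys
    fieldnames = list(records[0].keys()) if records else []
    writer = unicodecsv.DictWriter(stream, fieldnames, encoding='utf-8')
    writer.writeheader()
    for record in records:
        writer.writerow(record)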
def nodes_csv():
    headers = [
        'Display name',
        'Host identifier',
        'Enrolled On',
        'Last Check-in',
        'Last IP Address',
        'Is Active',
    ]
    # list() so these can be iterated more than once (Python 3 maps are single-use)
    column_names = list(map(itemgetter(0), current_app.config['DOORMAN_CAPTURE_NODE_INFO']))
    labels = list(map(itemgetter(1), current_app.config['DOORMAN_CAPTURE_NODE_INFO']))
    headers.extend(labels)
    headers = list(map(str.title, headers))

    bio = BytesIO()
    writer = csv.writer(bio)
    writer.writerow(headers)

    for node in Node.query:
        row = [
            node.display_name,
            node.host_identifier,
            node.enrolled_on,
            node.last_checkin,
            node.last_ip,
            node.is_active,
        ]
        row.extend([node.node_info.get(column, '') for column in column_names])
        writer.writerow(row)

    bio.seek(0)

    response = send_file(
        bio,
        mimetype='text/csv',
        as_attachment=True,
        attachment_filename='nodes.csv'
    )
    return response
def calculateMetrics(list_rank,
                     string_lyricsRecognizer,
                     coef_post_processor,
                     proportion_std,
                     am):
    """
    Calculate matching evaluation metrics.
    If HSMMs are evaluated, set coef_post_processor=0
    :param list_rank:
    :param string_lyricsRecognizer:
    :param coef_post_processor:
    :param proportion_std:
    :param am:
    :return:
    """
    mrr = MRR(list_rank)
    top1hit = topXhit(1, list_rank)
    top3hit = topXhit(3, list_rank)
    top5hit = topXhit(5, list_rank)
    top10hit = topXhit(10, list_rank)
    top20hit = topXhit(20, list_rank)
    top100hit = topXhit(100, list_rank)

    # write results into csv
    path_eval = path.join(currentPath,
                          '..',
                          'eval',
                          class_name + '_' + am + '_' + string_lyricsRecognizer +
                          '_' + str(coef_post_processor) + '_' + str(proportion_std) + '.csv')

    with open(path_eval, 'wb') as csvfile:
        w = csv.writer(csvfile)
        w.writerow(['MRR', mrr])
        w.writerow(['top 1 hit', top1hit])
        w.writerow(['top 3 hit', top3hit])
        w.writerow(['top 5 hit', top5hit])
        w.writerow(['top 10 hit', top10hit])
        w.writerow(['top 20 hit', top20hit])
        w.writerow(['top 100 hit', top100hit])
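MRR and topXhit are called but not defined in the snippet. Plausible implementations of these standard retrieval metrics, assuming list_rank holds 1-based ranks (my sketch; the project's versions may differ):

def MRR(list_rank):
    # Mean reciprocal rank over a list of 1-based ranks
    return sum(1.0 / r for r in list_rank) / len(list_rank)

def topXhit(x, list_rank):
    # Fraction of queries whose correct answer ranked within the top x
    return sum(1 for r in list_rank if r <= x) / float(len(list_rank))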
def create_csv(service, report_date, start_date, end_date, dois):
    # Open temp csv file to write data
    filedata = io.BytesIO()
    writer = csv.writer(filedata)

    # Run Google Analytics query
    data = run_query(service, start_date, end_date, 'ga:totalEvents',
                     'ga:eventLabel', 'ga:eventAction==download')
    rows = data.get('rows')

    # 1. Write client-report data to csv
    writer.writerow(["My Fancy Analytics Report!"])
    writer.writerow(["Generated on " + report_date])
    writer.writerow(["Data for " + start_date + " to " + end_date])
    writer.writerow([])
    writer.writerow(["Aggregate Data"])
    writer.writerow(["Items in repository", len(dois)])
    # Guard against a missing 'rows' key before taking its length
    writer.writerow(["Items downloaded at least once",
                     len(rows) if rows is not None else 0])
    writer.writerow(["Items linked to at least 1 ORCID iD"])
    writer.writerow([])
    writer.writerow(["Item Data"])
    writer.writerow(["DOI", "Downloads", "ORCID Records with this DOI"])
    if rows is not None:
        for r in rows:
            writer.writerow([r[0], r[1]])
    writer.writerow([])
    return filedata.getvalue()
# MAIN FUNCTION
def transactions_csv(year, month):
    date = datetime.date(year, month, 1)
    budget = load_monthly_budget_from_config(config, date, storage=storage)
    out = StringIO.StringIO()
    writer = unicodecsv.writer(out)
    for tx in budget.transactions:
        writer.writerow(tx)
    return out.getvalue(), {"Content-Disposition": "attachment; filename=%s-%s.csv" % (year, month),
                            "Content-Type": "text/csv"}
def save_accounts(self, accounts):
    with codecs.open(self.get_accounts_filename(), 'w') as f:
        writer = unicodecsv.writer(f)
        for acc in accounts:
            writer.writerow(acc)
def save_transactions(self, transactions, filename):
    with codecs.open(filename, 'w') as f:
        writer = unicodecsv.writer(f)
        for tx in transactions:
            writer.writerow(self._transaction_to_csv_row(tx))
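_transaction_to_csv_row is not shown in this listing; a hypothetical sketch of its shape, assuming a transaction object with date, description, and amount attributes (the real project defines its own layout):

def _transaction_to_csv_row(self, tx):
    # Hypothetical field layout, for illustration only
    return [tx.date.isoformat(), tx.description, str(tx.amount)]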
def process_results(self, results):
    '''
    Processes a page full of results.
    Saves a pdf for each result.
    '''
    try:
        articles = results[0]['records']['article']
        with open(self.csv_file, 'ab') as csv_file:
            writer = csv.DictWriter(csv_file, FIELDS, encoding='utf-8')
            if self.harvested == 0:
                writer.writeheader()
            for article in articles:
                article_id = article['id']
                row = self.prepare_row(article)
                writer.writerow(row)
                if self.pdf:
                    pdf_url = self.get_pdf_url(article_id)
                    if pdf_url:
                        pdf_filename = self.make_filename(article)
                        pdf_file = os.path.join(self.data_dir, 'pdf', '{}.pdf'.format(pdf_filename))
                        urlretrieve(pdf_url, pdf_file)
                if self.text:
                    text = article.get('articleText')
                    if text:
                        text_filename = self.make_filename(article)
                        # Strip HTML tags and collapse runs of whitespace
                        text = re.sub(r'<[^<]+?>', '', text)
                        text = re.sub(r'\s\s+', ' ', text)
                        text_file = os.path.join(self.data_dir, 'text', '{}.txt'.format(text_filename))
                        with open(text_file, 'wb') as text_output:
                            text_output.write(text.encode('utf-8'))
                time.sleep(0.5)
        self.harvested += self.get_highest_n(results)
        print('Harvested: {}'.format(self.harvested))
    except KeyError:
        pass
def AveragesByClassTypeCSV(request):
    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="averagesByClassDescriptionType.csv"'

    writer = csv.writer(response)

    startDate = getDateTimeFromGet(request, 'startDate')
    endDate = getDateTimeFromGet(request, 'endDate')
    results = getAveragesByClassType(startDate, endDate)

    role_names = [x.replace(str(_('Average ')), '') for x in results.keys() if x.startswith(str(_('Average ')))]

    header_list = [str(_('Class Type')), str(_('Total Classes')), str(_('Total Students')), str(_('Avg. Students/Class'))]
    for this_role in role_names:
        header_list += [str(_('Total %s' % this_role)), str(_('Avg. %s/Class' % this_role))]

    # Note: These are not translated because the chart Javascript looks for these keys
    writer.writerow(header_list)

    for key, value in results.items():
        this_row = [
            key,
            value.get(str(_('Series')), 0),
            value.get(str(_('Registrations')), 0),
            value.get(str(_('Average Registrations')), None),
        ]
        for this_role in role_names:
            this_row += [
                value.get(str(_('Total %s' % this_role)), 0),
                value.get(str(_('Average %s' % this_role)), 0)
            ]
        writer.writerow(this_row)
    return response
def ClassCountHistogramCSV(request):
    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="studentHistogramData.csv"'

    cohortStart = getDateTimeFromGet(request, 'cohortStart')
    cohortEnd = getDateTimeFromGet(request, 'cohortEnd')
    results = getClassCountHistogramData(cohortStart=cohortStart, cohortEnd=cohortEnd)

    writer = csv.writer(response)

    # Note: These are not translated because the chart Javascript looks for these keys
    header_row = ['# of Classes']
    keys = OrderedDict()
    for v in results.values():
        keys.update(v)
    header_row += [x for x in keys.keys()]
    writer.writerow(header_row)

    for k, v in results.items():
        this_row = [k]
        this_row += [v.get(x, None) for x in keys.keys()]
        writer.writerow(this_row)
    return response
def MonthlyPerformanceCSV(request):
    # Create the HttpResponse object with the appropriate CSV header.
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="monthlyPerformance.csv"'

    writer = csv.writer(response)

    yearTotals = getMonthlyPerformance()
    all_years = [k for k in yearTotals['Hours'].keys() if k != 'MonthlyAverage']
    all_years.sort()

    # Write headers first
    headers_list = ['Data Series', 'Month', 'All-Time Avg.']
    for year in all_years:
        headers_list.append(str(year))
    writer.writerow(headers_list)

    # Note: These are not translated because the chart Javascript looks for these keys
    yearTotals_keys = {
        'Total Student-Hours': 'StudentHours',
        'Avg. Students/Hour': 'AvgStudents',
        'Hours of Instruction': 'Hours',
        'Unique Registrations': 'Registrations',
        'Total Students': 'EventRegistrations',
    }

    for series, key in yearTotals_keys.items():
        for month in range(1, 13):
            this_row = [
                series,
                month_name[month],
                yearTotals[key]['MonthlyAverage'][month],
            ]
            for year in all_years:
                this_row.append(yearTotals[key][year][month])
            writer.writerow(this_row)
    return response
def render_to_csv(self, context):
    statement = context['statement']
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="financialStatementByMonth.csv"'

    writer = csv.writer(response, csv.excel)
    # BOM (optional... Excel needs it to open a UTF-8 file properly)
    response.write(u'\ufeff'.encode('utf8'))

    header_list = [
        _('Month Name'),
        _('Revenues: Net'),
        _('Expenses: Instruction'),
        _('Expenses: Venue'),
        _('Expenses: Other'),
        _('Expenses: Total'),
        _('Registrations'),
        _('Net Profit'),
    ]
    writer.writerow(header_list)

    for x in statement['statementByMonth']:
        this_row_data = [
            x['month_name'],
            x['revenues'],
            x['expenses']['instruction'],
            x['expenses']['venue'],
            x['expenses']['other'],
            x['expenses']['total'],
            x['registrations'],
            x['net_profit'],
        ]
        writer.writerow(this_row_data)
    return response
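Several snippets above hand-write a UTF-8 BOM because unicodecsv works on byte streams. On Python 3 with the standard csv module, the utf-8-sig codec emits the BOM automatically; a hedged equivalent (file name and header values assumed):

import csv

# Python 3: utf-8-sig writes the BOM itself, no manual encode needed
with open('statement.csv', 'w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f, dialect=csv.excel)
    writer.writerow(['Month Name', 'Revenues: Net'])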
def save(self, s3_conn, s3_prefix):
    """Save stats to S3, including percentages

    Args:
        s3_conn (boto.Connection) - an s3 connection
        s3_prefix (str) - s3 path (including bucket) to save dataset stats
    """
    bucket_name, prefix = split_s3_path(s3_prefix)
    bucket = s3_conn.get_bucket(bucket_name)
    for field_name, counts in self.accumulator.items():
        output = BytesIO()
        writer = csv.writer(output)
        for value, count in counts.most_common():
            writer.writerow([value, count])
        key = boto.s3.key.Key(
            bucket=bucket,
            name='{}/{}/{}/{}.csv'.format(
                prefix,
                self.directory,
                self.quarter,
                field_name
            )
        )
        logging.info('Writing stats to %s', key)
        output.seek(0)
        key.set_contents_from_string(output.getvalue())
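split_s3_path is imported from elsewhere in that project; a plausible sketch of its contract (an assumption, not the project's code):

def split_s3_path(path):
    # 's3://bucket/some/prefix' -> ('bucket', 'some/prefix')
    bucket, _, prefix = path.replace('s3://', '', 1).partition('/')
    return bucket, prefix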
def saveSentiment(fileToSave, all_predictions, all_scores):
    text = ''.join(open("./data/" + fileToSave + ".txt").readlines()).decode('utf8')
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    book = tokenizer.tokenize(text)
    book = [cleanForView(sent) for sent in book]
    toOut = zip(book, all_predictions, all_scores)

    import unicodecsv as csv
    # Use a context manager so the file is closed even on error
    with open(fileToSave + '.csv', 'wb') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(["Text", "Binary_Sentiment", "Cont_Sentiment"])
        for row in toOut:
            wr.writerow(row)
    print('Saved ' + fileToSave + '.csv')
def _append(self, row, **make_kwargs):
    if not self._skip_resource(**make_kwargs):
        # Append a row (with values in native python format) to the csv file;
        # creates the file and writes the header if it does not exist yet.
        if not self.csv_path:
            raise Exception('cannot append without a path')
        fields = self.descriptor["schema"]["fields"]
        if not hasattr(self, "_csv_file_initialized"):
            self._csv_file_initialized = True
            self.logger.info('writing csv resource to: {}'.format(self.csv_path))
            with open(self.csv_path, 'wb') as csv_file:
                unicodecsv.writer(csv_file, encoding="utf-8").writerow(
                    [field["name"] for field in fields])
        with open(self.csv_path, 'ab') as csv_file:
            unicodecsv.writer(csv_file, encoding="utf-8").writerow(
                [uncast_value(row[field["name"]], field) for field in fields])