import csv
import decimal

def write_results_at_various_thresholds(token_to_probes, check_fingerprints, increment_threshold_by=0.01):
    """Output results at each threshold to a CSV file. Used to draw an ROC curve.
    :param token_to_probes: Dictionary mapping each token to a list of probe dictionaries
    :param check_fingerprints: Optional step to remove false positives.
    """
    def drange(x, y, jump):
        """range() doesn't support fractional steps, so step with Decimal instead."""
        while x <= y:
            yield float(x)
            x += decimal.Decimal(str(jump))  # str() avoids float representation error
    with open("jaccard_threshold_results.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["tp", "fp", "tn", "fn", "tpr", "fpr", "accuracy", "clusters", "macs", "median"])
        writer.writeheader()
        for threshold in drange(0, 1.01, increment_threshold_by):
            writer.writerow(cluster_with_threshold(token_to_probes, threshold, check_fingerprints))
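# Usage sketch: cluster_with_threshold is not shown above, so this hypothetical
# stand-in only illustrates the expected return shape (one metrics dict per
# threshold, with keys matching the fieldnames).
def cluster_with_threshold(token_to_probes, threshold, check_fingerprints):
    return {"tp": 0, "fp": 0, "tn": 0, "fn": 0, "tpr": 0.0, "fpr": 0.0,
            "accuracy": 0.0, "clusters": 0, "macs": 0, "median": 0.0}

write_results_at_various_thresholds({}, check_fingerprints=False)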
def run(self):
    """Compute each user's followers/following ratio from a stream of tweet JSON."""
    users = {}
    for tweet_str in self.input().open('r'):
        tweet = json.loads(tweet_str)
        user = tweet['user']['screen_name']
        followers = int(tweet['user']['followers_count'])
        following = int(tweet['user']['friends_count'])
        if following > 0:  # skip users who follow nobody to avoid division by zero
            users[user] = followers / float(following)
with self.output().open('w') as fp_counts:
writer = csv.DictWriter(fp_counts, delimiter=',',
quoting=csv.QUOTE_MINIMAL,
fieldnames=['user', 'count'])
writer.writeheader()
for user, r in users.items():
writer.writerow({'user': user, 'count': r})
def extractFieldsFromDoc(self, doc, prefix=""):
"""Extract field values defined in self.columns from document into a dict required for csv.DictWriter."""
res = dict()
for field, value in doc.items():
if type(value) == dict: # subdocument
res.update(self.extractFieldsFromDoc(value, prefix + field + "."))
elif type(value) == list:
if type(value[0]) == dict: # multiple nested documents
nestedcolfield = self.params['nestedcolfield']
nestedvalfield = self.params['nestedvalfield']
if nestedcolfield != None and nestedvalfield != None:
for subdoc in value:
try:
nestedcol = prefix + field + "." + subdoc[nestedcolfield]
if nestedcol in self.columns:
res[nestedcol] = subdoc[nestedvalfield]
except KeyError: # nested document doesn't contains column name or value field
pass
else: # multivalued field
if prefix + field in self.columns:
res[prefix + field] = self.params["listsep"].join(value)
else: # simple value
if prefix + field in self.columns:
res[prefix + field] = value
return res
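# Demo of the flattening behaviour, run against the stand-alone copy of the
# method above; the params and column whitelist below are assumptions chosen
# purely for illustration.
class _Demo:
    params = {'nestedcolfield': 'key', 'nestedvalfield': 'val', 'listsep': ';'}
    columns = ['user.name', 'tags', 'meta.k1']
    extractFieldsFromDoc = extractFieldsFromDoc

doc = {
    'user': {'name': 'alice'},           # subdocument -> column 'user.name'
    'tags': ['a', 'b'],                  # multivalued -> 'a;b'
    'meta': [{'key': 'k1', 'val': 42}],  # nested docs -> column 'meta.k1'
}
print(_Demo().extractFieldsFromDoc(doc))  # {'user.name': 'alice', 'tags': 'a;b', 'meta.k1': 42}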
def render(self, result):
    import csv
    output = engine.EQUELOutput(engine.EQUELOutput.TYPE_TEXT, ["search"])
    self.columns = list()
    # First step: determine all columns that should appear in the CSV output
    if len(self.params['fields']) > 0:  # if a field whitelist is given, use it
        self.columns = self.params['fields']
    else:  # otherwise collect the columns from all documents in the result
        for doc in result.result["hits"]["hits"]:
            doccolumns = self.columnNames(doc["_source"])
            for column in doccolumns:
                if column not in self.columns:
                    self.columns.append(column)
    csvwriter = csv.DictWriter(output, self.columns, dialect=self.params['dialect'])
    if self.params['header']:
        csvwriter.writeheader()
    # Next: iterate over the documents and fill the CSV with data
    for doc in result.result["hits"]["hits"]:
        extracted = self.extractFieldsFromDoc(doc["_source"])
        csvwriter.writerow(extracted)
    return output
def write_csv(contacts, output_file, dialect='unix', verbosity=0):
    """
    makes a CSV out of the contact dicts and
    writes it to output_file (an open file object)
    """
    verbose_print("generating csv", verbosity, 1)
    # make the contacts CSV-compatible first
    new_contacts = [make_contact_csv_compatible(contact, verbosity) for contact in contacts]
    if not new_contacts:
        return  # nothing to write
    writer = csv.DictWriter(output_file,
                            fieldnames=new_contacts[0].keys(),
                            dialect=dialect)
    writer.writeheader()
    writer.writerows(new_contacts)
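# Usage sketch with the two helpers stubbed out; both stubs are hypothetical
# and only mimic plausible behaviour.
import csv
import io

def verbose_print(msg, verbosity, level):
    if verbosity >= level:
        print(msg)

def make_contact_csv_compatible(contact, verbosity):
    # e.g. collapse list values into a single cell
    return {k: ', '.join(v) if isinstance(v, list) else v for k, v in contact.items()}

buf = io.StringIO()
write_csv([{'name': 'Ada', 'emails': ['ada@example.org']}], buf, verbosity=1)
print(buf.getvalue())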
def csv_download(request):
    """ Creates a CSV file from all of the applications to the user's
        organization.
    """
    apps = get_all_applications_for_users_org(request.user)
    data = ApplicationCSVDownloadSerializer(apps, many=True).data
    fields = []
    for datum in data:
        these_fields = list(datum.keys())
        # Use the largest set of fields found. There should not be a case
        # where a smaller set of fields has a field that is missing from a
        # larger one.
        if len(these_fields) > len(fields):
            fields = these_fields
    response = HttpResponse(content_type='text/csv')
    csv_writer = csv.DictWriter(response, fieldnames=fields)
    csv_writer.writeheader()
    csv_writer.writerows(data)
    filename = 'all_applications_to_%s_%s.csv' % (
        request.user.profile.organization.slug,
        timezone.now().strftime('%m-%d-%Y'),
    )
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    return response
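# If that "largest row wins" assumption ever breaks, DictWriter raises a
# ValueError on the first row carrying an unknown key. A safer sketch builds
# fieldnames as the union of all keys, preserving first-seen order:
fields = []
for datum in data:
    for key in datum:
        if key not in fields:
            fields.append(key)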
def buildSampleData(numPapers, inputDir, outputDir):
    papers = set()
    authors = set()
    with open(inputDir + "/PaperAuthor.csv") as infile:
        reader = csv.DictReader(infile)
        with open(outputDir + "/PaperAuthor.csv", 'w') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=reader.fieldnames)
            writer.writeheader()
            for row in reader:
                # stop once numPapers distinct papers have been collected
                if len(papers) >= numPapers:
                    break
                papers.add(row["PaperId"])
                authors.add(row["AuthorId"])
                writer.writerow(row)
    copyFile("Author.csv", authors, inputDir, outputDir)
    copyFile("Paper.csv", papers, inputDir, outputDir)
    return papers, authors
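# copyFile is not shown above; this hypothetical version matches the calls
# made by buildSampleData and demonstrates the same DictReader -> DictWriter
# pattern (the "Id" column name is an assumption).
def copyFile(filename, ids, inputDir, outputDir):
    with open(inputDir + "/" + filename) as src, \
         open(outputDir + "/" + filename, 'w', newline='') as dst:
        reader = csv.DictReader(src)
        writer = csv.DictWriter(dst, fieldnames=reader.fieldnames)
        writer.writeheader()
        for row in reader:
            if row.get("Id") in ids:
                writer.writerow(row)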
def init_csv_file(csv_file_param):
    """
    Open the CSV file, write the header and return the csv.DictWriter,
    so the flattened event structures can then be written to it
    """
    csv_file = None
    if isinstance(csv_file_param, str):
        # The parameter is a file path
        csv_file = open(csv_file_param, 'w', newline='')
    elif hasattr(csv_file_param, 'write'):
        # The parameter is already a file object (normally, stdout)
        csv_file = csv_file_param
    else:
        # Unknown
        raise IOError('[Error] Output file parameter "' + str(csv_file_param) + '" unknown')
    # Write the header
    fileWriter = csv.DictWriter(csv_file, delimiter='^',
                                fieldnames=fieldnames,
                                dialect='unix', quoting=csv.QUOTE_NONE)
    fileWriter.writeheader()
    return fileWriter
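# Usage sketch: fieldnames is assumed to be a module-level list that
# init_csv_file reads from the enclosing scope.
import sys

fieldnames = ['event', 'date', 'count']
writer = init_csv_file(sys.stdout)  # or init_csv_file('events.csv')
writer.writerow({'event': 'boot', 'date': '2020-01-01', 'count': 1})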
def write_object_labels_csv(file, labeled_data):
    # write a csv file with one row per image and one column per category
    print('[dataset] write file %s' % file)
    with open(file, 'w', newline='') as csvfile:
        fieldnames = ['name']
        fieldnames.extend(object_categories)
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for (name, labels) in labeled_data.items():
            example = {'name': name}
            for i in range(len(object_categories)):
                example[fieldnames[i + 1]] = int(labels[i])
            writer.writerow(example)
        # the with-block closes the file; no explicit close() needed
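# Usage sketch: object_categories is assumed to be the module-level list of
# class names (e.g. the 20 PASCAL VOC categories), shortened here for the demo.
object_categories = ['aeroplane', 'bicycle', 'bird']

labeled_data = {'img_001': [1, -1, 0]}  # assumed encoding: 1=present, -1=absent, 0=difficult
write_object_labels_csv('labels.csv', labeled_data)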
import csv
import sys
from operator import itemgetter

def main():
    writer = csv.DictWriter(
        sys.stdout,
        delimiter='\t',
        fieldnames=['level', 'bd', 'nabd', 'naaw', 'success', 'duration']
    )
writer.writeheader()
records = [translate_raw_record(r) for r in load_records()]
for rec in sorted(records, key=itemgetter('success')):
writer.writerow(rec)
print('p(success) = {}'.format(p_success(records)))
print('p(bd==c) = {}'.format(p_breakdown('c', records)))
print('p(success and bd=c) = {}'.format(p_success_and_breakdown('c', records)))
for breakdown in ['', 'ag', 'c', 'pd']:
print('p(success | bd={}) = {}'.format(
breakdown, p_success_given_breakdown(breakdown, records)))
for nabd in range(4):
print('p(success | nabd={}) = {}'.format(nabd, p_success_given_nabd(nabd, records)))
for naaw in range(6):
print('p(success | naaw={}) = {}'.format(naaw, p_success_given_naaw(naaw, records)))
def run(self, args):
    student_grades = {}
    csci = csv.DictReader(args.csci, fieldnames=['unixname'] + args.columns)
    for line in csci:
        student = line['unixname']
        del line['unixname']
        student_grades[student] = line
    blackboard = csv.DictReader(args.blackboard)
    fields = blackboard.fieldnames
    # Blackboard exports carry stray Unicode debris (typically a byte-order
    # mark) around the first column header; strip it to recover the actual name
    fields[0] = fields[0][2:-1]
    output = csv.DictWriter(args.output, fieldnames=fields)
    output.writeheader()
    for row in blackboard:
        unix_name = row['Username']
        # merge in the grades; students missing from the CSCI sheet keep
        # their Blackboard row unchanged
        row.update(student_grades.get(unix_name, {}))
        output.writerow(row)
def write_output(interval):
    with open(report_file, 'w', newline='') as csvfile:
        fieldnames = ['Volume Name', 'Current Data Reduction', 'Data Reduction ' + interval, 'Current Size(GB)',
                      'Size ' + interval + ' Ago(GB)', interval + ' Growth(GB)']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        print('Parsing volume data.')
        # Loop through all volumes to get historical space data. The samples are
        # assumed to be ordered oldest-first: index 0 is the start of the
        # interval and index -1 is the most recent sample.
        for currentvol in allvolumes:
            thisvol = array.get_volume(currentvol['name'], space='True', historical=interval)
            volname = thisvol[0]['name']
            volcurdr = round(thisvol[-1]['data_reduction'], 2)
            volstartdr = round(thisvol[0]['data_reduction'], 2)
            volstartsize = round(thisvol[0]['volumes'] / 1024 / 1024 / 1024, 2)
            volcursize = round(thisvol[-1]['volumes'] / 1024 / 1024 / 1024, 2)
            volsizedif = round(volcursize - volstartsize, 2)
            writer.writerow(
                {'Volume Name': volname, 'Current Data Reduction': volcurdr, 'Data Reduction ' + interval: volstartdr,
                 'Current Size(GB)': volcursize, 'Size ' + interval + ' Ago(GB)': volstartsize, interval + ' Growth(GB)': volsizedif})
def test_write_fields_not_in_fieldnames(self):
fd, name = tempfile.mkstemp()
fileobj = os.fdopen(fd, "w+b")
try:
writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
# Of special note is the non-string key (issue 19449)
with self.assertRaises(ValueError) as cx:
writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})
exception = str(cx.exception)
self.assertIn("fieldnames", exception)
self.assertIn("'f4'", exception)
self.assertNotIn("'f2'", exception)
self.assertIn("1", exception)
finally:
fileobj.close()
os.unlink(name)
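# For context: the ValueError tested above comes from DictWriter's default
# extrasaction='raise'. Passing extrasaction='ignore' silently drops unknown
# keys instead; a minimal sketch:
import csv
import io

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=["f1", "f2", "f3"], extrasaction="ignore")
writer.writeheader()
writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})  # 'f4' and 1 are dropped
print(buf.getvalue())  # f1,f2,f3 / ,spam,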
def __init__(self, optimizer, obj_func, pop_size=1, threshold=None, max_iter=10000, out='result',
             logging=False):
    self.opt = optimizer
    self.obj_func = obj_func
    self.pop_size = pop_size
    self.threshold = threshold
    self.max_iter = max_iter
    self.min = optimizer.w_func.min  # True if the objective is minimized
    self.out = out
    self.logging = logging
    if self.logging:
        if not os.path.isdir(out):
            os.makedirs(out)
        # Write the CSV header once; later generations append rows to this log.
        with open(out + '/log.csv', 'w', newline='') as log_file:
            self.header = ['Generation', 'BestEval'] + self.opt.generate_header() + self.opt.target.generate_header()
            csv_writer = csv.DictWriter(log_file, fieldnames=self.header)
            csv_writer.writeheader()
    # Default stopping threshold: near zero when minimizing, very large when maximizing.
    if self.threshold is None and self.min:
        self.threshold = 1e-6
    elif self.threshold is None:
        self.threshold = 1e+6