def load_csv_dataset(filename):
    """
    Loads a csv filename as a dataset.

    The file is read from DIR_GENERATED_DATA and parsed as a ';'-delimited CSV using '"'
    as the quote character. Each row is read positionally: id, text, gene, variation and
    the real class. The id must be an integer (``int(row[0])`` raises ValueError
    otherwise); a missing or non-integer real class is stored as None.

    :param str filename: name of the file
    :return List[DataSample]: a list of DataSample
    """
    dataset = []
    # NOTE(review): binary mode plus the `errors` keyword look like a unicodecsv-style
    # reader rather than the stdlib csv module -- confirm what `csv` is bound to here.
    with open(os.path.join(DIR_GENERATED_DATA, filename), 'rb') as file:
        reader = csv.reader(file, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL,
                            errors='ignore')
        for row in reader:
            sample_id = int(row[0])
            text = row[1]
            gene = row[2]
            variation = row[3]
            try:
                real_class = int(row[4])
            except (IndexError, ValueError):
                # The class column is absent or does not hold an integer.
                real_class = None
            dataset.append(DataSample(sample_id, text, gene, variation, real_class))
    return dataset
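# Python QUOTE_MINIMAL usage examples (source code)
# Source file: preprocess_data.py
# Project: kaggle_redefining_cancer_treatment
# Author: jorgemf
# Project source
# File source
# Views: 25
# Favorites: 0
# Likes: 0
# Comments: 0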
def hardwareasset_export(self, request):
    """Export all HardwareAssets to a CSV.

    Writes a fixed header row followed by one row per HardwareAsset, then returns the
    CSV text in an HttpResponse marked as an attachment named
    ``hardwareasset_export.csv``. The ``request`` argument is not read.
    """
    header = [
        'ASSET TAG', 'VENDOR', 'MODEL TYPE', 'HARDWARE MODEL', 'SERIAL', 'STATUS',
        'DATE PURCHASED', 'LOCATION', 'ASSIGNED USER', 'SERVICE REQUEST URL']
    buf = StringIO()
    out = csv.writer(buf, quoting=csv.QUOTE_MINIMAL, encoding='utf-8')
    out.writerow(header)
    for asset in HardwareAsset.objects.all():
        # Empty optional fields are exported as empty strings.
        if asset.date_purchased:
            purchased = datetime.strftime(asset.date_purchased, '%d/%b/%Y')
        else:
            purchased = ''
        out.writerow([
            asset.asset_tag,
            asset.vendor,
            asset.hardware_model.get_model_type_display(),
            asset.hardware_model,
            asset.serial,
            asset.get_status_display(),
            purchased,
            asset.location or '',
            asset.assigned_user or '',
            asset.service_request_url,
        ])
    response = HttpResponse(buf.getvalue(), content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=hardwareasset_export.csv'
    return response
def render(self, output):
    """Write the queried Event rows to a CSV file.

    Events are restricted to ``self.start`` / ``self.end`` when those are set and are
    ordered by time. The query is handed to DataSet together with a key function that
    returns each event's date (presumably a per-day grouping -- confirm against
    DataSet); rows are written by walking ``data.group.items()``.

    :param output: path of the CSV file to create
    """
    db = DBSession()
    query = db.query(Event)
    if self.start:
        query = query.filter(Event.time >= "{}".format(self.start))
    if self.end:
        query = query.filter(Event.time <= "{}".format(self.end))
    query = query.order_by(Event.time)

    grouped = DataSet(query, lambda e: e.time.date())
    header = ['date', 'time', 'type', 'subtype', 'source', 'value', 'unit', 'notes', 'tags']

    with open(output, 'wb') as csvfile:
        out = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        out.writerow(header)
        # The group key is not written; date and time both come from the event itself.
        for _day, entries in grouped.group.items():
            for event in entries:
                stamp = event.time
                out.writerow([
                    stamp.strftime('%Y-%m-%d'),
                    stamp.strftime('%I:%M:%S %p'),
                    event.type,
                    event.subtype,
                    SOURCE_NAME[event.source],
                    event.value,
                    event.unit,
                    event.notes,
                    event.tags,
                ])
def render(self, output):
    """Write meter-type GlucoseEvent rows to a CSV file.

    Only events whose subtype equals ``GlucoseEvent.TYPE_METER`` are kept, and only
    when their tags are NULL, empty, or do not match ``%Manual%``. The result is
    restricted to ``self.start`` / ``self.end`` when those are set and ordered by time.
    The query is handed to DataSet together with a key function that returns each
    event's date (presumably a per-day grouping -- confirm against DataSet).

    :param output: path of the CSV file to create
    """
    db = DBSession()
    query = db.query(GlucoseEvent)
    query = query.filter(Event.subtype == GlucoseEvent.TYPE_METER)

    # `== None` is intentional: it builds a query expression, which `is None` would not.
    not_manual = or_(
        Event.tags == None,
        Event.tags == '',
        not_(Event.tags.like(r'%Manual%'))
    )
    query = query.filter(not_manual)

    if self.start:
        query = query.filter(Event.time >= "{}".format(self.start))
    if self.end:
        query = query.filter(Event.time <= "{}".format(self.end))
    query = query.order_by(Event.time)

    grouped = DataSet(query, lambda e: e.time.date())
    header = ['date', 'time', 'source', 'value', 'unit', 'tags']

    with open(output, 'wb') as csvfile:
        out = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        out.writerow(header)
        # The group key is not written; date and time both come from the event itself.
        for _day, entries in grouped.group.items():
            for event in entries:
                stamp = event.time
                out.writerow([
                    stamp.strftime('%Y-%m-%d'),
                    stamp.strftime('%I:%M:%S %p'),
                    SOURCE_NAME[event.source],
                    event.value,
                    event.unit,
                    event.tags,
                ])
# Source file: preprocess_data.py
# Project: kaggle_redefining_cancer_treatment
# Author: jorgemf
# Project source
# File source
# Views: 25
# Favorites: 0
# Likes: 0
# Comments: 0
def save_csv_dataset(filename, dataset):
    """
    Saves a dataset into a file.

    The file is created inside DIR_GENERATED_DATA as a ';'-delimited CSV using '"' as
    the quote character. One row is written per sample with the columns id, text, gene,
    variation and real class; id and real class are converted with ``str()``, so a
    real class of None is written as the text ``None``.

    :param str filename: name of the file
    :param List[DataSample] dataset: dataset
    """
    with open(os.path.join(DIR_GENERATED_DATA, filename), 'wb') as file:
        writer = csv.writer(file, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for d in dataset:
            writer.writerow([str(d.id), d.text, d.gene, d.variation, str(d.real_class)])
# Source file: preprocess_data.py
# Project: kaggle_redefining_cancer_treatment
# Author: jorgemf
# Project source
# File source
# Views: 23
# Favorites: 0
# Likes: 0
# Comments: 0
def load_csv_wikipedia_gen(filename):
    """
    Reads a wikipedia genes dataset from a csv file.

    The file is looked up inside DIR_GENERATED_DATA and parsed as a ';'-delimited CSV
    using '"' as the quote character. The first two columns of every row are passed,
    in order, to WikipediaGene.

    :param str filename: name of the file
    :return List[WikipediaGene]: a list of WikipediaGene
    """
    path = os.path.join(DIR_GENERATED_DATA, filename)
    with open(path) as file:
        rows = csv.reader(file, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        # Build the list while the file is still open: the reader is lazy.
        return [WikipediaGene(row[0], row[1]) for row in rows]
# Source file: preprocess_data.py
# Project: kaggle_redefining_cancer_treatment
# Author: jorgemf
# Project source
# File source
# Views: 26
# Favorites: 0
# Likes: 0
# Comments: 0
def save_csv_wikipedia_gen(filename, wikipedia_genes):
    """
    Writes the wikipedia genes to a csv file.

    The file is created inside DIR_GENERATED_DATA as a ';'-delimited CSV using '"' as
    the quote character. One row is written per entry: ``str(gene)`` followed by the
    text.

    :param str filename: name of the file
    :param List[WikipediaGene] wikipedia_genes: WikipediaGene dataset
    """
    path = os.path.join(DIR_GENERATED_DATA, filename)
    with open(path, 'wb') as file:
        out = csv.writer(file, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        out.writerows([str(entry.gene), entry.text] for entry in wikipedia_genes)
####################################################################################################
def toCSV(self, delimiter = '\t'):
    """Serialise ``self.data`` as delimited text and return the buffer contents.

    When the first element of ``self.data`` is a dict, every row is flattened, the
    fields listed in NOT_ALLOWED_FIELDS are dropped, and a header plus one line per
    row is written. Columns are ``self.params.fields`` when set, otherwise the sorted
    union of all flattened keys. When the first element is a list, the rows are
    written as they are, without a header.

    :param delimiter: field separator (tab by default)
    :return: the content written to the BytesIO buffer
    """
    NOT_ALLOWED_FIELDS = ['evidence.evidence_chain', 'search_metadata', 'search_metadata.sort']
    # Quoting and escaping options shared by both writers below.
    csv_options = dict(
        delimiter=delimiter,
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
        doublequote=False,
        escapechar='\\',
    )
    output = BytesIO()

    if not self.data:
        # populate data if empty (presumably flatten() fills self.data -- confirm)
        self.flatten(self.toDict())

    if self.data and isinstance(self.data[0], dict):
        seen_keys = set()
        flat_rows = []
        for record in self.data:
            simplify = self.params.datastructure == SourceDataStructureOptions.SIMPLE
            flat = self.flatten(record, simplify=simplify)
            for banned in NOT_ALLOWED_FIELDS:
                flat.pop(banned, None)
            flat_rows.append(flat)
            seen_keys.update(flat)
        columns = self.params.fields or sorted(seen_keys)
        columns = [unicode(column) for column in columns]
        dict_writer = csv.DictWriter(output, columns, restval='', **csv_options)
        dict_writer.writeheader()
        for flat in flat_rows:
            dict_writer.writerow(flat)

    if self.data and isinstance(self.data[0], list):
        list_writer = csv.writer(output, **csv_options)
        for record in self.data:
            list_writer.writerow(record)

    return output.getvalue()