def pull_item(self, index):
    """Load the image and annotation stored at position ``index``.

    The annotation XML is read from ``self._annopath % img_id`` and the
    image from ``self._imgpath % img_id``. ``self.target_transform`` (when
    set) receives the annotation root plus the image width and height;
    ``self.transform`` (when set) receives the image, the first four
    target columns and the fifth target column.

    Args:
        index: Position in ``self.ids`` of the sample to load.

    Returns:
        ``(image, target, height, width)`` where ``image`` is a tensor with
        the last axis moved to the front, and ``height`` / ``width`` are
        the dimensions of the image as read from disk.

    Raises:
        IOError: If the image file cannot be read.
    """
    img_id = self.ids[index]

    target = ET.parse(self._annopath % img_id).getroot()
    img_path = self._imgpath % img_id
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; fail here with a message naming the file
        # instead of an AttributeError on ``img.shape``.
        raise IOError("Could not read image: %s" % img_path)
    height, width = img.shape[:2]

    if self.target_transform is not None:
        target = self.target_transform(target, width, height)

    if self.transform is not None:
        target = np.array(target)
        img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
        # Reverse the channel axis (to RGB).
        img = img[:, :, (2, 1, 0)]
        target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
    return torch.from_numpy(img).permute(2, 0, 1), target, height, width
python类parse()的实例源码
def read_preposition_senses(self):
    """Fill ``self.prep_senses`` from the sense definition files.

    Every entry of ``self.prep_senses_dir`` whose name contains
    ``.defs.xml`` is parsed. For each child of the XML root that has a
    ``senseid`` text, the string ``"<preposition>-<senseid>"`` is
    recorded, where the preposition is the file name with ``.defs.xml``
    removed. A two-line summary is written to standard error.
    """
    num_senses_per_prep = []
    for filename in os.listdir(self.prep_senses_dir):
        if '.defs.xml' not in filename:
            continue
        prep_str = filename.replace('.defs.xml', '')
        xml_root = ElementTree.parse(
            os.path.join(self.prep_senses_dir, filename)).getroot()
        senses = []
        # Iterate the element directly; Element.getchildren() was removed
        # in Python 3.9.
        for child_el in xml_root:
            sense_id = child_el.findtext('senseid')
            if sense_id is not None:
                # This will add strings like 'into-1(1)'
                senses.append("%s-%s" % (prep_str, sense_id))
        num_senses_per_prep.append(len(senses))
        self.prep_senses[prep_str] = senses

    num_preps = len(self.prep_senses)
    # Avoid ZeroDivisionError when no definition file was found.
    if num_preps:
        average = float(sum(num_senses_per_prep)) / num_preps
    else:
        average = 0.0
    # sys.stderr.write works on both Python 2 and 3, unlike ``print >>``.
    sys.stderr.write("Read senses for %d prepositions.\n" % num_preps)
    sys.stderr.write("Senses per preposition: %f\n" % average)
# TODO: Take a coarse-grained mapping file and implement the following function.
def find_service_manifest(xml_file):
    """Return the path of the first service manifest of an application.

    The manifest name is the ``ServiceManifestName`` attribute of the
    first ``ServiceManifestRef`` found inside the first
    ``ServiceManifestImport`` of ``xml_file``. The result is
    ``<directory of xml_file>/<manifest name>/ServiceManifest.xml``.

    Raises:
        ValueError: If the import section, the reference section or the
            manifest name cannot be found.
    """
    app_root = ET.parse(xml_file).getroot()

    manifest_import = app_root.find('fabric:ServiceManifestImport', XML_NS)
    if manifest_import is None:
        raise ValueError('Could not find service manifest import section')

    manifest_ref = manifest_import.find('fabric:ServiceManifestRef', XML_NS)
    if manifest_ref is None:
        raise ValueError('Could not find service manifest reference section')

    name = manifest_ref.attrib.get('ServiceManifestName')
    if not name:
        raise ValueError('Could not find service manifest name')

    base_dir = os.path.dirname(xml_file)
    return os.path.join(base_dir, name, 'ServiceManifest.xml')
def parse_service_type(xml_file):
    """Determine the first available service type and its type name.

    Args:
        xml_file: Service manifest to parse.

    Returns:
        ``(kind, name)`` where ``kind`` is ``'stateless'`` and ``name`` is
        the ``ServiceTypeName`` attribute of the first child of the
        ``ServiceTypes`` element.

    Raises:
        ValueError: If the ``ServiceTypes`` element is missing or empty,
            its first child is not a stateless service type, or the type
            name is missing.
    """
    root = ET.parse(xml_file).getroot()
    service_types = root.find('fabric:ServiceTypes', XML_NS)
    # An empty <ServiceTypes/> element is reported the same way as a
    # missing one instead of leaking an IndexError from ``[0]``.
    if service_types is None or len(service_types) == 0:
        raise ValueError('Could not find service types in service manifest')

    first_type = service_types[0]
    if 'StatelessServiceType' not in first_type.tag:
        # For now we only support stateless services for service creation
        raise ValueError('Unsupported service type')

    service_type_name = first_type.attrib.get('ServiceTypeName')
    if not service_type_name:
        raise ValueError('Could not find service type name')
    return ('stateless', service_type_name)
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` converted to ints.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``softPkg`` is used. The object created
    by the class's ``factory()`` is populated with ``build(root)`` and
    returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = softPkg
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``devicepkg`` is used. The object created
    by the class's ``factory()`` is populated with ``build(root)`` and
    returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = devicepkg
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``deviceconfiguration`` is used. The
    object created by the class's ``factory()`` is populated with
    ``build(root)`` and returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = deviceconfiguration
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``domainmanagerconfiguration`` is used.
    The object created by the class's ``factory()`` is populated with
    ``build(root)`` and returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = domainmanagerconfiguration
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``profile`` is used. The object created
    by the class's ``factory()`` is populated with ``build(root)`` and
    returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = profile
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``softwareassembly`` is used. The object
    created by the class's ``factory()`` is populated with
    ``build(root)`` and returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = softwareassembly
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``softwarecomponent`` is used. The object
    created by the class's ``factory()`` is populated with
    ``build(root)`` and returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = softwarecomponent
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse(inFileName):
    """Load ``inFileName`` with ``parsexml_`` and build an object from it.

    The class for the root element is looked up with ``get_root_tag``;
    when that returns no class, ``properties`` is used. The object
    created by the class's ``factory()`` is populated with
    ``build(root)`` and returned.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    _, binding_class = get_root_tag(root_element)
    if binding_class is None:
        binding_class = properties
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop the document reference so Python can collect the DOM.
    document = None
    return instance
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` converted to ints.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` converted to ints.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` converted to ints.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def parse(stream):
    """Build an ``InputDefinition`` from the XML read from ``stream``.

    :param stream: stream containing XML to parse.
    :return: an ``InputDefinition`` whose ``inputs`` are taken from the
        ``configuration`` child (via ``parse_xml_data(node, "stanza")``)
        and whose ``metadata`` maps every other child's tag to its text.
    """
    result = InputDefinition()

    for child in ET.parse(stream).getroot():
        if child.tag != "configuration":
            result.metadata[child.tag] = child.text
            continue
        # The configuration element carries one entry per stanza.
        result.inputs = parse_xml_data(child, "stanza")

    return result
def parse(stream):
    """Build an ``InputDefinition`` from the XML read from ``stream``.

    :param stream: stream containing XML to parse.
    :return: an ``InputDefinition`` whose ``inputs`` are taken from the
        ``configuration`` child (via ``parse_xml_data(node, "stanza")``)
        and whose ``metadata`` maps every other child's tag to its text.
    """
    result = InputDefinition()

    for child in ET.parse(stream).getroot():
        if child.tag != "configuration":
            result.metadata[child.tag] = child.text
            continue
        # The configuration element carries one entry per stanza.
        result.inputs = parse_xml_data(child, "stanza")

    return result
def get_pub_dic_xml(file_name='data/proton-beam-all.xml'):
    """Map each publication's record number to its text features.

    The publications are the children of the first child of the XML
    root. For each one the text is the title (first child of ``titles``)
    followed by the abstract and then every keyword's text, each keyword
    followed by a single space.

    Args:
        file_name: Path of the XML export to read.

    Returns:
        dict mapping ``int`` record number -> concatenated text.
    """
    records = ET.parse(file_name).getroot()[0]

    # Create dic of : id -> text features
    pub_dic = {}
    for pub in records:
        rec_number = int(get_text(pub.find('rec-number')))
        abstract = get_text(pub.find('abstract'))
        title = get_text(pub.find('titles')[0])

        keywords = pub.find('keywords')
        if keywords is None:
            # A record without a <keywords> element contributes only its
            # title and abstract instead of raising TypeError.
            keyword_text = ''
        else:
            keyword_text = ''.join(kw.text + ' ' for kw in keywords)

        pub_dic[rec_number] = title + abstract + keyword_text
    return pub_dic
def youtube_channel(channel_id):
    """Fetch a channel's video feed and list its entries.

    Returns a list with one dict per Atom ``entry`` in document order,
    holding ``id`` (the ``videoId`` text), ``date`` (the ``published``
    text parsed with ``%Y-%m-%dT%H:%M:%S+00:00``) and ``title``.
    """
    import xml.etree.ElementTree as ET
    import urllib.request
    import datetime

    atom_ns = '{http://www.w3.org/2005/Atom}'
    yt_ns = '{http://www.youtube.com/xml/schemas/2015}'
    feed_url = "https://www.youtube.com/feeds/videos.xml?max-results=50&channel_id=" + channel_id

    with urllib.request.urlopen(feed_url) as response:
        feed = ET.parse(response).getroot()

    return [
        {
            'id': entry.find(yt_ns + 'videoId').text,
            'date': datetime.datetime.strptime(
                entry.find(atom_ns + 'published').text,
                '%Y-%m-%dT%H:%M:%S+00:00'),
            'title': entry.find(atom_ns + 'title').text,
        }
        for entry in feed.iter(atom_ns + 'entry')
    ]
def youtube_channel(channel_id):
    """Fetch a channel's video feed and list its entries.

    Returns a list with one dict per Atom ``entry`` in document order,
    holding ``id`` (the ``videoId`` text), ``date`` (the ``published``
    text parsed with ``%Y-%m-%dT%H:%M:%S+00:00``) and ``title``.
    """
    import xml.etree.ElementTree as ET
    import urllib.request
    import datetime

    atom_ns = '{http://www.w3.org/2005/Atom}'
    yt_ns = '{http://www.youtube.com/xml/schemas/2015}'
    feed_url = "https://www.youtube.com/feeds/videos.xml?max-results=50&channel_id=" + channel_id

    with urllib.request.urlopen(feed_url) as response:
        feed = ET.parse(response).getroot()

    return [
        {
            'id': entry.find(yt_ns + 'videoId').text,
            'date': datetime.datetime.strptime(
                entry.find(atom_ns + 'published').text,
                '%Y-%m-%dT%H:%M:%S+00:00'),
            'title': entry.find(atom_ns + 'title').text,
        }
        for entry in feed.iter(atom_ns + 'entry')
    ]
def parse_file(self):
    """Parse the capture at ``self.path`` and pair up URB packets.

    The file is read through ``gzip.GzipFile`` when the path ends with
    ``gz`` and as a plain file otherwise. Every child of the XML root is
    wrapped in a ``USBPacket``. Packets of type ``URB_TYPE_SUBMIT`` are
    remembered by URB id; when a ``URB_TYPE_COMPLETED`` packet with the
    same id arrives, ``self.usb_transaction(submit, completed)`` is called
    and the pending entry is dropped. A completion without a pending
    submit is reported with ``print``.
    """
    if self.path.endswith("gz"):
        source = gzip.GzipFile(self.path)
    else:
        # Binary mode lets the XML parser honour the encoding declared in
        # the document instead of the platform's default text encoding.
        source = open(self.path, "rb")
    # The context manager closes the file even when parsing fails.
    with source:
        tree = ET.parse(source)

    pending_submits = {}
    for child in tree.getroot():
        p = USBPacket(child)
        urb_id = p["usb.urb_id"]
        # Read but currently unused; kept in case the lookup validates
        # the packet -- TODO confirm and remove.
        urb_status = p["usb.urb_status"]
        urb_type = p["usb.urb_type"]

        if urb_type == URB_TYPE_SUBMIT:
            pending_submits[urb_id] = p
        if urb_type == URB_TYPE_COMPLETED:
            if urb_id not in pending_submits:
                print("Urb id not present: {:x}".format(urb_id))
            else:
                self.usb_transaction(pending_submits.pop(urb_id), p)
def parse_voc_rec(filename):
    """
    Parse a PASCAL VOC record into a list of dictionaries.

    :param filename: xml file path
    :return: list of dict, one per ``object`` element, with the keys
        ``name``, ``difficult`` (int) and ``bbox``
        (``[xmin, ymin, xmax, ymax]`` as ints)
    """
    import xml.etree.ElementTree as ET

    def _describe(node):
        # Fields are read in the order name, difficult, bounding box.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_describe(node) for node in ET.parse(filename).findall('object')]
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` converted to ints.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` converted to ints.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def parseXML(self):
    """Load the annotation file at ``self.filepath`` into this reader.

    Sets ``self.verified`` to whether the root element's ``verified``
    attribute equals ``'yes'`` and calls
    ``self.addShape(label, bndbox, difficult)`` for every ``object``
    element under the root. ``difficult`` is False when the object has
    no ``difficult`` element.

    Returns:
        True once every object has been handed to ``addShape``.
    """
    assert self.filepath.endswith(XML_EXT), "Unsupport file format"
    parser = etree.XMLParser(encoding=ENCODE_METHOD)
    xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()

    # Always assign the flag. Previously a ``verified`` attribute with a
    # value other than 'yes' left ``self.verified`` untouched.
    # The unused lookup of the ``filename`` element was dropped, so a file
    # without that element no longer aborts parsing.
    self.verified = xmltree.attrib.get('verified') == 'yes'

    for object_iter in xmltree.findall('object'):
        bndbox = object_iter.find("bndbox")
        label = object_iter.find('name').text
        difficult_el = object_iter.find('difficult')
        difficult = (difficult_el is not None
                     and bool(int(difficult_el.text)))
        self.addShape(label, bndbox, difficult)
    return True
def parse_rec(filename):
    """Parse a PASCAL VOC xml file into a list of object dicts.

    Each ``object`` element yields a dict with the keys ``name``,
    ``pose``, ``truncated``, ``difficult`` and ``bbox``; ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` with 1 subtracted from every coordinate.
    """
    def _as_record(node):
        # One dict per <object>, fields read in the order they are stored.
        box = node.find('bndbox')
        return {
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int(node.find('truncated').text),
            'difficult': int(node.find('difficult').text),
            'bbox': [int(box.find(corner).text) - 1
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        }

    return [_as_record(node) for node in ET.parse(filename).findall('object')]
def pull_anno(self, index):
    """Return the untransformed annotation of the image at ``index``.

    Note: ``self.__getitem__()`` is deliberately not used, because any
    transformations passed in could interfere with this result.

    Argument:
        index (int): index of img to get annotation of
    Return:
        tuple: ``(img_id[1], gt)`` where ``gt`` is the result of
            ``self.target_transform(annotation_root, 1, 1)``
            eg: ('001718', [('dog', (96, 13, 438, 332))])
    """
    image_key = self.ids[index]
    annotation_root = ET.parse(self._annopath % image_key).getroot()
    ground_truth = self.target_transform(annotation_root, 1, 1)
    return image_key[1], ground_truth
def clean(path):
    """Write markup-free copies of the data files directly inside ``path``.

    * Each ``*.xml`` file is written to the same path without its
      extension: one line per ``seg`` element (text stripped), taken from
      the ``doc`` elements under the first child of the document root.
    * Each ``train.tags*`` file is written to the same path with every
      ``.tags`` occurrence removed: lines containing one of the metadata
      tags are dropped, the remaining lines are stripped.

    The path of every processed input file is printed.
    """
    def _open_output(name):
        return io.open(name, mode='w', encoding='utf-8')

    for xml_path in glob.iglob(os.path.join(path, '*.xml')):
        print(xml_path)
        plain_path, _ = os.path.splitext(xml_path)
        with _open_output(plain_path) as sink:
            container = ET.parse(xml_path).getroot()[0]
            for document in container.findall('doc'):
                for segment in document.findall('seg'):
                    sink.write(segment.text.strip() + '\n')

    metadata_tags = ('<url', '<keywords', '<talkid', '<description',
                     '<reviewer', '<translator', '<title', '<speaker')
    for tagged_path in glob.iglob(os.path.join(path, 'train.tags*')):
        print(tagged_path)
        plain_path = tagged_path.replace('.tags', '')
        with _open_output(plain_path) as sink, \
                io.open(tagged_path, mode='r', encoding='utf-8') as source:
            for line in source:
                if any(tag in line for tag in metadata_tags):
                    continue
                sink.write(line.strip() + '\n')
def test_to_dict_from_etree(self):
    """Check ``to_dict`` on already-parsed trees for both example schemas.

    For each schema: decoding without a namespace map must differ from
    the reference dict, while decoding with ``self.namespaces`` (through
    the schema method and through ``xmlschema.to_dict``) must equal it.
    """
    vehicles_tree = _ElementTree.parse('examples/vehicles/vehicles.xml')
    collection_tree = _ElementTree.parse('examples/collection/collection.xml')

    def check(schema, tree, expected):
        # Without a namespace map the XSI namespace stays unmapped.
        self.assertNotEqual(schema.to_dict(tree), expected)
        self.assertEqual(
            schema.to_dict(tree, namespaces=self.namespaces), expected)
        self.assertEqual(
            xmlschema.to_dict(tree, schema.url, namespaces=self.namespaces),
            expected)

    check(self.vh_schema, vehicles_tree, _VEHICLES_DICT)
    check(self.col_schema, collection_tree, _COLLECTION_DICT)