def __init__(self, data_def = None, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
self.tree_lock = RLock()
with self.tree_lock:
self.dtc = DataTreeConstants()
self.known_urlid = (0, 4, 11, 14)
self.known_linkid = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
self.errorcode = dte.dtDataDefOK
self.caller_id = caller_id
self.cdata_def = {}
self.ddtype = ""
        if sys.modules['DataTreeGrab']._warnings is None:
            sys.modules['DataTreeGrab']._warnings = _Warnings(warnaction, warngoal, caller_id)
        elif caller_id not in sys.modules['DataTreeGrab']._warnings._ids or warnaction is not None:
            sys.modules['DataTreeGrab']._warnings.set_warnaction(warnaction, caller_id)
if isinstance(data_def, dict):
self.data_def = data_def
self.convert_data_def()
else:
self.data_def = {}
def __init__(self, dtree, data = None, parent = None, key = None):
self.type = "value"
self.key = key
self.keys = []
self.key_index = {}
self.value = None
DATAnode.__init__(self, dtree, parent)
with self.node_lock:
if isinstance(data, list):
self.type = "list"
for k in range(len(data)):
JSONnode(self.dtree, data[k], self, k)
elif isinstance(data, dict):
self.type = "dict"
for k, item in data.items():
JSONnode(self.dtree, item, self, k)
else:
self.type = "value"
self.value = data
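# The recursion above mirrors arbitrary JSON into typed nodes. A minimal,
# self-contained sketch of the same idea, without DataTreeGrab's DATAnode
# base class or locking (SimpleJSONNode is an illustrative name, not part
# of the library):
import json

class SimpleJSONNode(object):
    def __init__(self, data, parent=None, key=None):
        self.parent, self.key, self.children, self.value = parent, key, [], None
        if isinstance(data, list):
            self.type = 'list'
            for k, item in enumerate(data):
                self.children.append(SimpleJSONNode(item, self, k))
        elif isinstance(data, dict):
            self.type = 'dict'
            for k, item in data.items():
                self.children.append(SimpleJSONNode(item, self, k))
        else:
            self.type = 'value'
            self.value = data

root = SimpleJSONNode(json.loads('{"a": [1, 2], "b": "x"}'), key='ROOT')
print(root.type, [c.key for c in root.children])   # dict ['a', 'b']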
def __init__(self, data, output = sys.stdout, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
DATAtree.__init__(self, output, warnaction, warngoal, caller_id)
with self.tree_lock:
        self.tree_type = 'json'
self.extract_from_parent = True
self.data = data
# Read the json data into the tree
try:
self.root = JSONnode(self, data, key = 'ROOT')
self.start_node = self.root
        except Exception:
            self.warn('Unable to parse the JSON data. Invalid dataset!', dtDataWarning, 1)
self.start_node = NULLnode()
# end JSONtree
def __init__(self, data_def, data = None, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
self.tree_lock = RLock()
with self.tree_lock:
self.dtc = DataTreeConstants()
        self.ddconv = DataDef_Convert(warnaction = warnaction, warngoal = warngoal, caller_id = caller_id)
self.caller_id = caller_id
self.print_tags = False
self.print_searchtree = False
self.show_result = False
self.fle = sys.stdout
        if sys.modules['DataTreeGrab']._warnings is None:
sys.modules['DataTreeGrab']._warnings = _Warnings(warnaction, warngoal, caller_id)
else:
sys.modules['DataTreeGrab']._warnings.set_warnaction(warnaction, caller_id)
self.searchtree = None
self.timezone = pytz.utc
self.errorcode = dte.dtDataInvalid
self.result = []
self.data_def = None
self.init_data_def(data_def)
        if data is not None:
self.init_data(data)
def __init__(self, base_url, url=None):
""" Initializer
:param base_url: site base url
:param url: current url
"""
HTMLParser.__init__(self)
self.cache = {}
self.items = []
self.pages = {}
self.total_pages = 0
self.base_url = base_url
self.url = self.base_url
self.site_total_pages = 0
self.CACHE_SIZE = 500
if url:
self.url = url
def __init__(self, name, outbox, max_task):
    '''
    @name: worker name, used to tell the crawler processes apart
    @outbox: shared queue on which extracted urls are placed
    @max_task: maximum number of concurrent tasks (i.e. coroutines) per worker
    '''
multiprocessing.Process.__init__(self)
self.name = name
    self.inbox = multiprocessing.Queue() # queue of urls waiting to be crawled by this worker
self.outbox = outbox
self.max_task = max_task
self.doing = multiprocessing.Value('i', 0)
self._doing = set()
    self.result = set() # urls already crawled
self.loop = None
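# A hypothetical sketch of how such a worker's inbox/outbox queues are wired
# together; the crawling logic is elided and replaced by a trivial echo:
import multiprocessing

def drain(inbox, outbox):
    while True:
        url = inbox.get()
        if url is None:              # sentinel: stop the worker
            break
        outbox.put(url)              # a real worker would fetch url here and
                                     # put the extracted links on outbox

if __name__ == '__main__':
    inbox, outbox = multiprocessing.Queue(), multiprocessing.Queue()
    worker = multiprocessing.Process(target=drain, args=(inbox, outbox))
    worker.start()
    inbox.put('http://example.com')
    inbox.put(None)
    worker.join()
    print(outbox.get())              # http://example.com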
def __init__(self):
"""Initialize attributes."""
if sys.version.startswith('3.'):
# Python 3.x
super().__init__(convert_charrefs=False)
else:
# use HTMLParser.__init__ because HTMLParser is an 'old' style class, which cannot be passed to super()
# see http://codependentcodr.blogspot.com/2012/02/python-htmlparser-and-super.html
HTMLParser.__init__(self)
self._root = _HtmlHeaderNode(level=0) # root node with no data of itself, only 'children' matters
self._curr_node = self._root # most recently handled header node
self._in_header = False
self._header_id_count = {} # record header ids to avoid collisions
self._html = '' # full HTML string parsed
self._temp_start_tag = '' # temporary HTML start tag of this current header node
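# _header_id_count suggests header ids are de-duplicated by suffixing a
# counter; a hypothetical sketch of that scheme (not necessarily this
# class's actual method):
def unique_header_id(counts, base):
    if base not in counts:
        counts[base] = 0
        return base
    counts[base] += 1
    return '%s-%d' % (base, counts[base])

counts = {}
print(unique_header_id(counts, 'intro'))   # intro
print(unique_header_id(counts, 'intro'))   # intro-1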
def __init__(self, results, url):
HTMLParser.__init__(self)
self.results = results
self.url = url
self.current_item = {} # One torrent result
self.add_query = True
self.torrent_info_index = 0 # Count of the meta data encountered
self.torrent_info_array = []
self.meta_data_grabbing = 0
self.meta_data_array = []
self.torrent_no_files = 0
self.torrent_date_added = 0
self.torrent_popularity = 0
    self.magnet_link = ""
self.desc_link = ""
self.torrent_name = ""
def __init__(self, model, label, data=[]):
""" Returns a new Model calibrated on the given data,
which is a set of (vector, label)-tuples.
"""
self._model = model
self._label = label
# Isotonic regression:
    y = ((model.predict(v)[label], label == x) for v, x in data)
    y = sorted(y)  # sort by predicted probability so the fit is monotonic
    y = zip(*y)    # -> (probabilities, booleans)
    y = list(y or ((), ()))  # guard against an empty data set
    x = list(y[0])
    y = list(y[1])
    y = pav(y)     # pool adjacent violators: closest non-decreasing fit
x = [0] + x + [1]
y = [0] + y + [1]
f = {}
i = 0
# Linear interpolation:
for p in range(100 + 1):
p *= 0.01
while x[i] < p:
i += 1
f[p] = (y[i-1] * (x[i] - p) + y[i] * (p - x[i-1])) / (x[i] - x[i-1])
self._f = f
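# pav() is not part of this excerpt; below is a minimal sketch of the
# pool-adjacent-violators step it presumably performs: replace the sequence
# with the closest non-decreasing sequence of block means.
def pav_sketch(y):
    blocks = [[float(v), 1] for v in y]          # [block mean, block size]
    i = 0
    while i < len(blocks) - 1:
        if blocks[i][0] > blocks[i + 1][0]:      # violation: pool the two blocks
            total = blocks[i][0] * blocks[i][1] + blocks[i + 1][0] * blocks[i + 1][1]
            size = blocks[i][1] + blocks[i + 1][1]
            blocks[i:i + 2] = [[total / size, size]]
            i = max(i - 1, 0)                    # pooled block may violate its left neighbour
        else:
            i += 1
    out = []
    for mean, size in blocks:
        out.extend([mean] * size)
    return out

print(pav_sketch([1, 0, 1, 1, 0, 1]))   # -> [0.5, 0.5, 0.666..., 0.666..., 0.666..., 1.0]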
def __init__(self, path='WordNet-3.0'):
""" Opens the WordNet database from the given path
(that contains dict/index.noun, dict/data.noun, ...)
"""
self._f = {} # {'n': <open file 'dict/index.noun'>}
for k, v in (('n', 'noun'), ('v', 'verb'), ('a', 'adj' ), ('r', 'adv' )):
f = cd(path, 'dict', 'data.%s' % v)
f = open(f, 'rb')
self._f[k] = f
f = cd(path, 'dict', 'index.%s' % v)
f = open(f, 'r')
for s in f:
if not s.startswith(' '):
s = s.strip()
s = s.split(' ')
                p = s[-int(s[2]):]  # the trailing synset_cnt fields are the synset offsets
w = s[0]
w = w.replace('_', ' ')
self[w, k] = p # {('grasp', 'n'): (offset1, ...)}
f.close()
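# The slice s[-int(s[2]):] relies on the WordNet index.* line layout:
#   lemma pos synset_cnt p_cnt ptr_symbol... sense_cnt tagsense_cnt offset...
# where the third field (synset_cnt) also counts the trailing synset offsets.
# A short illustration with a made-up index line:
line = 'grasp n 2 3 @ ~ + 2 1 05311054 00592702'
s = line.strip().split(' ')
print(s[0], s[-int(s[2]):])   # grasp ['05311054', '00592702']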
def __init__(
self,
decode_html_entities=False,
data_separator=' ',
):
HTMLParser.__init__(self)
self._parse_html_entities = decode_html_entities
self._data_separator = data_separator
self._in_td = False
self._in_th = False
self._current_table = []
self._current_row = []
self._current_cell = []
self.tables = []
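# The handler methods are not part of this excerpt; below is a minimal,
# self-contained sketch of how the attributes above typically drive table
# extraction (a guess at the intended flow, Python 3 import shown):
from html.parser import HTMLParser

class TableSketch(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self._in_cell = False
        self._current_table = []
        self._current_row = []
        self._current_cell = []
        self.tables = []
    def handle_starttag(self, tag, attrs):
        if tag in ('td', 'th'):
            self._in_cell = True
    def handle_data(self, data):
        if self._in_cell:
            self._current_cell.append(data.strip())
    def handle_endtag(self, tag):
        if tag in ('td', 'th'):
            self._in_cell = False
            self._current_row.append(' '.join(self._current_cell))
            self._current_cell = []
        elif tag == 'tr':
            self._current_table.append(self._current_row)
            self._current_row = []
        elif tag == 'table':
            self.tables.append(self._current_table)
            self._current_table = []

p = TableSketch()
p.feed('<table><tr><td>a</td><td>b</td></tr></table>')
print(p.tables)   # [[['a', 'b']]]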
def get_links(html): # extract all links from the page
class URLSeeker(HTMLParser):
def __init__(self):
            HTMLParser.__init__(self) # note: HTMLParser is an old-style class in Python 2, so super() cannot be used here
self.urls = []
def handle_starttag(self, tag, attrs):
href = dict(attrs).get('href')
if href and tag == 'a':
self.urls.append(href)
url_seeker = URLSeeker()
url_seeker.feed(html)
print('@@'*20)
print(url_seeker.urls)
print('@@'*20)
    return url_seeker.urls # return the collected urls
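# Example use of get_links, assuming HTMLParser was imported from html.parser
# on Python 3 (or the HTMLParser module on Python 2):
links = get_links('<a href="http://example.com">x</a> <a href="/doc/1">y</a>')
# -> ['http://example.com', '/doc/1'] (plus the debug prints above)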
def __init__(self):
HTMLParser.__init__(self)
self._in_td = False
self._in_th = False
self._current_table = []
self._current_row = []
self._current_cell = []
self.tables = []
def __init__(self):
HTMLParser.__init__(self)
self.buf = []
self.last_text = []
self.hide_output = False
self.tag_count = 0
self.current_tag = None
def __init__(self):
HTMLParser.__init__(self)
self.event_time = []
self.event_title = []
self.event_location = []
self.in_time = False
self.in_title = False
self.in_location = False
def __init__(self, *args, **kwargs):
HTMLParser.__init__(self, *args, **kwargs)
# Keep a list of empty-element tags that were encountered
# without an explicit closing tag. If we encounter a closing tag
# of this type, we'll associate it with one of those entries.
#
# This isn't a stack because we don't care about the
# order. It's a list of closing tags we've already handled and
# will ignore, assuming they ever show up.
self.already_closed_empty_element = []
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
kwargs['strict'] = False
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
kwargs['convert_charrefs'] = False
self.parser_args = (args, kwargs)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.snf = StyledNoteFormatter(self)
def __init__(self, form):
self.form = form
self.database = form.database
self._backend = WebAppBackend()
self._backend.build_link = self.build_link
def __init__(self, form):
HTMLParser.__init__(self)
self.form = form
self.__text = ""
self.__tags = {}
self.__stack = []
def __init__(self):
self.data = []
self.href = 0
self.linkname = ''
self.patt = re.compile(r'^/doc/\d+$')
HTMLParser.__init__(self)
def __init__(self):
self.data = set([])
self.href = 0
self.patt = re.compile(r'^\?p=\d+$')
HTMLParser.__init__(self)
def __init__(self, allows = []):
HTMLParser.__init__(self)
self.allow_tags = allows if allows else self.allow_tags
self.result = []
self.start = []
self.data = []
def __init__(self, warnaction = None, warngoal = sys.stderr, caller_id = 0):
self.warn_lock = RLock()
self.onceregistry = {}
self.filters = []
self._ids = []
    if caller_id not in self._ids:
self._ids.append(caller_id)
self.warngoal = warngoal
    if warnaction is None:
warnaction = "default"
self.set_warnaction(warnaction, caller_id)
def __init__(self, dtree, parent = None):
self.node_lock = RLock()
with self.node_lock:
self.dtc = DataTreeConstants()
self.children = []
self.dtree = dtree
self.parent = parent
self.value = None
self.child_index = 0
self.level = 0
self.links = {}
self.links["values"] = {}
self.links["nodes"] = {}
self.end_links = {}
self.end_links["values"] = {}
self.end_links["nodes"] = {}
        self.is_root = self.parent is None
n = self
while not n.is_root:
n = n.parent
self.root = n
if isinstance(parent, DATAnode):
self.parent.append_child(self)
self.level = parent.level + 1
def __init__(self, dtree, data = None, parent = None):
self.tag = u''
self.text = u''
self.tail = u''
self.attributes = {}
self.attr_names = []
DATAnode.__init__(self, dtree, parent)
with self.node_lock:
if isinstance(data, (str, unicode)):
self.tag = data.lower().strip()
elif isinstance(data, list):
if len(data) > 0:
self.tag = data[0].lower().strip()
if len(data) > 1 and isinstance(data[1], (list, tuple)):
for a in data[1]:
if isinstance(a[1], (str, unicode)):
self.attributes[a[0].lower().strip()] = a[1].strip()
else:
self.attributes[a[0].lower().strip()] = a[1]
        if 'class' in self.attributes:
            self.attr_names.append('class')
        if 'id' in self.attributes:
            self.attr_names.append('id')
        for a in self.attributes:
            if a not in self.attr_names:
                self.attr_names.append(a)
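# The loop above orders attr_names with 'class' and 'id' first; the same
# logic on a small standalone example:
attributes = {'style': 'color:red', 'id': 'main', 'class': 'big'}
attr_names = [a for a in ('class', 'id') if a in attributes]
attr_names += [a for a in attributes if a not in attr_names]
print(attr_names)   # ['class', 'id', 'style']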
def __init__(self, data, autoclose_tags=[], print_tags = False, output = sys.stdout, warnaction = "default", warngoal = sys.stderr, caller_id = 0):
HTMLParser.__init__(self)
DATAtree.__init__(self, output, warnaction, warngoal, caller_id)
with self.tree_lock:
        self.tree_type = 'html'
self.print_tags = print_tags
self.autoclose_tags = autoclose_tags
self.is_tail = False
self.root = HTMLnode(self, 'root')
self.current_node = self.root
self.last_node = None
self.text = u''
self.open_tags = {}
self.count_tags(data)
# read the html page into the tree
try:
            # Compensate for incomplete reads where the essential body part was
            # retrieved but a closing tag is missing: append it so parsing can finish.
            for ctag in ('body', 'BODY', 'html', 'HTML', 'xml', 'XML'):
                if u'<%s>' % (ctag, ) in data and u'</%s>' % (ctag, ) not in data:
                    data = u'%s</%s>' % (data, ctag)
self.feed(data)
self.reset()
self.start_node = self.root
        except Exception:
self.warn('Unable to parse the HTML data. Invalid dataset!', dtDataWarning, 1)
self.start_node = NULLnode()
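# The repair loop above appends a missing closing tag for truncated pages;
# the same idea as a standalone helper (illustrative, not this class's API):
def close_truncated(data, tags=('body', 'BODY', 'html', 'HTML', 'xml', 'XML')):
    for ctag in tags:
        if u'<%s>' % ctag in data and u'</%s>' % ctag not in data:
            data = u'%s</%s>' % (data, ctag)
    return data

print(close_truncated(u'<html><body><p>cut off'))
# -> <html><body><p>cut off</body></html>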
def get_links(html):
class URLSeeker(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.urls = []
def handle_starttag(self, tag, attrs):
href = dict(attrs).get('href')
if href and tag == 'a':
self.urls.append(href)
url_seeker = URLSeeker()
url_seeker.feed(html)
return url_seeker.urls
def __init__(self):
HTMLParser.__init__(self)
self.entity = None
self.state = 'IDLE'
self.data = {
'rwattrs': [],
'roattrs': [],
'addattrs': [],
'updateattrs': [],
}
self.new_current_attr()
self.re_json_arg = re.compile(r'"([^"]+)": *<[^>]+>')
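# Illustration of what re_json_arg captures: the attribute name in a
# '"name": <type>' template fragment (the sample string is made up):
import re
print(re.compile(r'"([^"]+)": *<[^>]+>').findall('{"name": <string>, "size": <int>}'))
# -> ['name', 'size']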