def __init__(self, uri, transport=None, encoding=None, verbose=0,
allow_none=0, use_datetime=0):
# establish a "logical" server connection
# get the url
import urllib
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError, "unsupported XML-RPC protocol"
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
if transport is None:
if type == "https":
transport = SafeTransport(use_datetime=use_datetime)
else:
transport = Transport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none
python类splithost()的实例源码
def request(self, method, url, body=None, headers={}):
# Request is called before connect, so can interpret url and get
# real host/port to be used to make CONNECT request to proxy
proto, rest = urllib.splittype(url)
if proto is None:
raise ValueError, "unknown URL type: %s" % url
# Get host
host, rest = urllib.splithost(rest)
# Try to get port
host, port = urllib.splitport(host)
# If port is not defined try to get from proto
if port is None:
try:
port = self._ports[proto]
except KeyError:
raise ValueError, "unknown protocol for: %s" % url
self._real_host = host
self._real_port = int(port)
httplib.HTTPConnection.request(self, method, url, body, headers)
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def __init__(self, uri, transport=None, encoding=None, verbose=0,
allow_none=0, use_datetime=0):
# establish a "logical" server connection
# get the url
import urllib
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported XML-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
if transport is None:
if type == "https":
transport = SafeTransport(use_datetime=use_datetime)
else:
transport = Transport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def make_connection(self, host):
self.realhost = host
proxies = urllib.getproxies()
proxyurl = None
if 'http' in proxies:
proxyurl = proxies['http']
elif 'all' in proxies:
proxyurl = proxies['all']
if proxyurl:
urltype, proxyhost = urllib.splittype(proxyurl)
host, selector = urllib.splithost(proxyhost)
h = httplib.HTTP(host)
self.proxy_is_used = True
return h
else:
self.proxy_is_used = False
return Transport.make_connection(self, host)
def __init__(self, uri, transport=None, encoding=None, verbose=False,
allow_none=False, use_datetime=False):
type, uri = urllib.splittype(uri)
if type not in ('scgi'):
raise IOError('unsupported XML-RPC protocol')
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = '/'
if transport is None:
transport = SCGITransport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none
def __init__(self, uri, transport=None, verbose=False, binary=True,
compressRequest=True, acceptCompressedResponse=True):
"""Establish a "logical" server connection."""
# get the url
import urllib
typ, uri = urllib.splittype(uri)
if typ not in ('http', 'https'):
raise IOError('unsupported Pickle-RPC protocol')
self._host, self._handler = urllib.splithost(uri)
if not self._handler:
self._handler = '/PickleRPC'
if transport is None:
transport = (SafeTransport if typ == 'https' else Transport)()
self._transport = transport
self._verbose = verbose
self._binary = binary
self._compressRequest = compressRequest
self._acceptCompressedResponse = acceptCompressedResponse
def __init__(self, uri, transport=None, encoding=None, verbose=0,version=None):
self.location = uri # server location (url)
self.trace = verbose # show debug messages
self.exceptions = True # raise errors? (JSONRPCError)
self.timeout = None
self.json_request = self.json_response = ''
self.version = version # '2.0' for jsonrpc2
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError("unsupported JSON-RPC protocol")
self.__host, self.__handler = urllib.splithost(uri)
if transport is None:
if type == "https":
transport = JSONSafeTransport()
else:
transport = JSONTransport()
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
def __init__(self, uri, transport=None, encoding=None, verbose=0,
allow_none=0, use_datetime=0):
# establish a "logical" server connection
# get the url
import urllib
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError, "unsupported XML-RPC protocol"
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
if transport is None:
if type == "https":
transport = SafeTransport(use_datetime=use_datetime)
else:
transport = Transport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none
def _spliturl(url):
scheme, opaque = urllib.splittype(url)
netloc, path = urllib.splithost(opaque)
host, port = urllib.splitport(netloc)
# Strip brackets if its an IPv6 address
if host.startswith('[') and host.endswith(']'): host = host[1:-1]
if port is None: port = DEFAULT_PORT
return scheme, host, port, path
# Given an HTTP request handler, this wrapper objects provides a related
# family of convenience methods built using that handler.
def savefilename(self, url):
type, rest = urllib.splittype(url)
host, path = urllib.splithost(rest)
path = path.lstrip("/")
user, host = urllib.splituser(host)
host, port = urllib.splitnport(host)
host = host.lower()
if not path or path[-1] == "/":
path = path + "index.html"
if os.sep != "/":
path = os.sep.join(path.split("/"))
if os.name == "mac":
path = os.sep + path
path = os.path.join(host, path)
return path
def _spliturl(url):
scheme, opaque = urllib.splittype(url)
netloc, path = urllib.splithost(opaque)
host, port = urllib.splitport(netloc)
# Strip brackets if its an IPv6 address
if host.startswith('[') and host.endswith(']'): host = host[1:-1]
if port is None: port = DEFAULT_PORT
return scheme, host, port, path
# Given an HTTP request handler, this wrapper objects provides a related
# family of convenience methods built using that handler.
def __init__(self, uri, transport=None, encoding=None, verbose=0,
allow_none=0, use_datetime=0, context=None):
# establish a "logical" server connection
if isinstance(uri, unicode):
uri = uri.encode('ISO-8859-1')
# get the url
import urllib
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError, "unsupported XML-RPC protocol"
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
if transport is None:
if type == "https":
transport = SafeTransport(use_datetime=use_datetime, context=context)
else:
transport = Transport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none
def __init__(self, url, config = Config):
proto, uri = urllib.splittype(url)
# apply some defaults
if uri[0:2] != '//':
if proto != None:
uri = proto + ':' + uri
uri = '//' + uri
proto = 'http'
host, path = urllib.splithost(uri)
try:
int(host)
host = 'localhost:' + host
except:
pass
if not path:
path = '/'
if proto not in ('http', 'https', 'httpg'):
raise IOError, "unsupported SOAP protocol"
if proto == 'httpg' and not config.GSIclient:
raise AttributeError, \
"GSI client not supported by this Python installation"
if proto == 'https' and not config.SSLclient:
raise AttributeError, \
"SSL client not supported by this Python installation"
self.user,host = urllib.splituser(host)
self.proto = proto
self.host = host
self.path = path
def parse(self, response):
def getdomain(url):
proto, rest = urllib.splittype(url)
host, rest = urllib.splithost(rest)
return "http://"+host
sel=scrapy.Selector(response)
links_in_a_page = sel.xpath('//a[@href]')
for link_sel in links_in_a_page:
item=QqurlItem()
link=str(link_sel.re('href="(.*?)"')[0])
if link:
if not link.startswith('http'):
if link.startswith('javascript'):
continue
if link.startswith('//support'):
continue
link=getdomain(response.url)+link
if re.match('.*comment.*',link):
continue
yield scrapy.Request(link,callback=self.parse)
if not re.match('.*comment.*',link):
if re.match('^http.*qq.com.*\.s?html?$',link):
item['link']=link
yield item
def parse(self, response):
def getdomain(url):
#proto,rest=urllib.splittype(url)
#host,rest=urllib.splithost(rest)
return "http:"
sel = scrapy.Selector(response)
links_in_a_page=sel.xpath('//a[@href]')
for link_sel in links_in_a_page:
item=SohuItem()
link=str(link_sel.re('href="(.*?)"')[0])
if link:
if not link.startswith('http'):
link=getdomain(response.url)+link
yield scrapy.Request(link,callback=self.parse)
p1=re.compile(r'.*/a/.*')
p2=re.compile(r'.*#comment_area$')
p3=re.compile(r'.*news.sohu.com.*s?html?$')
if (re.match(p3,link) or re.match(p1,link)) and (not re.match(p2,link)):
#print ('T: '+link)
item['link']=link
yield item
else:
pass
#print ('F: '+link)
def parse_host(self):
proto, rest = urllib.splittype(self.get_host())
host, rest = urllib.splithost(rest)
host, port = urllib.splitport(host)
return host
def __init__(self, url, config = Config):
proto, uri = urllib.splittype(url)
# apply some defaults
if uri[0:2] != '//':
if proto != None:
uri = proto + ':' + uri
uri = '//' + uri
proto = 'http'
host, path = urllib.splithost(uri)
try:
int(host)
host = 'localhost:' + host
except:
pass
if not path:
path = '/'
if proto not in ('http', 'https', 'httpg'):
raise IOError, "unsupported SOAP protocol"
if proto == 'httpg' and not config.GSIclient:
raise AttributeError, \
"GSI client not supported by this Python installation"
if proto == 'https' and not config.SSLclient:
raise AttributeError, \
"SSL client not supported by this Python installation"
self.user,host = urllib.splituser(host)
self.proto = proto
self.host = host
self.path = path
def _spliturl(url):
scheme, opaque = urllib.splittype(url)
netloc, path = urllib.splithost(opaque)
host, port = urllib.splitport(netloc)
# Strip brackets if its an IPv6 address
if host.startswith('[') and host.endswith(']'): host = host[1:-1]
if port is None: port = DEFAULT_PORT
return scheme, host, port, path
# Given an HTTP request handler, this wrapper objects provides a related
# family of convenience methods built using that handler.
def __init__(self, url, config = Config):
proto, uri = urllib.splittype(url)
# apply some defaults
if uri[0:2] != '//':
if proto != None:
uri = proto + ':' + uri
uri = '//' + uri
proto = 'http'
host, path = urllib.splithost(uri)
try:
int(host)
host = 'localhost:' + host
except:
pass
if not path:
path = '/'
if proto not in ('http', 'https', 'httpg'):
raise IOError, "unsupported SOAP protocol"
if proto == 'httpg' and not config.GSIclient:
raise AttributeError, \
"GSI client not supported by this Python installation"
if proto == 'https' and not config.SSLclient:
raise AttributeError, \
"SSL client not supported by this Python installation"
self.user,host = urllib.splituser(host)
self.proto = proto
self.host = host
self.path = path
def test_splithost(self):
splithost = urllib.splithost
self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
('www.example.org:80', '/foo/bar/baz.html'))
self.assertEqual(splithost('//www.example.org:80'),
('www.example.org:80', ''))
self.assertEqual(splithost('/foo/bar/baz.html'),
(None, '/foo/bar/baz.html'))
def __init__(self, uri, transport=None, encoding=None, verbose=0,
allow_none=0, use_datetime=0, context=None):
# establish a "logical" server connection
if unicode and isinstance(uri, unicode):
uri = uri.encode('ISO-8859-1')
# get the url
import urllib
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError, "unsupported XML-RPC protocol"
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
if transport is None:
if type == "https":
transport = SafeTransport(use_datetime=use_datetime, context=context)
else:
transport = Transport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none
def savefilename(self, url):
type, rest = urllib.splittype(url)
host, path = urllib.splithost(rest)
path = path.lstrip("/")
user, host = urllib.splituser(host)
host, port = urllib.splitnport(host)
host = host.lower()
if not path or path[-1] == "/":
path = path + "index.html"
if os.sep != "/":
path = os.sep.join(path.split("/"))
path = os.path.join(host, path)
return path
def test_splithost(self):
splithost = urllib.splithost
self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
('www.example.org:80', '/foo/bar/baz.html'))
self.assertEqual(splithost('//www.example.org:80'),
('www.example.org:80', ''))
self.assertEqual(splithost('/foo/bar/baz.html'),
(None, '/foo/bar/baz.html'))
def __init__(self, uri, transport=None, encoding=None, verbose=0,
allow_none=0, use_datetime=0, context=None):
# establish a "logical" server connection
if unicode and isinstance(uri, unicode):
uri = uri.encode('ISO-8859-1')
# get the url
import urllib
type, uri = urllib.splittype(uri)
if type not in ("http", "https"):
raise IOError, "unsupported XML-RPC protocol"
self.__host, self.__handler = urllib.splithost(uri)
if not self.__handler:
self.__handler = "/RPC2"
if transport is None:
if type == "https":
transport = SafeTransport(use_datetime=use_datetime, context=context)
else:
transport = Transport(use_datetime=use_datetime)
self.__transport = transport
self.__encoding = encoding
self.__verbose = verbose
self.__allow_none = allow_none