def process_item(self, item, spider):
    """Validate a scraped item and insert it into ``self.collection``.

    Every field of ``item`` must hold a truthy value; the first field that
    does not causes the item to be dropped instead of stored.

    :param item: scraped item; assumed to behave like a mapping (it is
        converted with ``dict(item)`` before insertion)
    :param spider: spider the item came from; only passed on to the logger
    :returns: ``item`` itself, unchanged
    :raises DropItem: when a field of the item is empty
    """
    # Iterating a mapping yields field *names*, which are never empty, so
    # the value has to be looked up for the check to be able to fail.
    for field in item:
        if not item[field]:
            raise DropItem('Missing{0}!'.format(field))

    self.collection.insert(dict(item))
    log.msg('??????!', level=log.DEBUG, spider=spider)
    return item
# def testdb(self):
# # ???MongoHQ
# con = pymongo.Connection("paulo.mongohq.com",10042)
# db = con.mytest
# db.authenticate("root", "sa123")
# db.urllist.drop()
# python类Connection()的实例源码 (example source code for the Python class Connection())
def mongo_connect(host=None, port=None, ssl=False, user=None, passwd=None, replica=None):
    """Open a pymongo connection and optionally authenticate against ``admin``.

    :param host: server host, handed to ``pymongo.Connection``
    :param port: server port, handed to ``pymongo.Connection``
    :param ssl: forwarded as ``ssl`` (only on the pymongo >= 2.1 branch)
    :param user: user name; authentication is attempted only when both
        ``user`` and ``passwd`` are truthy
    :param passwd: password belonging to ``user``
    :param replica: replica set name, forwarded as ``replicaSet`` when given
    :returns: ``(0, connection)`` on success, otherwise
        ``(exit_with_general_critical(error), None)``

    The process exits when authentication is rejected, and exits with
    status 0 when the server reports itself as an arbiter.
    """
    import re

    try:
        # Compare the version numerically; as plain strings "10.0" would
        # sort before "2.1" and wrongly pick the legacy keyword arguments.
        version = tuple(int(part) for part in re.findall(r"\d+", pymongo.version)[:2])
        if version >= (2, 1):
            # ssl connection for pymongo > 2.1
            options = {
                "read_preference": pymongo.ReadPreference.SECONDARY,
                "ssl": ssl,
            }
        else:
            options = {"slave_okay": True}
        options["network_timeout"] = 10
        if replica is not None:
            options["replicaSet"] = replica

        con = pymongo.Connection(host, port, **options)

        if user and passwd:
            db = con["admin"]
            if not db.authenticate(user, passwd):
                sys.exit("Username/Password incorrect")
    except Exception as e:
        if isinstance(e, pymongo.errors.AutoReconnect) and " is an arbiter" in str(e):
            # An AutoReconnect error carrying this text means we reached an
            # arbiter: it is reachable and answering, which is all that
            # needs to be known about an arbiter.
            print("OK - State: 7 (Arbiter)")
            sys.exit(0)
        return exit_with_general_critical(e), None
    return 0, con
def init_db(self):
    """Connect to the two MongoDB servers and the Redis queue.

    This is a generator that yields the connection/authentication results
    and reports its outcome through ``defer.returnValue`` (presumably it is
    wrapped by Twisted's ``inlineCallbacks`` outside this view): ``True``
    when every step succeeded, ``False`` after printing the traceback of
    the step that failed.
    """
    self.mongo = None
    self.mongo_conf = None
    self.redis = None
    try:
        conf = self.setting
        self.mongo_conf = yield txmongo.MongoConnection(
            host=conf.CONF_MONGO_HOST, port=conf.CONF_MONGO_PORT)
        self.mongo = yield txmongo.MongoConnection(
            host=conf.DATA_MONGO_HOST, port=conf.DATA_MONGO_PORT)
        self.redis = yield txredisapi.Connection(
            host=conf.QUEUE_REDIS_HOST, port=conf.QUEUE_REDIS_PORT)
        # Data server first, then the configuration server.
        for server in (self.mongo, self.mongo_conf):
            yield server.admin.authenticate("root", "chenfuzhi")
    except Exception:
        import traceback
        traceback.print_exc()
        defer.returnValue(False)
    else:
        defer.returnValue(True)
def init_db(self):
    """Connect to the two MongoDB servers and the Redis queue.

    This is a generator that yields the connection/authentication results
    and reports its outcome through ``defer.returnValue`` (presumably it is
    wrapped by Twisted's ``inlineCallbacks`` outside this view): ``True``
    when every step succeeded, ``False`` after printing the traceback of
    the step that failed.
    """
    self.mongo = None
    self.mongo_conf = None
    self.redis = None
    try:
        conf = self.setting
        self.mongo_conf = yield txmongo.MongoConnection(
            host=conf.CONF_MONGO_HOST, port=conf.CONF_MONGO_PORT)
        self.mongo = yield txmongo.MongoConnection(
            host=conf.DATA_MONGO_HOST, port=conf.DATA_MONGO_PORT)
        self.redis = yield txredisapi.Connection(
            host=conf.QUEUE_REDIS_HOST, port=conf.QUEUE_REDIS_PORT)
        # Data server first, then the configuration server.
        for server in (self.mongo, self.mongo_conf):
            yield server.admin.authenticate("root", "chenfuzhi")
    except Exception:
        import traceback
        traceback.print_exc()
        defer.returnValue(False)
    else:
        defer.returnValue(True)
def _get_connection(self):
    """Return the cached MongoDB client, creating it on first use."""
    if self._connection is not None:
        return self._connection

    from pymongo import MongoClient

    # ``self.host`` may already be a ``mongodb://`` URI or a list of
    # 'host:port' entries. Those are passed through untouched so that
    # pymongo takes the port(s) from them (replica sets, sharding); only a
    # plain host string is combined with ``self.port``.
    target = self.host
    is_plain_host = isinstance(target, string_t) and not target.startswith('mongodb://')
    if is_plain_host:
        target = 'mongodb://{0}:{1}'.format(target, self.port)
    if target == 'mongodb://':
        target = target + 'localhost'

    if detect_environment() != 'default':
        self.options['use_greenlets'] = True

    self._connection = MongoClient(host=target, **self.options)
    return self._connection
def __init__(self):
    """Create the asyncmongo client and the plain connection.

    Both objects are stored in the shared ``options`` mapping (keys
    ``asyn_client`` and ``mono_conn``) and the async client is also handed
    to ``AsyncBaseModel.configure``.
    """
    client_settings = dict(
        pool_id='isdb',
        host=setting.MONGO_HOST,
        port=setting.MONGO_PORT,
        dbuser=setting.MONGO_USER,
        dbpass=setting.MONGO_PASS,
        dbname='admin',
        maxcached=150,
        maxconnections=150,
    )
    asyn_client = asyncmongo.Client(**client_settings)
    connection = Connection(setting.MONGO_HOST, setting.MONGO_PORT)

    options["asyn_client"] = asyn_client
    AsyncBaseModel.configure(asyn_client)
    options["mono_conn"] = connection

    # NOTE(review): a success message logged at error level -- confirm intent.
    logging.error("{}".format("[init]Mongodb init success"))
def __new__(cls, size=5, dbname='', *args, **kwargs):
    """Return the shared pool instance, building it on the first call.

    :param size: number of connections to open and capacity of the queue
    :param dbname: database selected on every pooled connection
    :param args: positional arguments forwarded to ``pymongo.Connection``
    :param kwargs: keyword arguments forwarded to ``pymongo.Connection``
    :returns: the single instance stored on ``cls._instance``
    :raises MongoPoolCantConnect: when one of the connections cannot be
        opened; nothing is cached in that case, so a later call retries
    """
    if not hasattr(cls, '_instance'):
        pool = object.__new__(cls)
        pool.dbname = dbname
        pool.queue = PriorityQueue(size)
        for _ in range(size):
            try:
                pool.queue.put(
                    (time.time(), pymongo.Connection(*args, **kwargs)[dbname])
                )
            except Exception as e:
                raise MongoPoolCantConnect('Can\'t connect to mongo servers: %s' % e)
        # Publish the singleton only once it is completely filled; storing
        # it earlier would hand out a half-built pool after a failure.
        cls._instance = pool
    return cls._instance
def nosql():
    """Print tick data as JSON records and insert them into MongoDB.

    The data comes from ``ts.get_tick_data`` for code '600848' on
    2014-12-22 and goes into the ``tickdata`` collection of database ``db``
    on the local server.
    """
    import pymongo
    import json

    conn = pymongo.Connection('127.0.0.1', port=27017)
    df = ts.get_tick_data('600848', date='2014-12-22')
    payload = df.to_json(orient='records')
    print(payload)
    records = json.loads(payload)
    conn.db.tickdata.insert(records)
def insert_log():
    """Insert every entry of the Apache log file into ``mydb.logdata``.

    Each value produced by ``ApacheLogFile.my_generator`` is converted with
    ``props`` and inserted as one document. The connection is closed when
    the function finishes, whether or not an insert failed.
    """
    connection = Connection('localhost', 27017)
    try:
        collection = connection.mydb.logdata
        alf = ApacheLogFile(r'D:\work\gitproject\python3-cookbook\configs\app.log')
        for lg_line in alf.my_generator():
            collection.insert(props(lg_line))
    finally:
        # Release the connection even when reading or inserting fails.
        connection.close()
def query_log():
    """Print every document stored in ``mydb.logdata``.

    The connection is closed when the function finishes, including when
    iterating or printing raises.
    """
    connection = Connection('localhost', 27017)
    try:
        for doc in connection.mydb.logdata.find():
            print(doc)
    finally:
        # Previously skipped whenever the loop above raised.
        connection.close()
def check_connect(host, port, warning, critical, perf_data, user, passwd, conn_time):
    """Rate an already measured connection time against the thresholds.

    :param warning: warning threshold; a falsy value means 3
    :param critical: critical threshold; a falsy value means 6
    :param perf_data: forwarded to ``performance_data``
    :param conn_time: measured connection time
    :returns: whatever ``check_levels`` returns

    ``host``, ``port``, ``user`` and ``passwd`` are accepted but not used.
    """
    if not warning:
        warning = 3
    if not critical:
        critical = 6

    summary = "Connection took %i seconds" % conn_time
    perf_suffix = performance_data(
        perf_data, [(conn_time, "connection_time", warning, critical)])
    return check_levels(conn_time, warning, critical, summary + perf_suffix)
def check_connect_primary(con, warning, critical, perf_data):
    """Measure how long connecting to the replica set's primary takes.

    :param con: open connection to any member of the set
    :param warning: warning threshold; a falsy value means 3
    :param critical: critical threshold; a falsy value means 6
    :param perf_data: forwarded to ``performance_data``
    :returns: 0 when ``con`` already points at the primary, the error code
        from ``mongo_connect`` when the primary cannot be reached, the
        result of ``check_levels`` otherwise, or the result of
        ``exit_with_general_critical`` on any other failure
    """
    warning = warning or 3
    critical = critical or 6

    try:
        try:
            set_read_preference(con.admin)
            data = con.admin.command(pymongo.son_manipulator.SON([('isMaster', 1)]))
        except Exception:
            # Second attempt through the ``son`` module. Only ordinary
            # errors are retried, so SystemExit and KeyboardInterrupt are
            # no longer swallowed here.
            data = con.admin.command(son.SON([('isMaster', 1)]))

        if data['ismaster']:
            print("OK - This server is primary")
            return 0

        primary = data['primary']
        address = primary.split(':')
        phost = address[0]
        pport = int(address[1])

        start = time.time()
        err, _ = mongo_connect(phost, pport)
        if err != 0:
            return err
        pconn_time = round(time.time() - start, 0)

        message = "Connection to primary server " + primary + " took %i seconds" % pconn_time
        message += performance_data(perf_data, [(pconn_time, "connection_time", warning, critical)])
        return check_levels(pconn_time, warning, critical, message)
    except Exception as e:
        return exit_with_general_critical(e)
def __init__(self, host=db_config['DB_HOST'],
             port=db_config['DB_PORT'],
             user=db_config['DB_USER'],
             passwd=db_config['DB_PSW'],
             db=db_config['DB_NAME'],
             charset=db_config['DB_CHARSET']):
    """Connect to MongoDB, select database ``db`` and authenticate on it.

    All defaults are read from ``db_config`` when the function is defined.
    ``charset`` is accepted but not used.
    """
    client = pymongo.Connection(host, port)
    self.connection = client
    self.db = client[db]
    self.db.authenticate(user, passwd)
def __init__(self, hostname, db, collection, port=27017):
    """Connect to ``hostname:port`` and select database ``db``.

    ``collection`` is stored exactly as passed in; it is not looked up on
    the database here.
    """
    client = pymongo.Connection(hostname, port)
    self.conn = client
    self.db = client[db]
    self.collection = collection
def load_conf(self):
    """Load this spider's configuration from the ``taobao`` database.

    Side effects: the spider's ``get_seed`` code is written to
    ``models/get_seed_<id>.py`` and the ``python_code`` of every entry in
    ``navi_list`` is written to ``models/navi_<id>.py``.

    :returns: the spider document as a dict, extended with ``navi_models``
        (list of navigation documents) and ``redis_queue`` (list of queue
        documents whose ``_id`` equals the spider's ``_id``)
    """
    conn = pymongo.Connection(self.setting.CONF_MONGO_HOST, self.setting.CONF_MONGO_PORT)
    taobao = conn.taobao
    spider_conf = dict(taobao.spider.find_one({"name": self.spider}))

    # ``with`` closes (and thereby flushes) each generated file right away
    # instead of leaving that to garbage collection.
    # NOTE(review): writing encoded bytes to a text-mode file only works on
    # Python 2 -- confirm the target interpreter before porting.
    with open("models/get_seed_%s.py" % str(spider_conf["_id"]), "w") as seed_file:
        seed_file.write(spider_conf["get_seed"].encode("utf-8"))

    spider_conf["navi_models"] = []
    for navi in spider_conf["navi_list"]:
        navi_row = taobao.spider_navi.find_one({"_id": navi})
        with open("models/navi_%s.py" % str(navi_row["_id"]), "w") as navi_file:
            navi_file.write(navi_row["python_code"].encode("utf-8"))
        spider_conf["navi_models"].append(dict(navi_row))

    redis_queue = taobao.redis_queue.find({"_id": spider_conf["_id"]})
    spider_conf["redis_queue"] = list(redis_queue)
    return spider_conf
def init_database(self):
    """Connect to the data MongoDB server and the Redis queue.

    This is a generator that yields both connection results and finishes
    through ``defer.returnValue(True)`` (presumably it is wrapped by
    Twisted's ``inlineCallbacks`` outside this view).
    """
    conf = self.setting
    self.mongo = yield txmongo.MongoConnection(
        host=conf.DATA_MONGO_HOST, port=conf.DATA_MONGO_PORT)
    conf = self.setting
    self.redis = yield txredisapi.Connection(
        host=conf.QUEUE_REDIS_HOST, port=conf.QUEUE_REDIS_PORT)
    #yield self.mongo.admin.authenticate("root", "chenfuzhi")
    defer.returnValue(True)
def load_conf(self):
    """Load this spider's configuration and its Redis queue definitions.

    Stores the spider document (looked up by ``self.spider``) as a dict in
    ``self.conf`` and the matching ``redis_queue`` documents, ordered by
    ascending ``prio``, as a list in ``self.queues``.
    """
    conn = pymongo.Connection(host=self.setting.CONF_MONGO_HOST, port=self.setting.CONF_MONGO_PORT)
    row = conn.taobao.spider.find_one({"name": self.spider})
    self.conf = dict(row)
    # ``sort`` takes a list of (key, direction) pairs; a bare tuple is
    # read as two separate entries and rejected.
    rows = conn.taobao.redis_queue.find({"_id": self.conf["_id"]}, sort=[("prio", 1)])
    self.queues = list(rows)
def mongoDB_connect(self, ip, username, password, port):
    """Check how a MongoDB service responds and return a status code.

    :param ip: address of the service
    :param username: user name tried when anonymous access is refused
    :param password: password tried together with ``username``
    :param port: port of the service
    :returns: 1 when listing the ``admin`` collections worked without
        credentials, 2 when ``authenticate`` did not return ``False``,
        3 when it returned ``False``, 4 for any other error raised by the
        first attempt, 0 when the authentication attempt itself raised
    """
    crack = 0
    try:
        connection = pymongo.Connection(ip, port)
        db = connection.admin
        db.collection_names()
        notice = '%s mongodb service at %s allow login Anonymous login!!\r\n' % (ip, port)
        # ``with`` releases the lock even if reporting raises.
        # Assumes self.lock supports the context-manager protocol, as
        # threading locks do -- TODO confirm.
        with self.lock:
            printRed(notice)
            self.result.append(notice)
        crack = 1
    except Exception as e:
        # ``e.args`` instead of ``e[0]``: exceptions are not subscriptable
        # on Python 3 and may carry no arguments at all.
        reason = e.args[0] if e.args else None
        if reason == 'database error: not authorized for query on admin.system.namespaces':
            try:
                r = db.authenticate(username, password)
                if r != False:
                    crack = 2
                else:
                    with self.lock:
                        crack = 3
                        print("%s mongodb service 's %s:%s login fail " % (ip, username, password))
            except Exception:
                # Best effort: a failing attempt leaves the status at 0.
                pass
        else:
            printRed('%s mongodb service at %s not connect' % (ip, port))
            crack = 4
    return crack
def mongoDB_connect(self, ip, username, password, port):
    """Check how a MongoDB service responds and return a status code.

    :param ip: address of the service
    :param username: user name tried when anonymous access is refused
    :param password: password tried together with ``username``
    :param port: port of the service
    :returns: 1 when listing the ``admin`` collections worked without
        credentials, 2 when ``authenticate`` did not return ``False``,
        3 when it returned ``False``, 4 for any other error raised by the
        first attempt, 0 when the authentication attempt itself raised
    """
    crack = 0
    try:
        connection = pymongo.Connection(ip, port)
        db = connection.admin
        db.collection_names()
        notice = '%s mongodb service at %s allow login Anonymous login!!\r\n' % (ip, port)
        # ``with`` releases the lock even if reporting raises.
        # Assumes self.lock supports the context-manager protocol, as
        # threading locks do -- TODO confirm.
        with self.lock:
            printRed(notice)
            self.result.append(notice)
        crack = 1
    except Exception as e:
        # ``e.args`` instead of ``e[0]``: exceptions are not subscriptable
        # on Python 3 and may carry no arguments at all.
        reason = e.args[0] if e.args else None
        if reason == 'database error: not authorized for query on admin.system.namespaces':
            try:
                r = db.authenticate(username, password)
                if r != False:
                    crack = 2
                else:
                    with self.lock:
                        crack = 3
                        print("%s mongodb service 's %s:%s login fail " % (ip, username, password))
            except Exception:
                # Best effort: a failing attempt leaves the status at 0.
                pass
        else:
            printRed('%s mongodb service at %s not connect' % (ip, port))
            crack = 4
    return crack
def __init__(self, col, index=None):
    """Bind to collection ``col`` of the database named in ``settings``.

    :param col: collection name
    :param index: when truthy, passed to ``create_index`` with
        ``unique=True`` on the collection
    """
    client = pymongo.Connection(settings.MONGODB_SERVER, settings.MONGODB_PORT)
    self.db = client[settings.MONGODB_DB]
    self.collection = self.db[col]
    if not index:
        return
    self.collection.create_index(index, unique=True)
def __init__(self):
    """Bind to the database and collection named in ``settings``.

    When ``__get_uniq_key()`` returns something other than ``None``, a
    unique index is created on that key.
    """
    import pymongo

    client = pymongo.Connection(settings['MONGODB_SERVER'], settings['MONGODB_PORT'])
    self.db = client[settings['MONGODB_DB']]
    self.collection = self.db[settings['MONGODB_COLLECTION']]
    if self.__get_uniq_key() is None:
        return
    self.collection.create_index(self.__get_uniq_key(), unique=True)
def load_mongo(self):
    """Connect to the local MongoDB server and select this object's database.

    The database name is computed by ``mongo_db_name`` from
    ``self.base_fname``.
    """
    client = pymongo.Connection('localhost', 27017)
    self.mongo_conn = client
    self.mongo_db = client[mongo_db_name(self.base_fname)]
def __init__(self, db_name,
             collection_name='mongo_dict_data', connection=None):
    """
    Bind the object to one collection of one database.

    :param db_name: name of the database to use (take care when pointing
                    this at a production database)
    :param collection_name: name of the collection
                            (default: mongo_dict_data)
    :param connection: existing ``pymongo.Connection`` to reuse; when it
                       is ``None`` (default) a new ``Connection()`` with
                       default options is opened
    """
    self.connection = connection if connection is not None else Connection()
    database = self.connection[db_name]
    self.db = database
    self.collection = database[collection_name]
def get_mongodb_connection(cls):
    """Return the connection cached on the class, opening it if needed.

    The address is built as ``host:port`` from ``REQUEST_LOGGING_BACKEND``.

    :returns: the cached connection, or ``None`` when it cannot be created
    """
    if cls._connection is None:
        try:
            address = "{0}:{1}".format(
                REQUEST_LOGGING_BACKEND['host'],
                REQUEST_LOGGING_BACKEND['port'],
            )
            cls._connection = pymongo.Connection(address)
        except Exception:
            # TODO: Add log here.
            return None
    return cls._connection
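# bebat_mongodb_user.py 文件源码 (file source)
# 项目:Ansible-MongoDB-Install-Role (project)
# 作者: bbatsche (author)
# 项目源码 (project source)
# 文件源码 (file source)
# 阅读 25 (views)
# 收藏 0 (bookmarks)
# 点赞 0 (likes)
# 评论 0 (comments)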
def get_client(self):
    """Create a ``MongoClient`` for the configured login host and port.

    ``ssl`` is always forwarded; ``replicaset`` is added only when
    ``self.replica_set`` is truthy.
    """
    extra = {'ssl': self.ssl}
    if self.replica_set:
        extra['replicaset'] = self.replica_set
    return MongoClient(self.login_host, self.login_port, **extra)
def __init__(self, db_name,
             collection_name='mongo_dict_data', connection=None):
    """
    Bind the object to one collection of one database.

    :param db_name: name of the database to use (take care when pointing
                    this at a production database)
    :param collection_name: name of the collection
                            (default: mongo_dict_data)
    :param connection: existing ``pymongo.Connection`` to reuse; when it
                       is ``None`` (default) a new ``Connection()`` with
                       default options is opened
    """
    self.connection = connection if connection is not None else Connection()
    database = self.connection[db_name]
    self.db = database
    self.collection = database[collection_name]
def get_data_from_mongo():
    """Summarise the scraped documents per keyword.

    Reads ``zwlb`` and ``zwyx`` from every document of
    ``db_lianjia.scrapy_zlzp_info``. A document counts for a keyword when
    the upper-cased ``zwlb`` text contains it, and its ``zwyx`` text
    ("low-high") contributes the midpoint of that range, or 0 when the text
    cannot be parsed.

    :returns: ``(zwyx_set, zwnum_set)`` -- mean midpoint per keyword and
        number of matching documents per keyword
    """
    conn = pymongo.Connection('192.168.17.128', 27017)
    db = conn.db_lianjia
    results = db.scrapy_zlzp_info.find({}, {"zwlb": 1, "zwyx": 1, "gsdz": 1, "gsxz": 1, "_id": 0})
    # NOTE(review): 'C??' looks like a mis-encoded keyword -- restore the
    # intended text from the original project.
    zwlb = ['C??', 'C++', 'C#', 'PYTHON', 'RUBY', 'JAVA', 'IOS', 'ANDROID', 'HTML', 'PHP']
    zwnum_set = {}
    zwyx_set = {}
    je_re = re.compile('([0-9 ]*)-([0-9 ]*)')

    def get_average_salary(slary):
        """Return the midpoint of a "low-high" range, or 0 if unparsable."""
        m = je_re.match(slary.replace(',', ''))
        if not m:
            return 0
        try:
            return (float(m.group(1)) + float(m.group(2))) / 2
        except ValueError:
            # An empty or blank bound; nothing else is swallowed any more.
            return 0

    for result in results:
        zw = result.get('zwlb')
        yx = result.get('zwyx')
        if not (isinstance(yx, str) and isinstance(zw, str)):
            continue
        uzw = zw.upper()
        for zwfl in zwlb:
            if zwfl in uzw:
                zwnum_set[zwfl] = zwnum_set.get(zwfl, 0) + 1
                zwyx_set[zwfl] = zwyx_set.get(zwfl, 0) + get_average_salary(yx)

    # Turn the accumulated sums into means.
    for key in zwnum_set:
        zwyx_set[key] = zwyx_set[key] / float(zwnum_set[key])
    return zwyx_set, zwnum_set