def get_data_from_mongo(host='192.168.17.128', port=27017):
    """Summarise salaries per job keyword from ``db_lianjia.scrapy_zlzp_info``.

    Reads every document of the collection and, for each keyword in a fixed
    list, counts the documents whose ``zwlb`` field contains that keyword
    (compared in upper case) and averages the salary parsed from their
    ``zwyx`` field.

    A document is counted once for every keyword it contains. Documents whose
    ``zwlb`` or ``zwyx`` value is not a ``str`` are skipped. A ``zwyx`` value
    that cannot be parsed as a "low-high" range contributes 0 to the sum but
    is still counted, so it lowers the average.

    Args:
        host: MongoDB host name or address. Defaults to the address that was
            previously hard-coded.
        port: MongoDB port. Defaults to the previously hard-coded 27017.

    Returns:
        A tuple ``(average_salary, match_count)`` of two dicts keyed by
        keyword. Keywords that matched no document are absent from both.
    """
    # NOTE(review): 'C??' looks like a literal whose non-ASCII characters were
    # lost to an encoding problem; as written it only matches the text "C??".
    # Confirm the intended keyword before relying on this entry.
    keywords = ['C??', 'C++', 'C#', 'PYTHON', 'RUBY', 'JAVA', 'IOS',
                'ANDROID', 'HTML', 'PHP']
    salary_range_re = re.compile(r'([0-9 ]*)-([0-9 ]*)')

    def get_average_salary(slary):
        """Return the midpoint of a "low-high" salary string, or 0."""
        match = salary_range_re.match(slary.replace(',', ''))
        if not match:
            return 0
        try:
            return (float(match.group(1)) + float(match.group(2))) / 2
        except ValueError:
            # One side of the range is empty or contains embedded spaces.
            return 0

    match_count = {}
    salary_sum = {}

    # MongoClient superseded Connection, which was removed in PyMongo 3.0;
    # fall back to Connection only on installs too old to have MongoClient.
    client_factory = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
    conn = client_factory(host, port)
    try:
        results = conn.db_lianjia.scrapy_zlzp_info.find(
            {}, {"zwlb": 1, "zwyx": 1, "gsdz": 1, "gsxz": 1, "_id": 0})
        for result in results:
            zw = result.get('zwlb')
            yx = result.get('zwyx')
            if not (isinstance(yx, str) and isinstance(zw, str)):
                continue
            uzw = zw.upper()
            salary = get_average_salary(yx)
            for zwfl in keywords:
                if zwfl in uzw:
                    match_count[zwfl] = match_count.get(zwfl, 0) + 1
                    salary_sum[zwfl] = salary_sum.get(zwfl, 0) + salary
    finally:
        # The cursor is fully consumed above, so the connection can be
        # released whether or not iteration succeeded.
        conn.close()

    # Every key in match_count has a count of at least 1, so the division
    # below cannot hit zero.
    average_salary = {
        key: salary_sum[key] / float(match_count[key]) for key in match_count
    }
    return average_salary, match_count
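# 评论列表 (comment list) -- page-navigation text captured with the snippet, not code
# 文章目录 (table of contents) -- page-navigation text captured with the snippet, not code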