def map_discipl(self, invalue, disctab):
    """
    Convert disciplines along B2FIND disciplinary list

    ``invalue`` is broken into candidate terms, each candidate is compared
    with the discipline names found in ``disctab`` and the best matching
    discipline is kept when the similarity ratio is above 0.90.

    Parameters:
      invalue -- a single string or a list of strings. A single string is
                 split on ';', '&' and whitespace and is additionally tried
                 as a whole. For a list, every single word of every item is
                 tried first, followed by the ';' / '&' separated parts of
                 every item.
      disctab -- iterable of rows; index 2 of each row is used as the
                 discipline name. It is iterated once per candidate term.
                 NOTE(review): the remaining columns are not used here.

    Returns:
      The matched discipline names joined by ';' (duplicates removed, first
      occurrence wins), or 'Not stated' if nothing matched or ``invalue``
      is None.

    The similarity is computed by ``lvs.ratio`` (presumably the Levenshtein
    module imported as ``lvs`` -- confirm against the file's imports).

    Copyright (C) 2014 Heinrich Widmann
    Licensed under AGPLv3.
    """
    if invalue is None:
        return 'Not stated'

    if isinstance(invalue, list):
        # Keep the original candidate order: all words first, then the
        # separator-delimited parts.
        inlist = [word for item in invalue
                  for word in re.findall(r"[\w']+", item)]
        inlist += [part for item in invalue
                   for part in re.split(r"[;&]", item)]
    else:
        inlist = re.split(r'[;&\s]\s*', invalue)
        inlist.append(invalue)

    retval = []
    for indisc in inlist:
        # On Python 2 a unicode term is turned into a UTF-8 byte string (as
        # before); on Python 3 the term stays a str, because calling
        # bytes.replace() with str arguments raises TypeError.
        if str is bytes and not isinstance(indisc, str):
            indisc = indisc.encode('utf8')
        indisc = indisc.replace('\n', ' ').replace('\r', ' ').strip().title()
        if not indisc:
            # Splitting leaves empty fragments behind; they cannot name a
            # discipline.
            continue

        maxr = 0.0
        maxdisc = ''
        for line in disctab:
            # Reset per row so the error message never reports an unbound
            # or stale discipline name.
            disc = None
            try:
                disc = line[2].strip()
                r = lvs.ratio(indisc, disc)
            except Exception as e:
                logging.error(
                    '[ERROR] %s in map_discipl : %s can not compared to %s !',
                    e, indisc, line if disc is None else disc)
                continue
            if r > maxr:
                maxdisc = disc
                maxr = r

        if maxr == 1 and indisc == maxdisc:
            logging.debug(' | Perfect match of %s : nothing to do', indisc)
            retval.append(maxdisc)
        elif maxr > 0.90:
            logging.debug(
                ' | Similarity ratio %f is > 0.90 : replace value >>%s<< with best match --> %s',
                maxr, indisc, maxdisc)
            # Store the matched discipline, as the log message states;
            # previously the unmapped input value was stored here.
            retval.append(maxdisc)
        else:
            logging.debug(
                ' | Similarity ratio %f is < 0.90 compare value >>%s<< and discipline >>%s<<',
                maxr, indisc, maxdisc)

    if not retval:
        return 'Not stated'
    # OrderedDict.fromkeys removes exact duplicates while keeping the order.
    return ';'.join(OrderedDict.fromkeys(retval))
评论列表
文章目录