def normalize_ligature(text):
    """Expand Lam-Alef ligatures into the two letters LAM and ALEF.

    Some systems represent the Lam-Alef ligature as a single character.
    Every match of LIGUATURES_PATTERN found in ``text`` is replaced by
    the same two-letter sequence: LAM followed by ALEF.

    According to the original documentation, the ligatures handled are
    LAM_ALEF, LAM_ALEF_HAMZA_ABOVE, LAM_ALEF_HAMZA_BELOW and
    LAM_ALEF_MADDA_ABOVE (the pattern itself is defined outside this
    function).

    Falsy input (for example an empty string or None) is returned
    unchanged without touching the pattern.

    @param text: arabic text.
    @type text: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    # Guard clause: nothing to substitute in empty/None input.
    if not text:
        return text
    # All matched ligatures map to the same plain LAM + ALEF pair.
    lam_alef_pair = u'%s%s' % (LAM, ALEF)
    return LIGUATURES_PATTERN.sub(lam_alef_pair, text)
# Web page residue (not part of the code): "Comment list" / "Article table of contents"