def normalize_hamza(word):
    """Standardize the hamza forms found in an Arabic word.

    A leading ALEF_MADDA is expanded first: it becomes HAMZA + ALEF when
    the word has at least three characters, its second character is not
    in HARAKAT, and its third character is SHADDA or the word is exactly
    three characters long; in every other case it becomes HAMZA + HAMZA.
    Any ALEF_MADDA remaining elsewhere in the word is then expanded to
    HAMZA + HAMZA, and finally every match of HAMZAT_PATTERN is replaced
    by HAMZA.

    NOTE(review): presumably HAMZAT_PATTERN matches the various hamza
    carrier letters but not ALEF -- confirm against its definition.

    @param word: arabic text.
    @type word: unicode.
    @return: the converted text.
    @rtype: unicode.
    """
    if word.startswith(ALEF_MADDA):
        size = len(word)
        # The length test comes first so the index lookups below are only
        # evaluated when those positions exist.
        expand_to_hamza_alef = (
            size >= 3
            and word[1] not in HARAKAT
            and (word[2] == SHADDA or size == 3)
        )
        second_letter = ALEF if expand_to_hamza_alef else HAMZA
        # Dropping exactly one character assumes ALEF_MADDA is a single
        # code point -- TODO confirm.
        word = HAMZA + second_letter + word[1:]

    # Collapse whatever hamza forms remain into the single HAMZA form.
    expanded = word.replace(ALEF_MADDA, HAMZA + HAMZA)
    return HAMZAT_PATTERN.sub(HAMZA, expanded)
评论列表
文章目录