def breakWithOutWhiteSpace(sentence):
    """Split *sentence* at full stops that are glued to the next word.

    Text such as ``"Stop.I go"`` lacks the space after the full stop, so an
    ordinary sentence splitter treats it as one sentence.  Every ``.``
    immediately followed by word characters is a candidate split point; the
    candidate is accepted when the word after the dot is

    * a single letter ``i`` or ``a`` (case-insensitive), or
    * two or more characters long and accepted by the enchant spell-check
      dictionary.

    Each accepted full stop stays at the end of the piece it terminates.  A
    dot at index 0 never causes a split because nothing precedes it.

    Args:
        sentence: The text to split.

    Returns:
        A list of pieces in their original order.  When no split point is
        accepted the list contains *sentence* unchanged (``[""]`` for an
        empty string).

    Raises:
        ImportError: If a candidate word of two or more characters is found
            and the ``enchant`` package is not installed.

    Examples:
        >>> breakWithOutWhiteSpace("Stop.I go")
        ['Stop.', 'I go']
        >>> breakWithOutWhiteSpace("Hello world")
        ['Hello world']
    """
    import re

    dictionary = None  # created lazily: only multi-letter words need it
    split_points = []  # indices of the accepted '.' characters, ascending

    # Walk the matches directly instead of re-searching each matched text as
    # a new pattern: the leading '.' would act as a regex wildcard there and
    # short tokens would also match inside longer ones.
    for match in re.finditer(r"\.\w+", sentence):
        word = match.group()[1:]
        if len(word) < 2:
            is_word = word.lower() in ("i", "a")
        else:
            if dictionary is None:
                import enchant

                # "en_UK" is not an ISO locale tag (the UK is "GB"); keep it
                # when the local enchant broker provides it, otherwise fall
                # back to the standard British English dictionary.
                tag = "en_UK" if enchant.dict_exists("en_UK") else "en_GB"
                dictionary = enchant.Dict(tag)
            is_word = dictionary.check(word)
        if is_word:
            split_points.append(match.start())

    pieces = []
    start = 0
    for dot in split_points:
        if dot == 0:
            # Leading dot: there is no text before it to split off.
            continue
        pieces.append(sentence[start:dot + 1])
        start = dot + 1
    # The tail is never empty after a split because every match has at least
    # one word character after its dot.
    pieces.append(sentence[start:])
    return pieces
# Comment list
# Table of contents