def clear_text(text=''):
    """Strip attachment/upload boilerplate from *text* and trim whitespace.

    Removal steps, in order:

    1. the literal four characters backslash, ``x``, ``a``, ``0`` become a
       single space;
    2. parenthesised ``(... Bytes, ????: ...)`` notes are dropped;
    3. any remaining literal ``????`` placeholder is dropped;
    4. ``N-N-N N:N`` style timestamps are dropped;
    5. everything on a line up to and including ``.png`` plus the one
       whitespace character that follows it is dropped.

    NOTE(review): the ``?`` runs look like non-ASCII label text that was lost
    in an encoding round-trip -- restore the real characters if the original
    source is available.
    NOTE(review): step 1 targets the escaped text ``\\xa0``, not an actual
    non-breaking space character -- confirm that this is intended.

    :param text: the raw text to clean.
    :return: the cleaned text with leading/trailing whitespace stripped.
    """
    cleaned = text.replace(r'\xa0', ' ')
    # The question marks are escaped so they match literally. Left bare,
    # "????" is a stacked quantifier and re refuses to compile the pattern
    # ("multiple repeat"), which made every call raise re.error.
    # This runs before the plain "????" removal below; otherwise the
    # placeholder inside the parentheses would already be gone.
    cleaned = re.sub(r'\(.* Bytes, \?\?\?\?: .*\)', '', cleaned)
    cleaned = cleaned.replace('????', '')
    # As written, the trailing " ??" is a lazy optional space, so only the
    # timestamp itself is consumed.
    cleaned = re.sub(r'\d*-\d*-\d* \d*:\d* ??', '', cleaned)
    cleaned = re.sub(r'.*\.png\s', '', cleaned)
    return cleaned.strip()
python类subn()的实例源码
def safe_name(self):
    """Return ``self.name`` with every non-word character replaced by ``_``.

    Returns ``None`` when ``self.name`` is ``None``.
    """
    if self.name is None:
        return None
    sanitized, _ = re.subn(r"[^\w]", "_", self.name)
    return sanitized
def subn(self, repl, string, count):
    """Delegate a ``subn`` call to ``re_sub_ex``.

    Forwards this object's ``pattern``, ``compiled`` and ``flags`` attributes
    together with *repl*, *string* and *count*, and returns whatever
    ``re_sub_ex`` returns.
    """
    pattern, compiled, flags = self.pattern, self.compiled, self.flags
    return re_sub_ex(pattern, compiled, repl, string, count, flags)
# -- Parser ------------------------------------------------------------------
def clean(s, unicode=True):
    """Collapse runs of whitespace in *s* and strip both ends.

    Every run of two or more whitespace characters is replaced by the last
    whitespace character of that run (the value group 1 holds after the
    repeated capture).

    :param s: the string to clean.
    :param unicode: when true, ``re.UNICODE`` is passed as the regex flag;
        otherwise no flag is passed.
    :return: the collapsed, stripped string.
    """
    flags = re.UNICODE if unicode else 0
    # flags must be passed by keyword: the fourth positional parameter of
    # re.subn is ``count``, so passing it positionally silently capped the
    # number of replacements at the flag's integer value and never applied
    # the flag itself.
    collapsed, _ = re.subn(r'(\s){2,}', r'\g<1>', s, flags=flags)
    return collapsed.strip()
def refine_item(self, response, item):
    """Fill ``item['birth']`` and ``item['death']`` from the page's first <em>.

    The text extracted from ``.//div[@id="maincontent"]/p[1]/em`` is cut at
    the first ``<br>``, stripped of tags and split on ``'d.'``. Only when that
    yields exactly two parts are the fields set: the first part loses its
    leading two characters (the length of ``'b.'``), and a value of ``'?'`` is
    stored as ``None``. The item is then handed to the parent class's
    ``refine_item``, whose result is returned.
    """
    extracted = text.clean_extract(response, './/div[@id="maincontent"]/p[1]/em')
    first_segment = extracted.split('<br>')[0]
    without_tags, _ = re.subn(r'<[^>]+>', '', first_segment)
    parts = without_tags.split('d.')
    if len(parts) == 2:
        born = parts[0][len('b.'):].strip()
        died = parts[1].strip()
        item['birth'] = None if born == '?' else born
        item['death'] = None if died == '?' else died
    return super(MunksrollSpider, self).refine_item(response, item)
replacers.py 文件源码
项目:Natural-Language-Processing-Python-and-NLTK
作者: PacktPublishing
项目源码
文件源码
阅读 20
收藏 0
点赞 0
评论 0
def replace(self, text):
    """Apply every ``(pattern, repl)`` pair in ``self.patterns`` to *text*.

    The substitutions are applied in order, each one operating on the result
    of the previous one. Returns the final string.
    """
    result = text
    for regex, substitution in self.patterns:
        result = re.subn(regex, substitution, result)[0]
    return result
def html2bash(str):
    """Replace three kinds of HTML markup in *str* with newlines.

    Every match of ``constant.PATTERN_BR``, ``constant.PATTERN_P`` and
    ``constant.PATTERN_F_P`` (applied in that order) becomes ``'\\n'``.
    NOTE(review): presumably these match <br>, <p> and </p> tags -- confirm
    against the ``constant`` module.

    Returns the converted string.
    """
    converted = str
    for pattern in (constant.PATTERN_BR, constant.PATTERN_P, constant.PATTERN_F_P):
        converted = re.subn(pattern, '\n', converted)[0]
    return converted
def html2bash(str):
    """Replace three kinds of HTML markup in *str* with newlines.

    Every match of ``constant.PATTERN_BR``, ``constant.PATTERN_P`` and
    ``constant.PATTERN_F_P`` (applied in that order) becomes ``'\\n'``.
    NOTE(review): presumably these match <br>, <p> and </p> tags -- confirm
    against the ``constant`` module.

    Returns the converted string.
    """
    converted = str
    for pattern in (constant.PATTERN_BR, constant.PATTERN_P, constant.PATTERN_F_P):
        converted = re.subn(pattern, '\n', converted)[0]
    return converted
def money_str(self, with_symbol=True):
    """Render this amount with the currency's ``format_str``.

    The absolute value of ``self.amount`` is formatted together with the
    currency's symbol, code, name and decimals. When ``self.amount_raw`` is
    negative, a minus sign is added: the first whitespace character of the
    formatted text is replaced by ``' -'``, or, if the text has no
    whitespace, ``'-'`` is prefixed to it.

    *with_symbol* is accepted but not used by this method.

    Returns the formatted string.
    """
    # Note from the original author: formatting fails if any component
    # cannot be converted to ASCII.
    formatted = self.currency.format_str.format(
        amount=abs(self.amount),
        symbol=self.currency.symbol,
        code=self.currency.code,
        name=self.currency.name,
        decimals=self.currency.decimals)
    if not self.amount_raw < 0:
        return formatted
    # Put the sign right after the first whitespace so it sits next to the
    # number rather than before a leading symbol/code word.
    signed, hits = re.subn(r'\s', r' -', formatted, count=1)
    return signed if hits != 0 else '-' + formatted
def money_str(self, with_symbol=True):
    """Render this amount with the currency's ``format_str``.

    The absolute value of ``self.amount`` is formatted together with the
    currency's symbol, code, name and decimals. When ``self.amount_raw`` is
    negative, a minus sign is added: the first whitespace character of the
    formatted text is replaced by ``' -'``, or, if the text has no
    whitespace, ``'-'`` is prefixed to it.

    *with_symbol* is accepted but not used by this method.

    Returns the formatted string.
    """
    # Note from the original author: formatting fails if any component
    # cannot be converted to ASCII.
    formatted = self.currency.format_str.format(
        amount=abs(self.amount),
        symbol=self.currency.symbol,
        code=self.currency.code,
        name=self.currency.name,
        decimals=self.currency.decimals)
    if not self.amount_raw < 0:
        return formatted
    # Put the sign right after the first whitespace so it sits next to the
    # number rather than before a leading symbol/code word.
    signed, hits = re.subn(r'\s', r' -', formatted, count=1)
    return signed if hits != 0 else '-' + formatted
def money_str(self, with_symbol=True):
    """Render this amount with the currency's ``format_str``.

    The absolute value of ``self.amount`` is formatted together with the
    currency's symbol, code, name and decimals. When ``self.amount_raw`` is
    negative, a minus sign is added: the first whitespace character of the
    formatted text is replaced by ``' -'``, or, if the text has no
    whitespace, ``'-'`` is prefixed to it.

    *with_symbol* is accepted but not used by this method.

    Returns the formatted string.
    """
    # Note from the original author: formatting fails if any component
    # cannot be converted to ASCII.
    formatted = self.currency.format_str.format(
        amount=abs(self.amount),
        symbol=self.currency.symbol,
        code=self.currency.code,
        name=self.currency.name,
        decimals=self.currency.decimals)
    if not self.amount_raw < 0:
        return formatted
    # Put the sign right after the first whitespace so it sits next to the
    # number rather than before a leading symbol/code word.
    signed, hits = re.subn(r'\s', r' -', formatted, count=1)
    return signed if hits != 0 else '-' + formatted
def test_as_replace_function(self):
    """A compiled replace object can be passed to ``pattern.subn`` as the replacement."""
    subject = "this will be fed into re.subn! Here we go! this will be fed into re.subn! Here we go!"
    search = bre.compile_search(r"(?P<first>this )(?P<second>.*?)(!)")
    replacer = bre.compile_replace(search, r'\c\g<first>is awesome\g<3>')

    outcome = search.subn(replacer, subject)

    # Check the substituted text first, then the number of substitutions.
    self.assertEqual(outcome[0], "This is awesome! Here we go! This is awesome! Here we go!")
    self.assertEqual(outcome[1], 2)
def test_sub_wrong_replace_type(self):
    """``sub`` and ``subn`` must reject a replace object compiled with ``FORMAT``."""
    compiled = re.compile('test')
    format_replace = bre.compile_replace(compiled, 'whatever', bre.FORMAT)

    # Both entry points are expected to fail in the same way.
    for substitute in (bre.sub, bre.subn):
        with pytest.raises(ValueError) as excinfo:
            substitute(compiled, format_replace, 'test')
        assert "Compiled replace cannot be a format object!" in str(excinfo.value)
def test_sub_wrong_replace_format_type(self):
    """``subf`` and ``subfn`` must reject a replace object compiled without ``FORMAT``."""
    compiled = re.compile('test')
    plain_replace = bre.compile_replace(compiled, 'whatever')

    # Both entry points are expected to fail in the same way.
    for substitute in (bre.subf, bre.subfn):
        with pytest.raises(ValueError) as excinfo:
            substitute(compiled, plain_replace, 'test')
        assert "Compiled replace is not a format object!" in str(excinfo.value)
def test_subn(self):
    """``bre.subn`` returns the substituted text together with the match count."""
    actual = bre.subn(r'tset', 'test', r'This is a tset for subn! This is a tset for subn!')
    expected = ('This is a test for subn! This is a test for subn!', 2)
    self.assertEqual(actual, expected)
def compile_replace(pattern, repl, flags=0):
    """Build a ``Replace`` object usable as the replacement for `sub`, `subn`, etc.

    *pattern* must be a compiled regular expression (``RE_TYPE``). *repl* may be:

    - a text or binary string: a ``ReplaceTemplate`` is built from it, in
      format mode when *flags* includes ``FORMAT``, and wrapped in a
      ``Replace``;
    - a ``Replace``: returned unchanged, provided its ``pattern_hash`` equals
      ``hash(pattern)``;
    - a ``ReplaceTemplate``: wrapped in a ``Replace``.

    Raises ``TypeError`` when *pattern* is not a compiled regular expression
    or *repl* is none of the types above. Raises ``ValueError`` when *flags*
    is given together with a ``Replace`` or ``ReplaceTemplate``, or when the
    pattern hash of a ``Replace`` does not match.
    """
    if pattern is None or not isinstance(pattern, RE_TYPE):
        raise TypeError("Pattern must be a compiled regular expression!")

    if isinstance(repl, (compat.string_type, compat.binary_type)):
        template = ReplaceTemplate(pattern, repl, bool(flags & FORMAT))
    elif isinstance(repl, Replace):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern!")
        if repl.pattern_hash != hash(pattern):
            raise ValueError("Pattern hash doesn't match hash in compiled replace!")
        # Already compiled for this pattern; nothing to wrap.
        return repl
    elif isinstance(repl, ReplaceTemplate):
        if flags:
            raise ValueError("Cannot process flags argument with a ReplaceTemplate!")
        template = repl
    else:
        raise TypeError("Not a valid type!")

    return Replace(
        functools.partial(_apply_replace_backrefs, repl=template),
        template.use_format,
        template.pattern_hash,
    )
# Convenience methods like re has, but slower due to overhead on each call.
# It is recommended to use compile_search and compile_replace
def subfn(pattern, format, string, count=0, flags=0):  # noqa B002
    """Apply `subn` after applying backrefs, using a format-style replacement.

    :param pattern: the search pattern; it is passed through
        ``compile_search`` together with *flags*.
    :param format: the replacement. A text/binary string is compiled with
        ``compile_replace`` using the ``FORMAT`` flag; a compiled replace
        object (as recognised by ``_is_replace``) is passed through
        ``compile_replace`` with no flags; anything else is handed to the
        substitution unchanged.
    :param string: the text to search.
    :param count: forwarded to the compiled pattern's ``subn``.
    :param flags: search flags, applied when the pattern is compiled.
    :return: the result of the compiled pattern's ``subn`` call.
    :raises ValueError: if *format* is a compiled replace object that is not
        a format object.
    """
    is_replace = _is_replace(format)
    is_string = isinstance(format, (compat.string_type, compat.binary_type))
    if is_replace and not format.use_format:
        raise ValueError("Compiled replace is not a format object!")

    pattern = compile_search(pattern, flags)
    if is_replace or is_string:
        rflags = FORMAT if is_string else 0
        replacement = compile_replace(pattern, format, flags=rflags)
    else:
        replacement = format

    # The flags are already baked into the compiled pattern. Handing them to
    # re.subn again alongside a compiled pattern makes re raise ValueError
    # for any non-zero flags, so call the pattern's own subn instead.
    return pattern.subn(replacement, string, count)
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
def test_get_memory_info_without_swap(self):
    """With both swap figures rewritten to 0, every swap statistic is reported as 0."""
    # Rewrite the SwapFree/SwapTotal lines of the sample so both read 0.
    swapless = re.subn(r"Swap(Free|Total): *\d+ kB", r"Swap\1: 0",
                       SAMPLE_MEMORY_INFO)[0]
    memstats = MemoryStats(self.makeFile(swapless))

    zero_attributes = (
        "total_swap",
        "free_swap",
        "used_swap",
        "used_swap_percentage",
        "free_swap_percentage",
    )
    for attribute in zero_attributes:
        self.assertEqual(getattr(memstats, attribute), 0)

    # The percentages must be floats even when they are zero.
    for attribute in ("used_swap_percentage", "free_swap_percentage"):
        self.assertEqual(type(getattr(memstats, attribute)), float)