def scan(self, tokens, max_matches=six.MAXSIZE, overlap=False):
""""""
if not self.streamlined:
self.streamline()
matches = 0
i = 0
length = len(tokens)
while i < length and matches < max_matches:
try:
results, next_i = self.parse(tokens, i)
        except ParseException:
i += 1
else:
if next_i > i:
matches += 1
if len(results) == 1:
results = results[0]
yield results, i, next_i
if overlap:
i += 1
else:
i = next_i
else:
i += 1
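A quick usage sketch (hedged: element stands for any parser object exposing this scan method, and tokens for whatever token sequence its parse accepts; neither name comes from the original project):

# Hypothetical call site: iterate over non-overlapping matches.
for results, start, end in element.scan(tokens, max_matches=5):
    print("matched tokens[%d:%d] -> %r" % (start, end, results))
# With overlap=True the scanner advances one token after each match,
# so matches that begin inside an earlier match are also reported.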
Python six.MAXSIZE usage examples

Source: short_sentence_similarity.py, project Semantic-Texual-Similarity-Toolkits (author: rgtjf)
def length_dist(synset_1, synset_2):
"""
Return a measure of the length of the shortest path in the semantic
ontology (Wordnet in our case as well as the paper's) between two
synsets.
"""
l_dist = six.MAXSIZE
if synset_1 is None or synset_2 is None:
return 0.0
if synset_1 == synset_2:
# if synset_1 and synset_2 are the same synset return 0
l_dist = 0.0
else:
        wset_1 = {str(lemma.name()) for lemma in synset_1.lemmas()}
        wset_2 = {str(lemma.name()) for lemma in synset_2.lemmas()}
if len(wset_1.intersection(wset_2)) > 0:
# if synset_1 != synset_2 but there is word overlap, return 1.0
l_dist = 1.0
else:
# just compute the shortest path between the two
l_dist = synset_1.shortest_path_distance(synset_2)
if l_dist is None:
l_dist = 0.0
# normalize path length to the range [0,1]
return math.exp(-ALPHA * l_dist)
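A hedged usage sketch: it assumes NLTK with the WordNet corpus downloaded, and that the module defines ALPHA elsewhere (0.2 is the value used in the Li et al. sentence-similarity formulation this code appears to follow):

from nltk.corpus import wordnet as wn

ALPHA = 0.2  # assumed value; the real constant lives elsewhere in the module

dog, cat = wn.synset('dog.n.01'), wn.synset('cat.n.01')
print(length_dist(dog, dog))  # identical synsets: exp(0) == 1.0
print(length_dist(dog, cat))  # decays exponentially with path length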
def format_docstring(docstring):
if not docstring:
return ''
# Convert tabs to spaces (following the normal Python rules)
# and split into a list of lines:
lines = docstring.expandtabs().splitlines()
# Determine minimum indentation (first line doesn't count):
indent = six.MAXSIZE
for line in lines[1:]:
stripped = line.lstrip()
if stripped:
indent = min(indent, len(line) - len(stripped))
# Remove indentation (first line is special):
trimmed = [lines[0].strip()]
if indent < six.MAXSIZE:
for line in lines[1:]:
trimmed.append(line[indent:].rstrip())
# Strip off trailing and leading blank lines:
while trimmed and not trimmed[-1]:
trimmed.pop()
while trimmed and not trimmed[0]:
trimmed.pop(0)
# Return a single string:
return '\n'.join(trimmed)
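For illustration, the trimming behaviour (which mirrors the PEP 257 reference trim() implementation) on a typical indented docstring:

def example():
    """First line.

        Indented body line.
    """

print(format_docstring(example.__doc__))
# First line.
#
# Indented body line.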
def test_normal_1(self, container):
for value in [None, -six.MAXSIZE, 0, None, six.MAXSIZE, None]:
container.update(value)
assert container.has_value()
assert not container.is_zero()
assert container.min_value == -six.MAXSIZE
assert container.max_value == six.MAXSIZE
def __init__(self, alignment):
self.alignment = alignment
self.node_list = [MemoryNode(six.MAXSIZE)]
self.max_allocation = 0
def allocate_best_fit(self, size):
size = MemoryManager.align(size, self.alignment)
best_node = None
best_offset = None
best_delta = six.MAXSIZE
offset = 0
for i, node in enumerate(self.node_list):
delta = node.size - size
if node.is_free and delta >= 0:
if not best_node or delta < best_delta:
best_i = i
best_node = node
best_offset = offset
best_delta = delta
offset += node.size
if not best_node:
raise RuntimeError("Bad Allocation")
else:
if best_delta == 0:
best_node.is_free = False
else:
self.node_list[best_i].size -= size
self.node_list.insert(best_i, MemoryNode(size, is_free=False))
self.max_allocation = max(self.max_allocation, best_offset + size)
return best_offset
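A hedged usage sketch of the best-fit allocator. MemoryNode and MemoryManager.align are not shown above, so this assumes a node carrying size and is_free, and an align that rounds sizes up to a multiple of the alignment:

mm = MemoryManager(alignment=64)
off_a = mm.allocate_best_fit(100)  # rounded up to 128 bytes
off_b = mm.allocate_best_fit(30)   # rounded up to 64 bytes
print(off_a, off_b)                # 0 128: placed right after the first block
print(mm.max_allocation)           # 192: high-water mark of the arena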
def list_more(fn, offset, size, batch_size, *args):
    """List data by calling fn page by page until size items have been
    fetched; a negative size means fetch everything available.
    """
    if size < 0:
        expected_total_size = six.MAXSIZE
    else:
        expected_total_size = size
        batch_size = min(size, batch_size)
response = None
total_count_got = 0
while True:
ret = fn(*args, offset=offset, size=batch_size)
if response is None:
response = ret
else:
response.merge(ret)
count = ret.get_count()
total = ret.get_total()
offset += count
total_count_got += count
batch_size = min(batch_size, expected_total_size - total_count_got)
if count == 0 or offset >= total or total_count_got >= expected_total_size:
break
return response
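A self-contained sketch of the contract list_more expects from fn: it must accept offset and size keyword arguments and return a response exposing merge(), get_count() and get_total() (names taken from the calls above; FakeResponse and fetch_page are purely illustrative):

DATA = list(range(10))

class FakeResponse(object):
    def __init__(self, items, total):
        self.items, self.total = items, total

    def merge(self, other):  # fold a later page into the accumulated response
        self.items.extend(other.items)

    def get_count(self):
        return len(self.items)

    def get_total(self):
        return self.total

def fetch_page(offset=0, size=0):
    return FakeResponse(DATA[offset:offset + size], len(DATA))

resp = list_more(fetch_page, 0, -1, 3)  # size=-1: fetch everything
print(resp.get_count())                 # 10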
def query_more(fn, offset, size, batch_size, *args):
    """Query data by calling fn page by page, retrying each page until the
    server reports it complete; a negative size means fetch everything.
    """
    if size < 0:
        expected_total_size = six.MAXSIZE
    else:
        expected_total_size = size
        batch_size = min(size, batch_size)
response = None
total_count_got = 0
complete = False
while True:
for _c in range(DEFAULT_QUERY_RETRY_COUNT):
ret = fn(*args, offset=offset, size=batch_size)
if ret.is_completed():
complete = True
break
time.sleep(DEFAULT_QUERY_RETRY_INTERVAL)
if response is None:
response = ret
else:
response.merge(ret)
        # if the page never completed after all retries, stop paginating
if not complete:
break
count = ret.get_count()
offset += count
total_count_got += count
batch_size = min(batch_size, expected_total_size - total_count_got)
if count == 0 or total_count_got >= expected_total_size:
break
return response
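Compared with list_more, the addition here is the inner retry loop: each page is re-requested up to DEFAULT_QUERY_RETRY_COUNT times, sleeping DEFAULT_QUERY_RETRY_INTERVAL seconds between attempts (both constants are defined elsewhere in the module), and pagination stops early if a page never reports itself complete.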
def test_0_call_int2(self):
c = rpc.Client(self._client_sock)
obj = six.MAXSIZE
assert isinstance(obj, int)
result = c.call(b'resp', [obj])
assert result == obj
import sys
# note: on PyPy, result will be a long type value.
sv = getattr(sys, 'subversion', None)
if sv is not None and sv[0] == 'PyPy':
assert isinstance(result, long)
else:
assert isinstance(result, type(obj))
def test_0_call_int3(self):
c = rpc.Client(self._client_sock)
    obj = -six.MAXSIZE - 1
assert isinstance(obj, int)
result = c.call(b'resp', [obj])
assert result == obj
assert isinstance(result, type(obj))
def test_integer_types():
assert isinstance(1, six.integer_types)
assert isinstance(-1, six.integer_types)
assert isinstance(six.MAXSIZE + 23, six.integer_types)
assert not isinstance(.1, six.integer_types)
def test_MAXSIZE():
try:
# This shouldn't raise an overflow error.
six.MAXSIZE.__index__()
except AttributeError:
# Before Python 2.6.
pass
py.test.raises(
(ValueError, OverflowError),
operator.mul, [None], six.MAXSIZE + 1)
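The final assertion relies on six.MAXSIZE being the platform's maximum container size (like sys.maxsize): no sequence of six.MAXSIZE + 1 elements can exist, so the repetition must raise (OverflowError on CPython; the test also tolerates ValueError for other implementations).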
Source: short_sentence_similarity.py, project Semantic-Texual-Similarity-Toolkits (author: rgtjf)
def hierarchy_dist(synset_1, synset_2):
"""
Return a measure of depth in the ontology to model the fact that
nodes closer to the root are broader and have less semantic similarity
than nodes further away from the root.
"""
h_dist = six.MAXSIZE
if synset_1 is None or synset_2 is None:
return h_dist
if synset_1 == synset_2:
# return the depth of one of synset_1 or synset_2
h_dist = max([x[1] for x in synset_1.hypernym_distances()])
else:
# find the max depth of least common subsumer
        hypernyms_1 = {x[0]: x[1] for x in synset_1.hypernym_distances()}
        hypernyms_2 = {x[0]: x[1] for x in synset_2.hypernym_distances()}
lcs_candidates = set(hypernyms_1.keys()).intersection(
set(hypernyms_2.keys()))
if len(lcs_candidates) > 0:
lcs_dists = []
            for lcs_candidate in lcs_candidates:
                lcs_d1 = hypernyms_1.get(lcs_candidate, 0)
                lcs_d2 = hypernyms_2.get(lcs_candidate, 0)
                lcs_dists.append(max(lcs_d1, lcs_d2))
h_dist = max(lcs_dists)
else:
h_dist = 0
return ((math.exp(BETA * h_dist) - math.exp(-BETA * h_dist)) /
(math.exp(BETA * h_dist) + math.exp(-BETA * h_dist)))
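The return expression is exactly the hyperbolic tangent of BETA * h_dist, so an equivalent one-line formulation (a hypothetical rewrite, not in the original) is:

    return math.tanh(BETA * h_dist)  # == (e^x - e^-x) / (e^x + e^-x)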
def test_0_call_int2(self):
c = rpc.Client(self._client_sock)
obj = six.MAXSIZE
assert isinstance(obj, int)
result = c.call('resp', [obj])
assert result == obj
assert isinstance(result, numbers.Integral)
def test_0_call_int3(self):
c = rpc.Client(self._client_sock)
    obj = -six.MAXSIZE - 1
assert isinstance(obj, int)
result = c.call('resp', [obj])
assert result == obj
assert isinstance(result, numbers.Integral)
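Asserting against numbers.Integral rather than type(obj) keeps these two tests portable across interpreters where the round-tripped value can come back as a different integer type (e.g. Python 2's long, as the PyPy note in the earlier variant of this test observes).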