def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
python类INDENT的实例源码
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def _find_strings(filename, encoding=None):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
with open(filename, encoding=encoding) as f:
tok = tokenize.generate_tokens(f.readline)
for ttype, tstr, start, end, line in tok:
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
return d
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def __openseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == ')':
# We've seen the last of the translatable strings. Record the
# line number of the first line of the strings and update the list
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything else than STRING or whitespace
print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
}
self.__state = self.__waiting
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def __openseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == ')':
# We've seen the last of the translatable strings. Record the
# line number of the first line of the strings and update the list
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything else than STRING or whitespace
print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
}
self.__state = self.__waiting
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def _find_logical(source_lines):
# Make a variable which is the index of all the starts of lines.
logical_start = []
logical_end = []
last_newline = True
parens = 0
for t in generate_tokens(''.join(source_lines)):
if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
tokenize.INDENT, tokenize.NL,
tokenize.ENDMARKER]:
continue
if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
last_newline = True
logical_end.append((t[3][0] - 1, t[2][1]))
continue
if last_newline and not parens:
logical_start.append((t[2][0] - 1, t[2][1]))
last_newline = False
if t[0] == tokenize.OP:
if t[1] in '([{':
parens += 1
elif t[1] in '}])':
parens -= 1
return (logical_start, logical_end)
def _find_logical(source_lines):
# Make a variable which is the index of all the starts of lines.
logical_start = []
logical_end = []
last_newline = True
parens = 0
for t in generate_tokens(''.join(source_lines)):
if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
tokenize.INDENT, tokenize.NL,
tokenize.ENDMARKER]:
continue
if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
last_newline = True
logical_end.append((t[3][0] - 1, t[2][1]))
continue
if last_newline and not parens:
logical_start.append((t[2][0] - 1, t[2][1]))
last_newline = False
if t[0] == tokenize.OP:
if t[1] in '([{':
parens += 1
elif t[1] in '}])':
parens -= 1
return (logical_start, logical_end)
def __openseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == ')':
# We've seen the last of the translatable strings. Record the
# line number of the first line of the strings and update the list
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything else than STRING or whitespace
print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
}
self.__state = self.__waiting
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def _find_strings(filename, encoding=None):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
with open(filename, encoding=encoding) as f:
tok = tokenize.generate_tokens(f.readline)
for ttype, tstr, start, end, line in tok:
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
return d
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def find_strings(filename):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
f = open(filename)
for ttype, tstr, start, end, line in tokenize.generate_tokens(f.readline):
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
f.close()
return d
def _find_strings(filename, encoding=None):
"""Return a dict of possible docstring positions.
The dict maps line numbers to strings. There is an entry for
line that contains only a string or a part of a triple-quoted
string.
"""
d = {}
# If the first token is a string, then it's the module docstring.
# Add this special case so that the test in the loop passes.
prev_ttype = token.INDENT
with open(filename, encoding=encoding) as f:
tok = tokenize.generate_tokens(f.readline)
for ttype, tstr, start, end, line in tok:
if ttype == token.STRING:
if prev_ttype == token.INDENT:
sline, scol = start
eline, ecol = end
for i in range(sline, eline + 1):
d[i] = 1
prev_ttype = ttype
return d
def _find_logical(source_lines):
# Make a variable which is the index of all the starts of lines.
logical_start = []
logical_end = []
last_newline = True
parens = 0
for t in generate_tokens(''.join(source_lines)):
if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
tokenize.INDENT, tokenize.NL,
tokenize.ENDMARKER]:
continue
if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
last_newline = True
logical_end.append((t[3][0] - 1, t[2][1]))
continue
if last_newline and not parens:
logical_start.append((t[2][0] - 1, t[2][1]))
last_newline = False
if t[0] == tokenize.OP:
if t[1] in '([{':
parens += 1
elif t[1] in '}])':
parens -= 1
return (logical_start, logical_end)
def __openseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == ')':
# We've seen the last of the translatable strings. Record the
# line number of the first line of the strings and update the list
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything else than STRING or whitespace
print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
}
self.__state = self.__waiting
def __call__(self, toktype, toktext, start_pos, end_pos, line):
""" Token handler, with syntax highlighting."""
(srow,scol) = start_pos
(erow,ecol) = end_pos
colors = self.colors
owrite = self.out.write
# line separator, so this works across platforms
linesep = os.linesep
# calculate new positions
oldpos = self.pos
newpos = self.lines[srow] + scol
self.pos = newpos + len(toktext)
# send the original whitespace, if needed
if newpos > oldpos:
owrite(self.raw[oldpos:newpos])
# skip indenting tokens
if toktype in [token.INDENT, token.DEDENT]:
self.pos = newpos
return
# map token type to a color group
if token.LPAR <= toktype <= token.OP:
toktype = token.OP
elif toktype == token.NAME and keyword.iskeyword(toktext):
toktype = _KEYWORD
color = colors.get(toktype, colors[_TEXT])
#print '<%s>' % toktext, # dbg
# Triple quoted strings must be handled carefully so that backtracking
# in pagers works correctly. We need color terminators on _each_ line.
if linesep in toktext:
toktext = toktext.replace(linesep, '%s%s%s' %
(colors.normal,linesep,color))
# send text
owrite('%s%s%s' % (color,toktext,colors.normal))
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
def _get_indentword(source):
"""Return indentation type."""
indent_word = ' ' # Default in case source has no indentation
try:
for t in generate_tokens(source):
if t[0] == token.INDENT:
indent_word = t[1]
break
except (SyntaxError, tokenize.TokenError):
pass
return indent_word
def _parse_tokens(tokens):
"""Parse the tokens.
This converts the tokens into a form where we can manipulate them
more easily.
"""
index = 0
parsed_tokens = []
num_tokens = len(tokens)
while index < num_tokens:
tok = Token(*tokens[index])
assert tok.token_type != token.INDENT
if tok.token_type == tokenize.NEWLINE:
# There's only one newline and it's at the end.
break
if tok.token_string in '([{':
(container, index) = _parse_container(tokens, index)
if not container:
return None
parsed_tokens.append(container)
else:
parsed_tokens.append(Atom(tok))
index += 1
return parsed_tokens
def multiline_string_lines(source, include_docstrings=False):
"""Return line numbers that are within multiline strings.
The line numbers are indexed at 1.
Docstrings are ignored.
"""
line_numbers = set()
previous_token_type = ''
try:
for t in generate_tokens(source):
token_type = t[0]
start_row = t[2][0]
end_row = t[3][0]
if token_type == tokenize.STRING and start_row != end_row:
if (
include_docstrings or
previous_token_type != tokenize.INDENT
):
# We increment by one since we want the contents of the
# string.
line_numbers |= set(range(1 + start_row, 1 + end_row))
previous_token_type = token_type
except (SyntaxError, tokenize.TokenError):
pass
return line_numbers
def _get_indentword(source):
"""Return indentation type."""
indent_word = ' ' # Default in case source has no indentation
try:
for t in generate_tokens(source):
if t[0] == token.INDENT:
indent_word = t[1]
break
except (SyntaxError, tokenize.TokenError):
pass
return indent_word
def _parse_tokens(tokens):
"""Parse the tokens.
This converts the tokens into a form where we can manipulate them
more easily.
"""
index = 0
parsed_tokens = []
num_tokens = len(tokens)
while index < num_tokens:
tok = Token(*tokens[index])
assert tok.token_type != token.INDENT
if tok.token_type == tokenize.NEWLINE:
# There's only one newline and it's at the end.
break
if tok.token_string in '([{':
(container, index) = _parse_container(tokens, index)
if not container:
return None
parsed_tokens.append(container)
else:
parsed_tokens.append(Atom(tok))
index += 1
return parsed_tokens
def multiline_string_lines(source, include_docstrings=False):
"""Return line numbers that are within multiline strings.
The line numbers are indexed at 1.
Docstrings are ignored.
"""
line_numbers = set()
previous_token_type = ''
try:
for t in generate_tokens(source):
token_type = t[0]
start_row = t[2][0]
end_row = t[3][0]
if token_type == tokenize.STRING and start_row != end_row:
if (
include_docstrings or
previous_token_type != tokenize.INDENT
):
# We increment by one since we want the contents of the
# string.
line_numbers |= set(range(1 + start_row, 1 + end_row))
previous_token_type = token_type
except (SyntaxError, tokenize.TokenError):
pass
return line_numbers
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
repo_health_checkup.py 文件源码
项目:interview-with-python
作者: thundergolfer
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def check_amount_of_commenting( readable ):
"""
Check lines of file to see whether it has a lot of commenting in it,
which we take to mean the presence of code explanation.
"""
# TODO: I have hacked this code together and it could be silly and buggy.
# It does seem to get things roughly correct, which is good enough for the moment
prev_toktype = token.INDENT
first_line = None
last_lineno = -1
last_col = 0
comment_lines = 0
tokgen = tokenize.generate_tokens( readable )
for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
if 1: # Change to if 1 to see the tokens fly by.
print("%10s %-14s %-20r %r" % (
tokenize.tok_name.get(toktype, toktype),
"%d.%d-%d.%d" % (slineno, scol, elineno, ecol),
ttext, ltext
))
if slineno > last_lineno:
last_col = 0
if toktype == token.STRING and prev_toktype == token.INDENT:
# Docstring
comment_lines += 1
elif toktype == tokenize.COMMENT:
# Comment
comment_lines += 1
prev_toktype = toktype
last_col = ecol
last_lineno = elineno
return comment_lines, last_lineno # roughly num of comment lines divided by num of lines
def __call__(self, toktype, toktext, start_pos, end_pos, line):
""" Token handler, with syntax highlighting."""
(srow,scol) = start_pos
(erow,ecol) = end_pos
colors = self.colors
owrite = self.out.write
# line separator, so this works across platforms
linesep = os.linesep
# calculate new positions
oldpos = self.pos
newpos = self.lines[srow] + scol
self.pos = newpos + len(toktext)
# send the original whitespace, if needed
if newpos > oldpos:
owrite(self.raw[oldpos:newpos])
# skip indenting tokens
if toktype in [token.INDENT, token.DEDENT]:
self.pos = newpos
return
# map token type to a color group
if token.LPAR <= toktype <= token.OP:
toktype = token.OP
elif toktype == token.NAME and keyword.iskeyword(toktext):
toktype = _KEYWORD
color = colors.get(toktype, colors[_TEXT])
#print '<%s>' % toktext, # dbg
# Triple quoted strings must be handled carefully so that backtracking
# in pagers works correctly. We need color terminators on _each_ line.
if linesep in toktext:
toktext = toktext.replace(linesep, '%s%s%s' %
(colors.normal,linesep,color))
# send text
owrite('%s%s%s' % (color,toktext,colors.normal))