def strip_python_comments(data):
    """
    Drop comment-only lines and blank lines from Python source text.

    A line is removed when, after trimming surrounding whitespace, it is
    empty or begins with "#".  All other lines are kept exactly as they
    were (including their indentation and any trailing inline comment).

    Note: despite the long-term goal below, block comments/docstrings and
    verbose/debug statements are NOT removed by this implementation.

    Args:
        data (str): Python source text with "\n" line separators.

    Returns:
        str: The surviving lines joined with "\n".
    """
    # TODO: implement pyminifier functionality
    kept = []
    for raw_line in data.split("\n"):
        trimmed = raw_line.strip()
        if trimmed == '' or trimmed.startswith("#"):
            continue
        kept.append(raw_line)
    return "\n".join(kept)
###############################################################
#
# Miscellaneous methods (formatting, sorting, etc.)
#
###############################################################
# Example source code for Python's startswith() method
def get_opcodes(alignment):
    """
    Convert a sequence of alignment operations into span opcodes.

    Each operation is a string whose first character selects how far the
    source and target cursors advance:
      * "D"  (deletion)      -> source advances by 1
      * "I"  (insertion)     -> target advances by 1
      * "T"  (transposition) -> both advance by k, where k is the integer
                                following the "T" (2 when omitted)
      * anything else (match or substitution) -> both advance by 1

    Returns:
        list: One tuple ``(op, s_start, s_end, t_start, t_end)`` per
        operation, where each span starts where the previous one ended.
    """
    opcodes = []
    src_pos = 0
    tgt_pos = 0
    for op in alignment:
        kind = op[0]
        if kind == "D":
            src_step, tgt_step = 1, 0
        elif kind == "I":
            src_step, tgt_step = 0, 1
        elif kind.startswith("T"):
            # The number of elements involved follows the "T"; default 2.
            width = int(op[1:] or 2)
            src_step = tgt_step = width
        else:
            src_step = tgt_step = 1
        src_next = src_pos + src_step
        tgt_next = tgt_pos + tgt_step
        opcodes.append((op, src_pos, src_next, tgt_pos, tgt_next))
        # The next span begins where this one stopped.
        src_pos, tgt_pos = src_next, tgt_next
    return opcodes
def get_translation_dicts(self):
    """
    Build lookup tables between keysym values and their names.

    Every attribute of ``Xlib.XK`` whose name starts with ``XK_`` is
    recorded with that prefix removed.

    Returns:
        tuple: ``(keysym_to_string_dict, string_to_keysym_dict)``.  When
        several names share one keysym value, the name encountered last
        while iterating wins in the keysym-to-string table.
    """
    # XK loads latin1 and miscellany on its own; load latin2-4 and greek
    for group in ('latin2', 'latin3', 'latin4', 'greek'):
        Xlib.XK.load_keysym_group(group)

    prefix = 'XK_'
    name_to_keysym = {}
    keysym_to_name = {}
    # Fill the forward table and its inverse in a single pass.
    for attr, value in Xlib.XK.__dict__.items():
        if not attr.startswith(prefix):
            continue
        name = attr[len(prefix):]
        name_to_keysym[name] = value
        keysym_to_name[value] = name
    return keysym_to_name, name_to_keysym
def get_translation_dicts(self):
    """
    Build lookup tables between keysym values and their names.

    Every attribute of ``Xlib.XK`` whose name starts with ``XK_`` is
    recorded with that prefix removed.

    Returns:
        tuple: ``(keysym_to_string_dict, string_to_keysym_dict)``.  When
        several names share one keysym value, the name encountered last
        while iterating wins in the keysym-to-string table.
    """
    # XK loads latin1 and miscellany on its own; load latin2-4 and greek
    for group in ('latin2', 'latin3', 'latin4', 'greek'):
        Xlib.XK.load_keysym_group(group)

    prefix = 'XK_'
    name_to_keysym = {}
    keysym_to_name = {}
    # Fill the forward table and its inverse in a single pass.
    for attr, value in Xlib.XK.__dict__.items():
        if not attr.startswith(prefix):
            continue
        name = attr[len(prefix):]
        name_to_keysym[name] = value
        keysym_to_name[value] = name
    return keysym_to_name, name_to_keysym
def _first_char(self, data):
    """Return the first character of data (in bibtex's sense).

    Scans left to right and returns:
      * the first alphabetic character found outside any brace group; or
      * for a group opening with ``{\\`` (a "special character"), the
        ``_CONTROL_SEQS`` entry for the control sequence that follows
        the brace, if it is a known one, otherwise the first alphabetic
        character inside that group; or
      * ``''`` when the end of data is reached without a result.

    Ordinary brace groups are skipped entirely.
    """
    # XXX Should this be pulled out as some generic algorithm?
    # NOTE(review): depths[i] is used as "position i is inside a brace
    # group" (truthy) vs. "top level" (falsy) -- confirm against __depth.
    depths = self.__depth(data)
    end = len(data)
    control_seq = re.compile(r'\\[a-zA-Z]+')
    idx = 0
    while idx != end:
        ch = data[idx]
        if ch.isalpha():
            return ch
        if data.startswith('{\\', idx):
            # Special character: step past the brace onto the backslash.
            idx += 1
            found = control_seq.match(data, idx)
            if found and found.group() in _CONTROL_SEQS:
                # Known bibtex control sequence
                return _CONTROL_SEQS[found.group()]
            # Otherwise take the first letter inside this group, if any.
            while idx < end and depths[idx]:
                if data[idx].isalpha():
                    return data[idx]
                idx += 1
        elif ch == '{':
            # Skip the whole brace group.
            while idx < end and depths[idx]:
                idx += 1
        else:
            idx += 1
    return ''
def parse_month(string, pos=messages.Pos.unknown):
    """Parse a BibTeX month field.

    The field is trimmed of surrounding whitespace and trailing periods
    and lower-cased; it matches a month when it is at least three
    characters long and is a prefix of that month's entry in ``_MONTHS``.

    Returns the 1-based index of the first matching ``_MONTHS`` entry.

    Reports the failure through ``pos.raise_error`` (documented upstream
    as raising InputError) if the field cannot be parsed.
    """
    cleaned = string.strip().rstrip('.').lower()
    # Anything shorter than three characters can never match.
    if len(cleaned) >= 3:
        for number, month_name in enumerate(_MONTHS, 1):
            if month_name.startswith(cleaned):
                return number
    pos.raise_error('invalid month `{}\''.format(string))
def process(self, string, pos):
    """Expand active characters and macros in string.

    Repeatedly searches the working text with ``tex_cs_re`` starting at
    the current offset, asks ``self._expand`` for a replacement of the
    captured macro, splices the replacement in, and resumes scanning
    right after it.  Returns the fully expanded text.

    Reports an error through ``pos.raise_error`` (documented upstream as
    raising InputError) if it encounters an active character or macro it
    doesn't recognize.
    """
    self.__data = string
    self.__off = 0
    self.__pos = pos
    # Process macros
    while True:
        found = tex_cs_re.search(self.__data, self.__off)
        if not found:
            return self.__data
        begin = found.start()
        # Set the offset before expanding; the text is later re-joined
        # from wherever the offset stands after _expand returns.
        self.__off = found.end()
        macro = found.group(1)
        replacement = self._expand(macro)
        if replacement is None:
            if macro.startswith('\\'):
                pos.raise_error('unknown macro `{}\''.format(macro))
            pos.raise_error(
                'unknown special character `{}\''.format(macro))
        head = self.__data[:begin]
        tail = self.__data[self.__off:]
        self.__data = head + replacement + tail
        self.__off = begin + len(replacement)
def _expand(self, cs):
    """Return the expansion of control sequence ``cs``, or None.

    * If ``cs`` is in ``self._SIMPLE``, its mapped value is returned.
    * If ``cs`` is in ``self._ACCENTS``, the following argument is read
      with ``self._scan_argument()`` and the accent's combining mark is
      attached to the argument's first character (to a space when the
      argument is empty); the result is NFC-normalized and the rest of
      the argument is appended unchanged.
    * Otherwise None is returned.
    """
    if cs in self._SIMPLE:
        return self._SIMPLE[cs]
    if cs not in self._ACCENTS:
        return None

    arg = self._scan_argument()
    if len(arg) == 0:
        base, rest = ' ', ''
    elif arg.startswith(('\\i', '\\j')):
        # Unicode combining marks should be applied to the regular
        # i/j, not the dotless one, so drop the leading backslash.
        base, rest = arg[1], arg[2:]
    else:
        base, rest = arg[0], arg[1:]
    return unicodedata.normalize('NFC', base + self._ACCENTS[cs]) + rest
def color(string, color='', graphic=''):
    """
    Change text color for the Linux terminal.

    Args:
        string (str): String to colorify
        color (str): Name of the color to use; must be a key of the
            module-level ``colors`` mapping (e.g. black, red, green,
            yellow, blue, purple, cyan, gr[ae]y).  When empty, the color
            is inferred from the string's status prefix: "[!] " -> red,
            "[+] " -> green, "[*] " -> blue, anything else -> normal.
        graphic (str): Graphic to append to the beginning of the line

    Returns:
        str or None: ``colors[color] + graphic + string +
        colors['normal']``, or None (after printing a warning) when the
        color name is not in ``colors``.
    """
    if not color:
        # Infer the color from the conventional status prefix.
        color = 'normal'
        for prefix, inferred in (("[!] ", 'red'),
                                 ("[+] ", 'green'),
                                 ("[*] ", 'blue')):
            if string.startswith(prefix):
                color = inferred
                break

    if color not in colors:
        print(colors['red'] + 'Color not found: {}'.format(color) + colors['normal'])
        return None

    # `color` is always a non-empty key here, so no uncolored fallback
    # is needed.
    return colors[color] + graphic + string + colors['normal']
def match(self, substring):
    """
    Try to consume ``substring`` at the current position.

    When ``self.ignore_space`` is false, the text at ``self.pos`` must
    start with ``substring`` exactly.  When it is true, whitespace and
    "#" comments (running to the end of the line) in ``self.string`` are
    skipped before each character of ``substring`` is compared.

    Returns:
        bool: True if matched, in which case ``self.pos`` is advanced
        past the consumed text; False otherwise, leaving ``self.pos``
        untouched.
    """
    text = self.string
    cursor = self.pos

    if not self.ignore_space:
        if text.startswith(substring, cursor):
            self.pos = cursor + len(substring)
            return True
        return False

    try:
        for expected in substring:
            ch = text[cursor]
            while ch.isspace() or ch == "#":
                if ch == "#":
                    # Jump to the newline that ends the comment.
                    cursor = text.index("\n", cursor)
                else:
                    # Skip over the whitespace.
                    cursor += 1
                ch = text[cursor]
            if ch != expected:
                return False
            cursor += 1
    except (IndexError, ValueError):
        # IndexError: we ran off the end of the string.
        # ValueError: a comment extended to the end of the string.
        return False

    self.pos = cursor
    return True
def lhost():
    """
    Return the local IP.

    The hostname is resolved first.  If that yields nothing or a
    loopback ("127.") address and the platform is not Windows, a fixed
    list of common interface names is probed with the SIOCGIFADDR ioctl
    and the first address obtained is returned.

    Returns:
        str: The IP address, or "" when none could be determined.
    """
    ip = ""
    try:
        ip = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        # Hostname does not resolve; fall through to the interface probe.
        pass
    except Exception:
        print("Unexpected error: {}".format(sys.exc_info()[0]))
        return ip

    if os.name == "nt" or not (ip == "" or ip.startswith("127.")):
        return ip

    # fcntl only exists on POSIX platforms, so import it lazily here.
    import fcntl
    import struct

    def get_interface_ip(ifname):
        """Return the IPv4 address bound to ifname, or "" on I/O error."""
        s = None
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            # struct.pack's 's' format needs bytes on Python 3.
            request = struct.pack('256s', ifname[:15].encode('ascii'))
            reply = fcntl.ioctl(
                s.fileno(),
                0x8915,  # SIOCGIFADDR
                request
            )
            return socket.inet_ntoa(reply[20:24])
        except IOError:
            return ""
        finally:
            # Always release the probe socket.
            if s is not None:
                s.close()

    interfaces = ["eth0", "eth1", "eth2", "wlan0", "wlan1", "wifi0", "ath0", "ath1", "ppp0"]
    for ifname in interfaces:
        try:
            ip = get_interface_ip(ifname)
        except Exception:
            # Best effort: report and move on to the next interface.
            print("Unexpected error: {}".format(sys.exc_info()[0]))
            continue
        if ip != "":
            break
    return ip
def color(string, color=None):
    """
    Change text color for the Linux terminal.

    The text is wrapped in a bold ANSI escape sequence.  With an explicit
    ``color`` (case-insensitive: red, yellow, green, blue) the matching
    foreground code is added; an unrecognized name yields bold only.
    Without ``color`` the code is inferred from the prefix -- "[!]" red,
    "[+]" green, "[*]" blue -- and a string with none of these prefixes
    is returned unchanged.
    """
    named_codes = {"red": '31', "yellow": '33', "green": '32', "blue": '34'}
    prefix_codes = (("[!]", '31'), ("[+]", '32'), ("[*]", '34'))

    # '1' selects bold.
    attr = ['1']
    if color:
        code = named_codes.get(color.lower())
        if code is not None:
            attr.append(code)
    else:
        for marker, code in prefix_codes:
            if string.startswith(marker):
                attr.append(code)
                break
        else:
            # No status prefix: leave the string untouched.
            return string
    return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
def complete_path(text, line, arg=False):
    """
    Helper for tab-completion of file paths.

    Args:
        text: Unused by this helper.
        line (str): The full command line typed so far.
        arg (bool): True when the line has the form
            "command something path" (the first word is dropped before
            looking for the path), False for "command path".

    Returns:
        list: Entry names in the target directory that start with the
        partial name typed so far; entries that are not regular files get
        a trailing "/".  With fewer than two words to work with, the raw
        listing of the current directory is returned.
    """
    # stolen from dataq at
    # http://stackoverflow.com/questions/16826172/filename-tab-completion-in-cmd-cmd-of-python
    words = line.split()
    if arg:
        words = words[1:]

    if len(words) < 2:
        return os.listdir('./')

    parent, sep, prefix = words[-1].rpartition('/')
    if sep == '':
        # No slash typed: complete within the current directory.
        parent = './'
    elif parent == '':
        # Path starts with a slash: complete within the root directory.
        parent = '/'

    matches = []
    for entry in os.listdir(parent):
        if not entry.startswith(prefix):
            continue
        if os.path.isfile(os.path.join(parent, entry)):
            matches.append(entry)
        else:
            matches.append(entry + '/')
    return matches
def check_split(source, target, edits):
    """
    Decide whether a group of edits merely splits (or joins) one token.

    For every edit the source slice ``source[e[1]:e[2]]`` and the target
    slice ``target[e[3]:e[4]]`` are read through their ``orth_`` text,
    with apostrophes removed; empty texts are ignored.  The edits count
    as a split when exactly one side has a single text, the other side
    has several, and the single text starts with the first piece, ends
    with the last piece, and contains the middle pieces in order.

    Args:
        source: Sliceable source sentence whose slices expose ``orth_``
            (presumably a spaCy Doc -- confirm against the caller).
        target: Sliceable target sentence, same requirements as source.
        edits: Iterable of edits indexed as
            ``(op, s_start, s_end, t_start, t_end)``.

    Returns:
        bool: True if the edits describe a token split/join.
    """
    src_pieces = []
    tgt_pieces = []
    # Collect the tokens
    for edit in edits:
        s_tok = source[edit[1]:edit[2]].orth_.replace("'", "")
        t_tok = target[edit[3]:edit[4]].orth_.replace("'", "")
        if s_tok:
            src_pieces.append(s_tok)
        if t_tok:
            tgt_pieces.append(t_tok)

    # Exactly one side must hold a single string and the other several.
    if len(src_pieces) == 1 and len(tgt_pieces) > 1:
        string, tokens = src_pieces[0], tgt_pieces
    elif len(tgt_pieces) == 1 and len(src_pieces) > 1:
        string, tokens = tgt_pieces[0], src_pieces
    else:
        return False

    first, last = tokens[0], tokens[-1]
    # Matches beginning
    if not string.startswith(first):
        return False
    string = string[len(first):]
    # Matches end
    if not string.endswith(last):
        return False
    string = string[:-len(last)]

    # Matches all tokens in the middle (in order)
    for piece in tokens[1:-1]:
        found_at = string.find(piece)
        if found_at == -1:
            # Token not found
            return False
        string = string[found_at + len(piece):]
    # All tokens found
    return True
# Input 1: Spacy source sentence
# Input 2: Spacy target sentence
# Input 3: The alignment between the 2; [e.g. M, M, S, S, M]
# Function that decides whether to merge, or keep separate, adjacent edits of various types
# Processes 1 alignment at a time