def count_words(candidate_text, common_words=frequency.common_words['english'], case_sensitive=True):
    '''
    Count the instances of common words in the expected plaintext
    language; return the total number of characters matched across
    all the words.

    candidate_text - (string) Sample to analyze
    common_words - (list) Sequences expected to appear in the text
    case_sensitive - (bool) Whether or not to match case sensitively
    '''
    score = 0
    if not case_sensitive:
        # lowercase the sample too, otherwise case-insensitive matching fails
        candidate_text = candidate_text.lower()
    for word in common_words:
        if not case_sensitive:
            word = word.lower()
        num_found = candidate_text.count(word)
        if num_found > 0:
            score += num_found * len(word)
    return score
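A quick usage sketch, passing the word list explicitly (the default argument comes from the project's frequency module, which is not shown; the list here is a made-up stand-in):

sample = 'The quick brown fox jumps over the lazy dog'
words = ['the', 'and', 'fox']  # hypothetical stand-in for a common-word table
print count_words(sample, common_words=words, case_sensitive=False)
# 'the' matches twice (6 chars) and 'fox' once (3 chars) -> prints 9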
Example source code for Python's count()
def translateDescParams( desc_params ):
    desc_params = desc_params.replace(" ", "")
    buff = ""
    for elem in desc_params.split(","):
        if elem != "":
            tab = ""
            if "[" in elem:
                # one leading '[' per array dimension, then strip the
                # brackets from the type name itself
                tab = "[" * string.count(elem, "[")
                elem = elem[:elem.find("[")]
            if elem not in BASIC_TYPES:
                if elem in ADVANCED_TYPES:
                    buff += tab + ADVANCED_TYPES[elem] + " "
                else:
                    buff += tab + "L" + elem.replace(".", "/") + "; "
            else:
                buff += tab + BASIC_TYPES[elem] + " "
    buff = buff[:-1]
    return buff
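For illustration, with minimal stand-in type tables (the real BASIC_TYPES and ADVANCED_TYPES dicts live elsewhere in the project; the primitive codes below follow the standard JVM descriptor convention):

import string  # Python 2 string module, as used by the snippet
BASIC_TYPES = {'int': 'I', 'boolean': 'Z', 'void': 'V'}  # assumed sample
ADVANCED_TYPES = {}                                      # assumed sample
print translateDescParams("int, java.lang.String[]")
# -> I [Ljava/lang/String;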
def translateDescReturn( desc_return ):
    buff = ""
    for elem in desc_return.split(" "):
        tab = ""
        if "[" in elem:
            tab = "[" * string.count(elem, "[")
            elem = elem[:elem.find("[")]
        if elem in BASIC_TYPES:
            buff += tab + BASIC_TYPES[elem] + " "
        elif elem in ADVANCED_TYPES:
            buff += tab + ADVANCED_TYPES[elem] + " "
        elif "." in elem:
            buff += tab + "L" + elem.replace(".", "/") + "; "
    buff = buff[:-1]
    return buff
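The return-type variant works the same way with the stand-in tables above:

print translateDescReturn("void")   # -> V
print translateDescReturn("int[]")  # -> [I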
def __MirrorPath(self, aLocalView, aDestinationPath, aFilePath):
    # string.count() returns 0 when the substring is absent (never -1),
    # so test for at least one occurrence
    assert string.count(aFilePath, aLocalView) > 0
    myStartCommonSubpath = string.find(aFilePath, aLocalView)
    myStartCommonSubpath += len(aLocalView)
    myMirrorPath = aDestinationPath + aFilePath[myStartCommonSubpath:]
    myMirrorPath = os.path.dirname(myMirrorPath)
    myMirrorPath = os.path.normpath(myMirrorPath)
    return myMirrorPath
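A standalone sketch of the same logic using str methods, since the original is a private method on an unseen class (the paths below are invented):

import os

def mirror_path(local_view, destination_path, file_path):
    assert file_path.count(local_view) > 0
    start = file_path.find(local_view) + len(local_view)
    return os.path.normpath(os.path.dirname(destination_path + file_path[start:]))

print mirror_path('/views/dev', '/backup', '/views/dev/src/pkg/mod.py')
# -> /backup/src/pkg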
def generate_frequency_table(text, charset):
    '''
    Generate a character frequency table for a given text
    and charset, as a dict keyed by character or string with the
    frequency of appearance as the value, expressed as a decimal
    percentage

    text - A sample of plaintext to analyze for frequency data
    charset - (list of strings) The set of items to count in the plaintext
       such as ['a','b','c', ... 'z','aa','ab','ac', ... 'zz']
    '''
    freq_table = {}
    text_len = 0
    for char in charset:
        freq_table[char] = 0
    for char in text:
        if char in charset:
            freq_table[char] += 1
            text_len += 1
    # multigraph entries can't be found one character at a time,
    # so count them over the whole text
    for multigraph in filter(lambda x: len(x) > 1, charset):
        freq_table[multigraph] = string.count(text, multigraph)
    # Normalize frequencies with length of text
    for key in freq_table.keys():
        if text_len != 0:
            freq_table[key] /= float(text_len)
        else:
            freq_table[key] = 0
    return freq_table
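A quick check of the normalisation, counting single letters plus one digraph (the charset here is just an example):

import string  # Python 2 string module, required by the snippet
charset = list('abcdefghijklmnopqrstuvwxyz') + ['ll']
table = generate_frequency_table('hello world', charset)
print table['l']   # 3 of the 10 counted characters -> 0.3
print table['ll']  # one digraph occurrence over the same length -> 0.1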
def main():
    pathname = EasyDialogs.AskFileForOpen(message='File to check end-of-lines in:')
    if not pathname:
        sys.exit(0)
    fp = open(pathname, 'rb')
    try:
        data = fp.read()
    except MemoryError:
        EasyDialogs.Message('Sorry, file is too big.')
        sys.exit(0)
    if len(data) == 0:
        EasyDialogs.Message('File is empty.')
        sys.exit(0)
    number_cr = string.count(data, '\r')
    number_lf = string.count(data, '\n')
    if number_cr == number_lf == 0:
        EasyDialogs.Message('File contains no lines.')
    elif number_cr == 0:
        EasyDialogs.Message('File has unix-style line endings')
    elif number_lf == 0:
        EasyDialogs.Message('File has mac-style line endings')
    elif number_cr == number_lf:
        EasyDialogs.Message('File probably has MSDOS-style line endings')
    else:
        EasyDialogs.Message('File has no recognizable line endings (binary file?)')
    sys.exit(0)
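The classification itself needs nothing Mac-specific; here is the same \r/\n counting as a plain function (a sketch, not part of the original script):

def classify_eol(data):
    number_cr = data.count('\r')
    number_lf = data.count('\n')
    if number_cr == number_lf == 0:
        return 'no lines'
    elif number_cr == 0:
        return 'unix'
    elif number_lf == 0:
        return 'mac'
    elif number_cr == number_lf:
        return 'msdos'
    return 'mixed (binary?)'

print classify_eol('one\r\ntwo\r\n')  # -> msdos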
def puts(self, ts, *args):
    if ts == "":
        return
    ts = puts(ts, *args)  # delegate formatting to the module-level puts()
    ends_with_line_feed = (ts[-1] == '\n')
    # re-indent every newline except a trailing one
    max_replace = string.count(ts, '\n') - 1 if ends_with_line_feed else -1
    ts = string.replace(ts, '\n', '\n' + tab_character * self.indention, max_replace)
    if self.line_feed == 0:
        # at the start of a line, so indent before writing
        self.template += tab_character * self.indention
    self.template += ts
    if ends_with_line_feed:
        self.line_feed = 0
    else:
        self.line_feed += 1
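The count()/replace() pairing is what re-indents every newline except a trailing one; a minimal standalone demonstration of that trick:

ts = 'line1\nline2\n'
indent = '    '
max_replace = ts.count('\n') - 1  # leave the final newline alone
print repr(ts.replace('\n', '\n' + indent, max_replace))
# -> 'line1\n    line2\n'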
def get_rating(self):
    """Find the film's rating, from 0 to 10.
    Convert if needed when assigning."""
    tmp_rating = gutils.trim(self.page, "ticas por:</B></Center>", "c_critica.pl?id=")
    if tmp_rating:
        # each 'estrela.gif' star image is one star out of five;
        # double the count to get the 0-10 scale
        self.rating = str(float(string.count(tmp_rating, 'estrela.gif')) * 2)
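The scoring is just an occurrence count of the star image doubled onto the 0-10 scale; the HTML fragment below is made up for illustration:

fragment = '<img src="estrela.gif"><img src="estrela.gif"><img src="estrela.gif">'
print str(float(fragment.count('estrela.gif')) * 2)  # three stars -> '6.0'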
def main(exts):
    outlayer(exts=exts)  # create output layer lists
    files_by_type = {}
    histogram = {}
    # Check how much source code is available, plus do histogram of chars
    for ext in exts:
        files_by_type[ext] = glob.glob('code/*.' + ext)
        total_bytes = 0
        for file in files_by_type[ext]:
            total_bytes = total_bytes + os.path.getsize(file)
            source_data = open(file).read()
            for c in source_data:
                if c not in string.letters + " \n\r\t":
                    histogram[c] = 1 + histogram.get(c, 0)
        sys.stderr.write("Total bytes of %s-source: %i\n" % (ext, total_bytes))
    # Sort the histogram and get a list of input symbols
    schwartzian = []
    for char, count in histogram.items():
        schwartzian.append('%8i %s' % (count, char))
    schwartzian.sort()
    schwartzian.reverse()
    common_symbols = []
    for line in schwartzian[:NUM_FEATURES]:
        common_symbols.append(line[9])
    sys.stderr.write('Input set: ' + string.join(common_symbols, ' ') + '\n')
    # Create the actual data set (first randomize file order)
    allfiles = {}
    for ext, files in files_by_type.items():
        for file in files:
            allfiles[file] = ext
    # Now step through random file order
    for file, ext in allfiles.items():
        fh = open(file)
        while 1:
            chunk = fh.read(CHUNK_SIZE)
            read_len = len(chunk)
            if read_len < CHUNK_SIZE:
                break
            for c in common_symbols:
                print "%.2f" % (string.count(chunk, c) * NORM_FACTOR),
            print '>',
            outlayer(ext)
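To run, the snippet needs the module-level names it references; plausible stand-ins are sketched below (all values are guesses, and outlayer() is a project-specific function that is not shown):

import glob, os, string, sys
NUM_FEATURES = 20                 # assumed: how many character features to keep
CHUNK_SIZE = 1000                 # assumed: bytes per training sample
NORM_FACTOR = 100.0 / CHUNK_SIZE  # assumed: scale raw counts to percentages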