def getSoupStringConcat(soupTag):
    """
    Concatenate every string found underneath a Beautiful Soup tag.

    A bs4 tag only hands back its text through ``.string`` when it has a
    single string child. Some pages on scotus blog have tags with more than
    one string child, so this helper walks all descendants and joins their
    text. Strings whose direct parent is a ``<script>`` tag are skipped so
    that JavaScript source is never included.

    :param soupTag: a bs4 tag that contains one or more strings (a
        NavigableString is also accepted and is returned via its ``.string``)
    :return: a string containing all string descendants of soupTag,
        concatenated in the order ``.descendants`` yields them; empty if
        there are none.
    """
    if isinstance(soupTag, NavigableString):
        return soupTag.string

    # Only NavigableString nodes are taken: ``.string`` on a Tag searches into
    # its children by default, so tags are filtered out explicitly here.
    # A single join avoids rebuilding the accumulated string on every node.
    return "".join(
        node.string
        for node in soupTag.descendants
        if isinstance(node, NavigableString)
        and node.string is not None
        and node.parent.name != "script"  # prevent reading js
    )
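# Page residue captured with the snippet (not code): 评论列表 ("comment list")
# Page residue captured with the snippet (not code): 文章目录 ("article table of contents")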