def markdown_preprocess(markdown_content):
    """Render Markdown to HTML and return its text with noisy elements removed.

    The Markdown is rendered with ``mistune.markdown(..., escape=False)`` and
    parsed with BeautifulSoup's ``html.parser``. The tree is then cleaned:

    * ``<a>`` elements whose text starts with ``http`` are removed entirely;
      every other ``<a>`` is replaced by its own children, so the link label
      stays in the output.
    * All ``<img>`` elements are removed.
    * All ``<code>`` elements are removed, together with their contents.

    Args:
        markdown_content: Markdown source to process.

    Returns:
        The text content of the cleaned document.
    """
    rendered_html = mistune.markdown(markdown_content, escape=False)
    soup = BeautifulSoup(rendered_html, "html.parser")

    # find_all() returns a materialized result list, so removing or unwrapping
    # nodes inside the loops does not disturb the iteration.
    for anchor in soup.find_all("a"):
        if anchor.text.startswith("http"):
            # The visible label is just a URL: drop the whole link.
            anchor.extract()
        else:
            # Keep the label, discard only the surrounding <a> tag.
            anchor.unwrap()

    for image in soup.find_all("img"):
        image.extract()

    # extract() (rather than decompose()) stays safe if a matched element was
    # already detached along with an enclosing <code> earlier in this loop.
    for code_block in soup.find_all("code"):
        code_block.extract()

    return soup.get_text()
pytextrank_textrank_scoring.py 文件源码
python
阅读 30
收藏 0
点赞 0
评论 0
评论列表
文章目录