def save_part(lines):
    """Write the buffered part to an rst file and return the trailing lines.

    When ``lines`` holds more than two entries, every entry except the last
    two is joined, converted from Markdown to reStructuredText and written
    to ``<README_DOC_PATH>/<title>.rst``; the title is the first entry with
    spaces replaced by underscores.

    :param lines: buffered lines of the current part
    :return: the last two entries of ``lines``
    """
    has_content = len(lines) > 2
    if has_content:
        # the first line names the output file
        file_name = '%s.rst' % lines[0].replace(' ', '_')
        path = os.path.join(README_DOC_PATH, file_name)
        # everything but the two trailing lines, with image links re-pointed
        body = '\n'.join(lines[:-2]).replace('docs/img/', ' ../img/')
        print(body)
        # pandoc writes the file itself because outputfile is given
        pypandoc.convert_text(body, 'rst', format='md', outputfile=path)
    # only the last two lines are handed back to the caller
    return lines[-2:]
# create directory
Example source code for Python's convert_text()
def get_long_description():
    """Return the README beside this file, as reStructuredText when possible.

    ``README.md`` is read exactly once, before the conversion is attempted,
    so the fallback can return the complete Markdown text. Reading inside
    the ``try`` and again in the ``except`` would return an empty string
    whenever the conversion fails after the handle has been consumed.

    :return: the rst rendering of ``README.md`` with carriage returns
        removed, or the raw Markdown when pypandoc cannot be imported or
        the conversion raises ``OSError``
    """
    current_dir = path.abspath(path.dirname(__file__))
    readme_path = path.join(current_dir, 'README.md')
    with open(readme_path, encoding='utf-8') as f:
        markdown = f.read()
    try:
        import pypandoc
        return pypandoc.convert_text(markdown, 'rst', 'markdown_github').replace('\r', '')
    except (OSError, ImportError):
        print('\n\n!!! pandoc not found. long_description is not correct. Do not upload this to PyPI. !!!\n\n')
        return markdown
def __init__(self, source_data):
    """Convert MediaWiki markup to HTML and pass it to the parent initializer.

    :param source_data: MediaWiki source handed to pandoc unchanged
    :raises PypandocImportError: if pypandoc cannot be imported
    """
    try:
        import pypandoc
    except ImportError as import_error:
        raise PypandocImportError(import_error)
    html_source = pypandoc.convert_text(
        source_data, "html", format="mediawiki")
    super(MediaWikiTableFormatter, self).__init__(html_source)
def md2rst(text):
    """Convert ``text`` from the ``markdown_github`` format to rst."""
    rst = pypandoc.convert_text(text, to='rst', format='markdown_github')
    return rst
def convert(file, stdin, instanceid):
    """Convert an HTML file to AsciiDoc via one of two pypandoc entry points.

    :param file: path of the HTML input file
    :param stdin: when truthy, the file is read here and its text is
        converted into ``<instanceid>.asciidoc``; otherwise pandoc is given
        the file path and the converted text is discarded
    :param instanceid: stem of the output file used by the text branch
    """
    if not stdin:
        # this works
        pypandoc.convert_file(file, "asciidoc", format="html")
        return
    with open(file, 'r') as filehandle:
        html_text = filehandle.read()
    # this fails
    target = "%s.asciidoc" % instanceid
    pypandoc.convert_text(html_text, "asciidoc", format="html",
                          outputfile=target)
def main(input_file, output='', to='', extra_args=None):
    """
    the main process

    When ``to`` is given the file is pre-compiled, compiled with the
    function selected for that format and handed to pandoc; otherwise the
    file is compiled through ``__compile_with_reg_data__``.

    :param input_file: the path of input mdac file
    :param output: the output file full path
    :param to: the output format of the file
    :param extra_args: a list of extra arguments provided to pandoc
    """
    if to:
        # The two literals are concatenated, so the first needs a trailing
        # space to keep "in" and "your" apart.
        logging.warning('it is recommended to include command line '
                        'arguments in your mdac file meta')
        # get the file name if no file name is specified
        if not output:
            output = __get_output_filename__(input_file=input_file,
                                             config={},
                                             output_format=to)
        compile_function = __get_compile_function__(to)
        with open(input_file, 'r', encoding='utf-8') as f:
            input_content = f.read()
        pre_compile_res = pre_compile(input_content)
        compile_res = compile_function(pre_compile_res)
        # pypandoc iterates over extra_args, so the None default must be
        # replaced by an empty list before it is forwarded.
        pandoc_args = [] if extra_args is None else extra_args
        pypandoc.convert_text(source=compile_res,
                              outputfile=output,
                              format='md',
                              to=to,
                              extra_args=pandoc_args)
    else:
        __compile_with_reg_data__(input_file)
def read_description(filename):
    """Return the UTF-8 file's content, converted from Markdown to rst.

    If pypandoc cannot be imported the raw file content is returned.

    :param filename: path of the Markdown file
    """
    with codecs.open(filename, encoding='utf-8') as handle:
        try:
            import pypandoc
            markdown = handle.read()
            return pypandoc.convert_text(markdown, to='rst', format='md')
        except ImportError:
            # the import failed before anything was read from the handle
            return handle.read()
def get_long_description():
    """Pypi doesn't like .md - I don't like .rst - let's compromise.

    Reads ``README.md`` and ``CHANGELOG.md`` from this file's directory and
    returns them joined by a blank line. Both files are decoded as UTF-8
    explicitly so the result does not depend on the platform's default
    encoding. When ``upload`` is among the command line arguments, both
    texts are converted to rst and the changelog is passed through
    ``linkify``.
    """
    here = Path(__file__).parent
    readme = (here / 'README.md').read_text(encoding='utf-8')
    changelog = (here / 'CHANGELOG.md').read_text(encoding='utf-8')
    if 'upload' in sys.argv:
        # pypandoc is only needed (and imported) when uploading
        import pypandoc
        readme = pypandoc.convert_text(readme, to='rst', format='md')
        changelog = pypandoc.convert_text(changelog, to='rst', format='md')
        # NOTE(review): linkify is assumed to belong to the upload branch;
        # the original nesting could not be recovered - confirm.
        changelog = linkify(changelog)
    return "%s\n\n%s" % (readme, changelog)
def md2html(content, to_file):
    """Render Markdown to an HTML file.

    :param content: Markdown text, or the path of an existing file whose
        content is used instead
    :param to_file: output path; ``.html`` is appended unless the path
        already has that extension
    """
    import pypandoc
    if os.path.isfile(content):
        # the context manager closes the handle once the text is read
        with open(content) as source:
            content = source.read()
    if os.path.splitext(to_file)[1] != '.html':
        to_file += '.html'
    output = pypandoc.convert_text(content, 'html', format='md')
    with open(to_file, 'w') as f:
        f.write(output)
def get_long_description():
    """Return the README text, converted to rst when pypandoc allows it.

    The unconverted text is returned if pypandoc cannot be imported or the
    conversion raises ``IOError``.
    """
    markdown = _read(README_PATH)
    try:
        import pypandoc
        return pypandoc.convert_text(markdown, to='rst', format='md')
    except (IOError, ImportError):
        # best effort: keep the original text
        return markdown
def to_pdf_file(self, filename):
    """Render the results through ``PDF_TEMPLATE`` and write a PDF file.

    Two tables are built with ``tabulate`` first: a severity-by-confidence
    count of the results, and a per-finding occurrence count ordered from
    highest to lowest severity rank and, within a rank, from most to least
    occurrences. The rendered template is then converted by pandoc.

    :param filename: path of the PDF file pandoc writes
    """
    results = self.sorted_results
    rankings = list(reversed(bandit.RANKING))

    def tally(confidence, severity):
        # number of results with exactly this confidence/severity pair
        return sum(
            1 for res in results
            if res['issue_confidence'] == confidence
            and res['issue_severity'] == severity
        )

    summary_rows = [
        [severity] + [tally(confidence, severity) for confidence in rankings]
        for severity in rankings
    ]
    summary_table = tabulate.tabulate(
        summary_rows,
        headers=[''] + rankings,
        tablefmt='markdown'
    )

    occurrences = collections.Counter(
        (res['test_id'], res['test_name'], res['issue_severity'])
        for res in results
    )
    finding_rows = [
        (test_id, test_name, severity, count)
        for (test_id, test_name, severity), count in occurrences.items()
    ]
    # ascending by (severity rank, count); the order is flipped below
    finding_rows.sort(key=lambda row: (bandit.RANKING_VALUES[row[2]], row[3]))
    findings_table = tabulate.tabulate(
        reversed(finding_rows),
        headers=('ID', 'Finding Name', 'Severity', 'Occurrences'),
        tablefmt='markdown'
    )

    text = PDF_TEMPLATE.render(
        extra=self.data.get('_jj'),
        findings_table=findings_table,
        results=results,
        python_version=self.data.get('python_version'),
        summary_table=summary_table,
        timestamp=self.generated_at
    )
    pypandoc.convert_text(
        text,
        'pdf',
        extra_args=['--latex-engine=xelatex'],
        format='markdown',
        outputfile=filename
    )
def markdown_to_reveal(text: str, config: Config) -> str:
    """
    Convert Markdown text into an HTML (reveal.js) output string.

    Parameters
    ----------
    text
        Markdown text to convert to HTML.
    config
        Markdownreveal configuration.

    Returns
    -------
    The converted string.
    """
    pandoc_args = ['-s', '--slide-level=2', '-V', 'revealjs-url=revealjs']
    if config['katex']:
        pandoc_args += [
            '--katex=katex/katex.min.js',
            '--katex-stylesheet=katex/katex.min.css',
        ]
    pandoc_args += pandoc_extra_to_args(config)
    pandoc_args += reveal_extra_to_args(config)

    # the emoji extension is switched on through the input format name
    source_format = 'markdown+emoji' if config['emoji_codes'] else 'markdown'

    html = convert_text(
        source=text,
        format=source_format,
        to='revealjs',
        extra_args=pandoc_args,
    )
    # HTML substitution
    return tweak_html(html, config)
def read_long_description():
    """Return ``README.md`` up to its screenshots section, converted to rst.

    This is best effort: any ordinary failure (pypandoc missing, README
    missing, no ``# Screenshots`` heading, conversion error) yields an
    empty string. ``Exception`` is caught rather than everything, so
    ``KeyboardInterrupt`` and ``SystemExit`` still propagate.

    :return: the rst text, or ``""`` on failure
    """
    try:
        import pypandoc
        with open("README.md") as f:
            text = f.read()
        # Remove screenshots as they get rendered poorly on PyPi
        stripped_text = text[:text.index("# Screenshots")].rstrip()
        return pypandoc.convert_text(stripped_text, 'rst', format='md')
    except Exception:
        return ""
def _process_value(self, config_section, config_key, value_type):
"""
Process the value
:param config_section: The section in the config
:param config_key: The key in the config
:param value_type: The type of the value
:return: The processed value
"""
if config_section not in self.config:
return None
if config_key not in self.config[config_section]:
return None
value = self.config[config_section][config_key]
if value_type == bool:
value = value.strip().lower() == 'true'
elif value_type == str:
if value.startswith('file://'):
value_path = os.path.join(self.base_path, value[len('file://'):])
with open(value_path, 'r') as value_file:
value = value_file.read()
if value_path.lower().endswith('.md'):
try:
import pypandoc
value = pypandoc.convert_text(value, 'rst', format='md')
value = value.replace("\r", "")
except ImportError:
print("Pandoc not found. Markdown to reStructuredText conversion failed.")
elif value_type == list:
if value.startswith('file://'):
value_path = os.path.join(self.base_path, value[len('file://'):])
with open(value_path, 'r') as value_file:
value = value_file.readlines()
value = filter(lambda k: bool(k), value)
value = list(map(lambda k: k.strip().replace('\n', ''), value))
else:
value = value.split(',')
return value
def as_json(document):
    "Parsed JSON that pandoc emits for the markdown document"
    ast_text = pypandoc.convert_text(document, to='json', format='markdown')
    return json.loads(ast_text)
def convert(source: str, to: str, extra_args=(),
            output_file: str=None) -> None:
    """
    Stitch a source document and convert the result with pandoc.

    Parameters
    ----------
    source : str
        Document passed to ``Stitch.stitch``.
    to : str
        Output format, given to both ``Stitch`` and pandoc.
    extra_args : iterable
        Extra pandoc arguments; ``--standalone`` and ``--self-contained``
        are also reported to ``Stitch``.
    output_file : str
        Path pandoc writes to; its base name without extension names the
        ``Stitch`` instance (``'std_out'`` when omitted).

    Notes
    -----
    Either writes to ``output_file`` or prints to stdout.
    """
    if output_file is None:
        output_name = 'std_out'
    else:
        output_name = os.path.splitext(os.path.basename(output_file))[0]

    stitcher = Stitch(
        name=output_name,
        to=to,
        standalone='--standalone' in extra_args,
        self_contained='--self-contained' in extra_args,
    )
    # pandoc consumes the stitched document as JSON text
    document_json = json.dumps(stitcher.stitch(source))
    newdoc = pypandoc.convert_text(document_json, to, format='json',
                                   extra_args=extra_args,
                                   outputfile=output_file)
    if output_file is None:
        print(newdoc)
def tokenize(source: str) -> dict:
    """
    Parse a markdown document into pandoc's JSON AST.
    """
    ast_text = pypandoc.convert_text(source, to='json', format='markdown')
    return json.loads(ast_text)
def convert_figure(self):
    r""" Convert the LaTeX table text to markdown with pandoc and append a
    caption line carrying the label; the result is stored in
    ``self.output_content``. """
    conversion_options = dict(
        to='markdown',
        format='latex',
        extra_args=["--mathjax"],
    )
    try:
        converted_table = pypandoc.convert_text(
            self.table_text, **conversion_options)
    except AttributeError:
        # for pypandoc version before 1.2
        converted_table = pypandoc.convert(
            self.table_text, **conversion_options)

    # fall back to a label derived from the uid when none was given
    this_label = self.label_text or "tbl:" + self.uid
    caption = self.caption_text.replace("\n", " ")
    self.output_content = "{}: {} {{#{}}}".format(
        converted_table, caption, this_label)
def run_pandoc(content, extra=None):
    """Convert LaTeX ``content`` to Markdown with pandoc and return it.

    The conversion works on the string directly; no temporary file is
    created here.

    :param content: LaTeX source text
    :param extra: optional list of extra arguments for pandoc; defaults to
        no extra arguments
    :return: the converted text returned by ``pypandoc.convert_text``
    """
    print("Running Pandoc (LT -> MD)")
    # a fresh list per call instead of a shared mutable default
    extra_args = [] if extra is None else extra
    return pypandoc.convert_text(content, "md", format="latex",
                                 extra_args=extra_args)
def __invoke_pandoc__(input_content: str, output_format: str,
                      extra_args: dict, output_file: str, default: dict):
    """
    invoke pandoc via pypandoc.

    :param input_content: the markdown content ready to be compiled
    :param output_format: a string indicate the output format of the file
                          (like 'beamer', 'pdf', 'html')
    :param extra_args: all the other argument that pandoc accepts;
                       overrides entries of ``default`` with the same key
    :param output_file: the file name of the output file
    :param default: the default argument dict.
                    the key is the long name of the argument, the value
                    is its value (True for a bare flag, False to omit it,
                    a list to repeat the option once per element)
    """
    def __convert_arg(arg: Tuple[str, Union[str, bool, list]]) -> list:
        """
        a helper that converts an arg tuple into command line arguments

        :param arg: a tuple, where the first is the long name of the argument
                    the second is the value of the argument
        :return: a list of separate command line arguments: one bare flag
                 for True, one ``--name=value`` per element for a list,
                 a single ``--name=value`` otherwise
        """
        arg_name, val = arg
        # a false value is filtered out by the enclosing function
        if val is True:
            return ['--{arg}'.format(arg=arg_name)]
        if isinstance(val, list):
            # One entry per element: each entry is a separate argv item,
            # so the options must not be joined into one string.
            return ["--{arg}={val}".format(arg=arg_name, val=element)
                    for element in val]
        return ["--{arg}={val}".format(arg=arg_name, val=val)]

    # merge the two dict
    arg_dict = default.copy()
    arg_dict.update(extra_args)  # default will be overloaded via extra_args

    # flatten into one argument list, excluding the options set to False
    converted_args = []
    for arg in arg_dict.items():
        if arg[1] is not False:
            converted_args.extend(__convert_arg(arg))

    # invoke pandoc
    pypandoc.convert_text(source=input_content, format='md',
                          to=output_format, outputfile=output_file,
                          extra_args=converted_args)