From 8bc3c51ee58d6ba33f14f52ad7c7f6b6b91b0ea2 Mon Sep 17 00:00:00 2001 From: Colin Date: Sun, 27 Oct 2024 13:10:42 +0000 Subject: [PATCH] Added parameter to control if files are sorted before being passed to pandoc (#378) * Added parameter to control if files are sorted before being passed to pandoc * fix: addressing params ordering comment in PR review --------- Co-authored-by: Jessica Tegner --- README.md | 1 + pypandoc/__init__.py | 18 +++++++++++++----- tests.py | 11 +++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5080b76..1881c16 100644 --- a/README.md +++ b/README.md @@ -312,6 +312,7 @@ Note that for citeproc tests to pass you'll need to have [pandoc-citeproc](https * [Alex Kneisel](https://github.com/hey-thanks/) - Added pathlib.Path support to convert_file. * [Juho Vepsäläinen](https://github.com/bebraw/) - Creator and former maintainer of pypandoc * [Connor](https://github.com/DisSupEng/) - Updated Dockerfile to Python 3.9 image and added docker compose file +* [Colin Bull](https://github.com/colinbull) - Added ability to control whether files are sorted before being passed to pandoc process. ## License diff --git a/pypandoc/__init__.py b/pypandoc/__init__.py index f8e23ae..46e4128 100644 --- a/pypandoc/__init__.py +++ b/pypandoc/__init__.py @@ -81,6 +81,8 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin :param bool sandbox: Run pandoc in pandocs own sandbox mode, limiting IO operations in readers and writers to reading the files specified on the command line. Anyone using pandoc on untrusted user input should use this option. 
Note: This only does something, on pandoc >= 2.15 (Default value = False) + :param str cworkdir: set the current working directory (Default value = None) + :returns: converted string (unicode) or an empty string if an outputfile was given :rtype: unicode @@ -98,7 +100,7 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:Union[str, None]=None, extra_args:Iterable=(), encoding:str='utf-8', outputfile:Union[None, str, Path]=None, filters:Union[Iterable, None]=None, verify_format:bool=True, sandbox:bool=False, - cworkdir:Union[str, None]=None) -> str: + cworkdir:Union[str, None]=None, sort_files=True) -> str: """Converts given `source` from `format` to `to`. :param (str, list, pathlib.Path) source_file: If a string, should be either @@ -131,6 +133,10 @@ def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:U :param bool sandbox: Run pandoc in pandocs own sandbox mode, limiting IO operations in readers and writers to reading the files specified on the command line. Anyone using pandoc on untrusted user input should use this option. 
Note: This only does something, on pandoc >= 2.15 (Default value = False) + :param str cworkdir: set the current working directory (Default value = None) + + :param bool sort_files: causes the files to be sorted before being passed to pandoc (Default value = True) + :returns: converted string (unicode) or an empty string if an outputfile was given :rtype: unicode @@ -200,7 +206,7 @@ def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:U return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args, outputfile=outputfile, filters=filters, verify_format=verify_format, sandbox=sandbox, - cworkdir=cworkdir) + cworkdir=cworkdir, sort_files=sort_files) def _identify_path(source) -> bool: @@ -356,7 +362,7 @@ def _validate_formats(format, to, outputfile): def _convert_input(source, format, input_type, to, extra_args=(), outputfile=None, filters=None, verify_format=True, - sandbox=False, cworkdir=None): + sandbox=False, cworkdir=None, sort_files=True): _check_log_handler() @@ -379,8 +385,10 @@ def _convert_input(source, format, input_type, to, extra_args=(), input_file = source else: input_file = [] - - input_file = sorted(input_file) + + if sort_files: + input_file = sorted(input_file) + args = [__pandoc_path, '--from=' + format] args.append('--to=' + to) diff --git a/tests.py b/tests.py index 097cd8f..e4dce1c 100755 --- a/tests.py +++ b/tests.py @@ -31,13 +31,13 @@ def capture(command, *args, **kwargs): @contextlib.contextmanager -def closed_tempfile(suffix, text=None, dir_name=None): +def closed_tempfile(suffix, text=None, dir_name=None, prefix=None): file_name = None try: if dir_name: dir_name = tempfile.mkdtemp(suffix=dir_name) - with tempfile.NamedTemporaryFile('w+t', suffix=suffix, delete=False, dir=dir_name) as test_file: + with tempfile.NamedTemporaryFile('w+t', suffix=suffix, prefix=prefix, delete=False, dir=dir_name) as test_file: file_name = test_file.name if text: test_file.write(text) @@ -201,6 +201,13 @@ 
def test_basic_conversion_from_multiple_files(self): received = pypandoc.convert_file([file_name1,file_name2], 'html') self.assertEqualExceptForNewlineEnd(expected, received) + def test_sorting_rules_applied_for_multiple_files(self): + with closed_tempfile('.md', prefix='1_', text='some title 1') as file_name1: + with closed_tempfile('.md', prefix='2_', text='some title 2') as file_name2: + expected = '<p>some title 2</p>\n<p>some title 1</p>' + received = pypandoc.convert_file([file_name2,file_name1], 'html', sort_files=False) + self.assertEqualExceptForNewlineEnd(expected, received) + def test_basic_conversion_from_file_pattern(self): received = pypandoc.convert_file("./*.md", 'html') received = received.lower()