Skip to content

Commit

Permalink
Added parameter to control if files are sorted before being passed to…
Browse files Browse the repository at this point in the history
… pandoc (#378)

* Added parameter to control if files are sorted before being passed to pandoc

* fix: addressing params ordering comment in PR review

---------

Co-authored-by: Jessica Tegner <[email protected]>
  • Loading branch information
colinbull and JessicaTegner authored Oct 27, 2024
1 parent 50f3b08 commit 8bc3c51
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ Note that for citeproc tests to pass you'll need to have [pandoc-citeproc](https
* [Alex Kneisel](https://github.com/hey-thanks/) - Added pathlib.Path support to convert_file.
* [Juho Vepsäläinen](https://github.com/bebraw/) - Creator and former maintainer of pypandoc
* [Connor](https://github.com/DisSupEng/) - Updated Dockerfile to Python 3.9 image and added docker compose file
* [Colin Bull](https://github.com/colinbull) - Added the ability to control whether files are sorted before being passed to the pandoc process.

## License

Expand Down
18 changes: 13 additions & 5 deletions pypandoc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin
:param bool sandbox: Run pandoc in pandoc's own sandbox mode, limiting IO operations in readers and writers to reading the files specified on the command line. Anyone using pandoc on untrusted user input should use this option. Note: This only has an effect on pandoc >= 2.15
(Default value = False)
:param str cworkdir: set the current working directory (Default value = None)
:returns: converted string (unicode) or an empty string if an outputfile was given
:rtype: unicode
Expand All @@ -98,7 +100,7 @@ def convert_text(source:str, to:str, format:str, extra_args:Iterable=(), encodin
def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:Union[str, None]=None,
extra_args:Iterable=(), encoding:str='utf-8', outputfile:Union[None, str, Path]=None,
filters:Union[Iterable, None]=None, verify_format:bool=True, sandbox:bool=False,
cworkdir:Union[str, None]=None) -> str:
cworkdir:Union[str, None]=None, sort_files=True) -> str:
"""Converts given `source` from `format` to `to`.
:param (str, list, pathlib.Path) source_file: If a string, should be either
Expand Down Expand Up @@ -131,6 +133,10 @@ def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:U
:param bool sandbox: Run pandoc in pandoc's own sandbox mode, limiting IO operations in readers and writers to reading the files specified on the command line. Anyone using pandoc on untrusted user input should use this option. Note: This only has an effect on pandoc >= 2.15
(Default value = False)
:param str cworkdir: set the current working directory (Default value = None)
:param bool sort_files: if True, the source files are sorted before being passed to pandoc; if False, they are passed in the order given (Default value = True)
:returns: converted string (unicode) or an empty string if an outputfile was given
:rtype: unicode
Expand Down Expand Up @@ -200,7 +206,7 @@ def convert_file(source_file:Union[list, str, Path, Generator], to:str, format:U
return _convert_input(discovered_source_files, format, 'path', to, extra_args=extra_args,
outputfile=outputfile, filters=filters,
verify_format=verify_format, sandbox=sandbox,
cworkdir=cworkdir)
cworkdir=cworkdir, sort_files=sort_files)


def _identify_path(source) -> bool:
Expand Down Expand Up @@ -356,7 +362,7 @@ def _validate_formats(format, to, outputfile):

def _convert_input(source, format, input_type, to, extra_args=(),
outputfile=None, filters=None, verify_format=True,
sandbox=False, cworkdir=None):
sandbox=False, cworkdir=None, sort_files=True):

_check_log_handler()

Expand All @@ -379,8 +385,10 @@ def _convert_input(source, format, input_type, to, extra_args=(),
input_file = source
else:
input_file = []

input_file = sorted(input_file)

if sort_files:
input_file = sorted(input_file)

args = [__pandoc_path, '--from=' + format]

args.append('--to=' + to)
Expand Down
11 changes: 9 additions & 2 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def capture(command, *args, **kwargs):


@contextlib.contextmanager
def closed_tempfile(suffix, text=None, dir_name=None):
def closed_tempfile(suffix, text=None, dir_name=None, prefix=None):
file_name = None
try:
if dir_name:
dir_name = tempfile.mkdtemp(suffix=dir_name)

with tempfile.NamedTemporaryFile('w+t', suffix=suffix, delete=False, dir=dir_name) as test_file:
with tempfile.NamedTemporaryFile('w+t', suffix=suffix, prefix=prefix, delete=False, dir=dir_name) as test_file:
file_name = test_file.name
if text:
test_file.write(text)
Expand Down Expand Up @@ -201,6 +201,13 @@ def test_basic_conversion_from_multiple_files(self):
received = pypandoc.convert_file([file_name1,file_name2], 'html')
self.assertEqualExceptForNewlineEnd(expected, received)

def test_sorting_rules_applied_for_multiple_files(self):
with closed_tempfile('.md', prefix='1_', text='some title 1') as file_name1:
with closed_tempfile('.md', prefix='2_', text='some title 2') as file_name2:
expected = '<p>some title 2</p>\n<p>some title 1</p>'
received = pypandoc.convert_file([file_name2,file_name1], 'html', sort_files=False)
self.assertEqualExceptForNewlineEnd(expected, received)

def test_basic_conversion_from_file_pattern(self):
received = pypandoc.convert_file("./*.md", 'html')
received = received.lower()
Expand Down

0 comments on commit 8bc3c51

Please sign in to comment.