From 74422566c664e98e14cc411c8660153c37d774bf Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 12:02:37 +0000
Subject: [PATCH 1/7] disable multiprocessing via config

---
 convert.py              | 6 ++++--
 marker/config/parser.py | 4 +++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/convert.py b/convert.py
index bf99e260..ef9d3cf0 100755
--- a/convert.py
+++ b/convert.py
@@ -2,7 +2,6 @@
 
 os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for a simple op, which is not supported on MPS
 os.environ["IN_STREAMLIT"] = "true" # Avoid multiprocessing inside surya
-os.environ["PDFTEXT_CPU_WORKERS"] = "1" # Avoid multiprocessing inside pdftext
 
 import argparse
 import torch.multiprocessing as mp
@@ -67,7 +66,7 @@ def process_single_pdf(args):
 @click.option("--chunk_idx", type=int, default=0, help="Chunk index to convert")
 @click.option("--num_chunks", type=int, default=1, help="Number of chunks being processed in parallel")
 @click.option("--max_files", type=int, default=None, help="Maximum number of pdfs to convert")
-@click.option("--workers", type=int, default=5, help="Number of worker processes to use.")
+@click.option("--workers", type=int, default=3, help="Number of worker processes to use.")
 def main(in_folder: str, **kwargs):
     in_folder = os.path.abspath(in_folder)
     files = [os.path.join(in_folder, f) for f in os.listdir(in_folder)]
@@ -84,6 +83,9 @@ def main(in_folder: str, **kwargs):
     if kwargs["max_files"]:
         files_to_convert = files_to_convert[:kwargs["max_files"]]
 
+    # Disable nested multiprocessing inside pdftext (the config parser maps this flag to pdftext_workers=1)
+    kwargs["disable_multiprocessing"] = True
+
     total_processes = min(len(files_to_convert), kwargs["workers"])
 
     try:
diff --git a/marker/config/parser.py b/marker/config/parser.py
index 62893904..ee1999f1 100644
--- a/marker/config/parser.py
+++ b/marker/config/parser.py
@@ -57,6 +57,9 @@ def generate_config_dict(self) -> Dict[str, any]:
                     if v:
                         with open(v, "r") as f:
                             config.update(json.load(f))
+                case "disable_multiprocessing":
+                    if v:
+                        config["pdftext_workers"] = 1
         return config
 
     def get_renderer(self):
@@ -94,4 +97,3 @@ def get_output_folder(self, filepath: str):
     def get_base_filename(self, filepath: str):
         basename = os.path.basename(filepath)
         return os.path.splitext(basename)[0]
-

From 44f9eb729eec154c3f3db67ac8801bc699b7119e Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 14:04:59 +0000
Subject: [PATCH 2/7] table ocr -> table recognition changes [skip ci]

---
 marker/processors/table.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/marker/processors/table.py b/marker/processors/table.py
index 37402961..c5d01738 100644
--- a/marker/processors/table.py
+++ b/marker/processors/table.py
@@ -17,7 +17,7 @@ class TableProcessor(BaseProcessor):
     detect_boxes = False
     detector_batch_size = None
     table_rec_batch_size = None
-    ocr_batch_size = None
+    recognition_batch_size = None
 
     def __init__(
         self,
@@ -77,7 +77,7 @@ def __call__(self, document: Document):
             needs_ocr,
             [self.table_rec_model, self.table_rec_model.processor, self.recognition_model, self.recognition_model.processor],
             table_rec_batch_size=self.get_table_rec_batch_size(),
-            ocr_batch_size=self.get_ocr_batch_size()
+            ocr_batch_size=self.get_recognition_batch_size()
         )
 
         for table_d, table_res in zip(table_data, tables):
@@ -101,9 +101,9 @@ def get_table_rec_batch_size(self):
             return 64
         return 8
 
-    def get_ocr_batch_size(self):
-        if self.ocr_batch_size is not None:
-            return self.ocr_batch_size
+    def get_recognition_batch_size(self):
+        if self.recognition_batch_size is not None:
+            return self.recognition_batch_size
         elif settings.TORCH_DEVICE_MODEL == "mps":
             return 32
         elif settings.TORCH_DEVICE_MODEL == "cuda":

From dd83ad1bc6a68c794e00d26496c614218773b470 Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 14:07:09 +0000
Subject: [PATCH 3/7] add printer for config crawling for all processors,
 builders and converters [skip ci]

---
 convert_single.py           |  6 +++--
 marker/builders/document.py | 12 ++++++++++
 marker/config/parser.py     |  2 ++
 marker/config/printer.py    | 44 +++++++++++++++++++++++++++++++++++++
 marker/converters/pdf.py    | 10 +++++++++
 5 files changed, 72 insertions(+), 2 deletions(-)
 create mode 100644 marker/config/printer.py

diff --git a/convert_single.py b/convert_single.py
index 15875454..4ef90b4a 100755
--- a/convert_single.py
+++ b/convert_single.py
@@ -1,20 +1,22 @@
 import os
+
 os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" # Transformers uses .isin for a simple op, which is not supported on MPS
 
 import time
 
 import click
 
+from marker.config.parser import ConfigParser
+from marker.config.printer import CustomClickPrinter
 from marker.converters.pdf import PdfConverter
 from marker.logger import configure_logging
 from marker.models import create_model_dict
 from marker.output import save_output
-from marker.config.parser import ConfigParser
 
 configure_logging()
 
 
-@click.command(help="Convert a single PDF to markdown.")
+@click.command(cls=CustomClickPrinter, help="Convert a single PDF to markdown.")
 @click.argument("fpath", type=str)
 @ConfigParser.common_options
 def main(fpath: str, **kwargs):
diff --git a/marker/builders/document.py b/marker/builders/document.py
index 59d59bf8..d9729beb 100644
--- a/marker/builders/document.py
+++ b/marker/builders/document.py
@@ -10,6 +10,18 @@
 
 
 class DocumentBuilder(BaseBuilder):
+    """
+    Constructs a Document given a PdfProvider, LayoutBuilder, and OcrBuilder.
+
+    Attributes:
+        lowres_image_dpi (int): 
+            DPI setting for low-resolution page images used for Layout and Line Detection.
+            Default is 96.
+
+        highres_image_dpi (int): 
+            DPI setting for high-resolution page images used for OCR.
+            Default is 192.
+    """
     lowres_image_dpi: int = 96
     highres_image_dpi: int = 192
 
diff --git a/marker/config/parser.py b/marker/config/parser.py
index ee1999f1..bc0016fe 100644
--- a/marker/config/parser.py
+++ b/marker/config/parser.py
@@ -31,6 +31,8 @@ def common_options(fn):
         fn = click.option("--config_json", type=str, default=None,
                           help="Path to JSON file with additional configuration.")(fn)
         fn = click.option("--languages", type=str, default=None, help="Comma separated list of languages to use for OCR.")(fn)
+        fn = click.option("--disable_multiprocessing", is_flag=True, default=False, help="Disable multiprocessing.")(fn)
+        fn = click.option('-l', is_flag=True, help="List available builders, processors and converters")(fn)
         return fn
 
     def generate_config_dict(self) -> Dict[str, any]:
diff --git a/marker/config/printer.py b/marker/config/printer.py
new file mode 100644
index 00000000..cc5207ab
--- /dev/null
+++ b/marker/config/printer.py
@@ -0,0 +1,44 @@
+import importlib
+import inspect
+import pkgutil
+
+import click
+
+from marker.builders import BaseBuilder
+from marker.converters import BaseConverter
+from marker.processors import BaseProcessor
+
+
+def find_subclasses(base_class):
+    """
+    Walk the submodules of the package that defines base_class and return {class name: class}
+    for every strict subclass found; modules that fail to import are skipped ({} if not a package).
+    """
+    subclasses = {}
+    module_name = base_class.__module__
+    package = importlib.import_module(module_name)
+    if hasattr(package, '__path__'):
+        for _, module_name, _ in pkgutil.walk_packages(package.__path__, module_name + "."):
+            try:
+                module = importlib.import_module(module_name)
+                for name, obj in inspect.getmembers(module, inspect.isclass):
+                    if issubclass(obj, base_class) and obj is not base_class:
+                        subclasses[name] = obj
+            except ImportError:
+                pass
+    return subclasses
+
+
+class CustomClickPrinter(click.Command):
+    def parse_args(self, ctx, args):
+        # If '-l' is in the arguments, handle it and exit
+        if '-l' in args:
+            base_classes = [BaseBuilder, BaseProcessor, BaseConverter]
+            for base in base_classes:
+                subclasses = find_subclasses(base)
+                for class_name, class_type in subclasses.items():
+                    doc = class_type.__doc__
+                    if doc:
+                        click.echo(f"{class_name}: {doc}")
+            ctx.exit()
+        super().parse_args(ctx, args)
diff --git a/marker/converters/pdf.py b/marker/converters/pdf.py
index 17a8e27d..24c88e26 100644
--- a/marker/converters/pdf.py
+++ b/marker/converters/pdf.py
@@ -28,6 +28,16 @@
 
 
 class PdfConverter(BaseConverter):
+    """
+    A converter for processing and rendering PDF files into Markdown, JSON, HTML and other formats.
+
+    Attributes:
+        override_map (Dict[BlockTypes, Type[Block]]): 
+            A mapping to override the default block classes for specific block types. 
+            The keys are `BlockTypes` enum values, representing the types of blocks, 
+            and the values are corresponding `Block` class implementations to use 
+            instead of the defaults.
+    """
     override_map: Dict[BlockTypes, Type[Block]] = defaultdict()
 
     def __init__(self, artifact_dict: Dict[str, Any], processor_list: List[str] | None = None, renderer: str | None = None, config=None):

From 39f0999e318146009af835ebf60327c50be1be56 Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 14:33:57 +0000
Subject: [PATCH 4/7] more class docstrings [skip ci]

---
 marker/builders/layout.py    | 16 ++++++++++++++++
 marker/builders/ocr.py       | 17 ++++++++++++++++-
 marker/builders/structure.py | 12 ++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/marker/builders/layout.py b/marker/builders/layout.py
index cf1500a6..464f027b 100644
--- a/marker/builders/layout.py
+++ b/marker/builders/layout.py
@@ -16,6 +16,22 @@
 
 
 class LayoutBuilder(BaseBuilder):
+    """
+    A builder for performing layou detection on PDF pages and merging the results into the document.
+
+    Attributes:
+        batch_size (int):
+            The batch size to use for the layout model.
+            Default is None, which will use the default batch size for the model.
+
+        layout_coverage_min_lines (int):
+            The minimum number of PdfProvider lines that must be covered by the layout model
+            to consider the lines from the PdfProvider valid. Default is 1.
+
+        layout_coverage_threshold (float):
+            The minimum coverage ratio required for the layout model to consider
+            the lines from the PdfProvider valid. Default is 0.3.
+    """
     batch_size = None
     layout_coverage_min_lines = 1
     layout_coverage_threshold = .3
diff --git a/marker/builders/ocr.py b/marker/builders/ocr.py
index b4d53a7c..e169da23 100644
--- a/marker/builders/ocr.py
+++ b/marker/builders/ocr.py
@@ -17,6 +17,21 @@
 
 
 class OcrBuilder(BaseBuilder):
+    """
+    A builder for performing OCR on PDF pages and merging the results into the document.
+
+    Attributes:
+        detection_batch_size (int):
+            The batch size to use for the detection model.
+            Default is None, which will use the default batch size for the model.
+
+        recognition_batch_size (int):
+            The batch size to use for the recognition model.
+            Default is None, which will use the default batch size for the model.
+
+        languages (List[str]):
+            A list of languages to use for OCR. Default is None.
+    """
     recognition_batch_size: int | None = None
     detection_batch_size: int | None = None
     languages: List[str] | None = None
@@ -51,7 +66,7 @@ def ocr_extraction(self, document: Document, provider: PdfProvider) -> ProviderP
         page_list = [page for page in document.pages if page.text_extraction_method == "surya"]
         recognition_results = run_ocr(
             images=[page.lowres_image for page in page_list],
-            langs=[None] * len(page_list),
+            langs=[self.languages] * len(page_list),
             det_model=self.detection_model,
             det_processor=self.detection_model.processor,
             rec_model=self.recognition_model,
diff --git a/marker/builders/structure.py b/marker/builders/structure.py
index c6dfb290..a371e6ed 100644
--- a/marker/builders/structure.py
+++ b/marker/builders/structure.py
@@ -7,6 +7,18 @@
 
 
 class StructureBuilder(BaseBuilder):
+    """
+    A builder for grouping blocks together based on their structure.
+
+    Attributes:
+        gap_threshold (float):
+            The minimum gap between blocks to consider them part of the same group.
+            Default is 0.05.
+
+        list_gap_threshold (float):
+            The minimum gap between list items to consider them part of the same group.
+            Default is 0.1.
+    """
     gap_threshold: int = .05
     list_gap_threshold: int = .1
 

From 41f049ec8b944f98ef2d9fcf86abf8d887ee2d54 Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 14:34:35 +0000
Subject: [PATCH 5/7] oops typo [skip ci]

---
 marker/builders/layout.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/marker/builders/layout.py b/marker/builders/layout.py
index 464f027b..03c5bb74 100644
--- a/marker/builders/layout.py
+++ b/marker/builders/layout.py
@@ -17,7 +17,7 @@
 
 class LayoutBuilder(BaseBuilder):
     """
-    A builder for performing layou detection on PDF pages and merging the results into the document.
+    A builder for performing layout detection on PDF pages and merging the results into the document.
 
     Attributes:
         batch_size (int):

From 3b383f83aa086cdb81e8645eb7dde2cee7a9d8d6 Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 15:05:31 +0000
Subject: [PATCH 6/7] add docstrings for all the processors

---
 marker/config/printer.py           |  2 +-
 marker/processors/code.py          |  4 +++-
 marker/processors/debug.py         | 30 ++++++++++++++++++++++++++++-
 marker/processors/document_toc.py  |  5 ++++-
 marker/processors/equation.py      | 18 ++++++++++++++++-
 marker/processors/ignoretext.py    |  9 ++++++++-
 marker/processors/sectionheader.py | 31 +++++++++++++++++++++++++-----
 marker/processors/table.py         | 22 ++++++++++++++++++++-
 marker/processors/text.py          |  8 ++++++++
 9 files changed, 117 insertions(+), 12 deletions(-)

diff --git a/marker/config/printer.py b/marker/config/printer.py
index cc5207ab..0c9d2b00 100644
--- a/marker/config/printer.py
+++ b/marker/config/printer.py
@@ -38,7 +38,7 @@ def parse_args(self, ctx, args):
                 subclasses = find_subclasses(base)
                 for class_name, class_type in subclasses.items():
                     doc = class_type.__doc__
-                    if doc:
+                    if doc and "Attributes:" in doc:
                         click.echo(f"{class_name}: {doc}")
             ctx.exit()
         super().parse_args(ctx, args)
diff --git a/marker/processors/code.py b/marker/processors/code.py
index a4f8cf04..9cd3e925 100644
--- a/marker/processors/code.py
+++ b/marker/processors/code.py
@@ -5,8 +5,10 @@
 
 
 class CodeProcessor(BaseProcessor):
+    """
+    A processor for formatting code blocks.
+    """
     block_types = (BlockTypes.Code, )
-    y_top_threshold = 2 # pixels
 
     def __call__(self, document: Document):
         for page in document.pages:
diff --git a/marker/processors/debug.py b/marker/processors/debug.py
index d6f4d3fe..3cf6cbfc 100644
--- a/marker/processors/debug.py
+++ b/marker/processors/debug.py
@@ -4,13 +4,41 @@
 import requests
 from PIL import Image, ImageDraw, ImageFont
 
-from marker.settings import settings
 from marker.processors import BaseProcessor
 from marker.schema import BlockTypes
 from marker.schema.document import Document
+from marker.settings import settings
 
 
 class DebugProcessor(BaseProcessor):
+    """
+    A processor for debugging the document.
+
+    Attributes:
+        debug_data_folder (str):
+            The folder to dump debug data to.
+            Default is "debug_data".
+
+        debug_layout_images (bool):
+            Whether to dump layout debug images.
+            Default is False.
+
+        debug_pdf_images (bool):
+            Whether to dump PDF debug images.
+            Default is False.
+
+        debug_json (bool):
+            Whether to dump block debug data.
+            Default is False.
+
+        render_font (str):
+            The path to the font to use for rendering debug images.
+            Default is "GoNotoCurrent-Regular.ttf" in the FONT_DIR folder.
+
+        font_dl_path (str):
+            The path to download the font from.
+            Default is "https://github.com/satbyy/go-noto-universal/releases/download/v7.0".
+    """
     block_types = tuple()
     debug_data_folder: str = "debug_data"
     debug_layout_images: bool = False
diff --git a/marker/processors/document_toc.py b/marker/processors/document_toc.py
index c03e2fd3..8ddbcbb7 100644
--- a/marker/processors/document_toc.py
+++ b/marker/processors/document_toc.py
@@ -4,6 +4,9 @@
 
 
 class DocumentTOCProcessor(BaseProcessor):
+    """
+    A processor for generating a table of contents for the document.
+    """
     block_types = (BlockTypes.SectionHeader, )
 
     def __call__(self, document: Document):
@@ -19,4 +22,4 @@ def __call__(self, document: Document):
                     "page_id": page.page_id,
                     "polygon": block.polygon.polygon
                 })
-        document.table_of_contents = toc
\ No newline at end of file
+        document.table_of_contents = toc
diff --git a/marker/processors/equation.py b/marker/processors/equation.py
index c09f89f8..3a4dd405 100644
--- a/marker/processors/equation.py
+++ b/marker/processors/equation.py
@@ -4,13 +4,29 @@
 from texify.model.model import GenerateVisionEncoderDecoderModel
 from tqdm import tqdm
 
-from marker.settings import settings
 from marker.processors import BaseProcessor
 from marker.schema import BlockTypes
 from marker.schema.document import Document
+from marker.settings import settings
 
 
 class EquationProcessor(BaseProcessor):
+    """
+    A processor for recognizing equations in the document.
+
+    Attributes:
+        model_max_length (int):
+            The maximum number of tokens to allow for the Texify model.
+            Default is 384.
+
+        batch_size (int):
+            The batch size to use for the Texify model.
+            Default is None, which will use the default batch size for the model.
+
+        token_buffer (int):
+            The number of tokens to buffer above max for the Texify model.
+            Default is 256.
+    """
     block_types = (BlockTypes.Equation, )
     model_max_length = 384
     batch_size = None
diff --git a/marker/processors/ignoretext.py b/marker/processors/ignoretext.py
index 3e85d04e..eefd4e96 100644
--- a/marker/processors/ignoretext.py
+++ b/marker/processors/ignoretext.py
@@ -6,9 +6,16 @@
 
 
 class IgnoreTextProcessor(BaseProcessor):
+    """
+    A processor for ignoring text blocks that are common elements in the document.
+
+    Attributes:
+        common_element_threshold (float):
+            The minimum fraction of pages that a block must appear in to be considered a common element.
+            Default is 0.6.
+    """
     block_types = (BlockTypes.Text,)
     common_element_threshold = .6
-    max_blocks = 1
 
     def __call__(self, document: Document):
         first_blocks = []
diff --git a/marker/processors/sectionheader.py b/marker/processors/sectionheader.py
index 4fe2f7ea..f4d6ff3e 100644
--- a/marker/processors/sectionheader.py
+++ b/marker/processors/sectionheader.py
@@ -1,18 +1,39 @@
-from marker.processors import BaseProcessor
-from marker.schema import BlockTypes
-from marker.schema.document import Document
-
+import warnings
 from typing import Dict, List
+
 import numpy as np
 from sklearn.cluster import KMeans
 from sklearn.exceptions import ConvergenceWarning
 
+from marker.processors import BaseProcessor
+from marker.schema import BlockTypes
+from marker.schema.document import Document
+
 # Ignore sklearn warning about not converging
-import warnings
 warnings.filterwarnings("ignore", category=ConvergenceWarning)
 
 
 class SectionHeaderProcessor(BaseProcessor):
+    """
+    A processor for recognizing section headers in the document.
+
+    Attributes:
+        level_count (int):
+            The number of levels to use for headings.
+            Default is 4.
+
+        merge_threshold (float):
+            The minimum gap between headings to consider them part of the same group.
+            Default is 0.25.
+
+        default_level (int):
+            The default heading level to use if no heading level is detected.
+            Default is 2.
+
+        height_tolerance (float):
+            The minimum height of a heading to consider it a heading.
+            Default is 0.99.
+    """
     block_types = (BlockTypes.SectionHeader, )
     level_count = 4
     merge_threshold = .25
diff --git a/marker/processors/table.py b/marker/processors/table.py
index c5d01738..3cff8afc 100644
--- a/marker/processors/table.py
+++ b/marker/processors/table.py
@@ -6,13 +6,33 @@
 from tabled.assignment import assign_rows_columns
 from tabled.inference.recognition import get_cells, recognize_tables
 
-from marker.settings import settings
 from marker.processors import BaseProcessor
 from marker.schema import BlockTypes
 from marker.schema.document import Document
+from marker.settings import settings
 
 
 class TableProcessor(BaseProcessor):
+    """
+    A processor for recognizing tables in the document.
+
+    Attributes:
+        detect_boxes (bool):
+            Whether to detect boxes for the table recognition model.
+            Default is False.
+
+        detector_batch_size (int):
+            The batch size to use for the table detection model.
+            Default is None, which will use the default batch size for the model.
+
+        table_rec_batch_size (int):
+            The batch size to use for the table recognition model.
+            Default is None, which will use the default batch size for the model.
+
+        recognition_batch_size (int):
+            The batch size to use for the text recognition (OCR) model run on tables.
+            Default is None, which will use the default batch size for the model.
+    """
     block_types = (BlockTypes.Table, BlockTypes.TableOfContents, BlockTypes.Form)
     detect_boxes = False
     detector_batch_size = None
diff --git a/marker/processors/text.py b/marker/processors/text.py
index 09e3d806..b3ff0e20 100644
--- a/marker/processors/text.py
+++ b/marker/processors/text.py
@@ -10,6 +10,14 @@
 
 
 class TextProcessor(BaseProcessor):
+    """
+    A processor for merging text across pages and columns.
+
+    Attributes:
+        column_gap_ratio (float):
+            The minimum ratio of the column gap to the page width to consider a column break.
+            Default is 0.02.
+    """
     block_types = (BlockTypes.Text, BlockTypes.TextInlineMath)
     column_gap_ratio = 0.02  # column gaps are atleast 2% of the page width
 

From 72ba6ab0717c7411c94dc8a91e82751cbc52290b Mon Sep 17 00:00:00 2001
From: Moses Paul R <iammosespaulr@gmail.com>
Date: Thu, 21 Nov 2024 15:44:04 +0000
Subject: [PATCH 7/7] fix help [skip ci]

---
 marker/config/printer.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/marker/config/printer.py b/marker/config/printer.py
index 0c9d2b00..20eac045 100644
--- a/marker/config/printer.py
+++ b/marker/config/printer.py
@@ -30,15 +30,25 @@ def find_subclasses(base_class):
 
 
 class CustomClickPrinter(click.Command):
+    def get_help(self, ctx):
+        """Return click's standard help text for this command with a 'config --help' tip appended."""
+        # Return the text rather than echoing it: click echoes get_help()'s return value itself.
+        additional_help = (
+            "\n\nTip: Use 'config --help' to display all the attributes of the Builders, Processors, and Converters in Marker."
+        )
+        return super().get_help(ctx) + additional_help
+
     def parse_args(self, ctx, args):
-        # If '-l' is in the arguments, handle it and exit
-        if '-l' in args:
+        if 'config' in args and '--help' in args:
+            click.echo("Here is a list of all the Builders, Processors, and Converters in Marker along with their attributes:")
             base_classes = [BaseBuilder, BaseProcessor, BaseConverter]
             for base in base_classes:
+                click.echo(f"{base.__name__.removeprefix('Base')}s:\n")
+
                 subclasses = find_subclasses(base)
                 for class_name, class_type in subclasses.items():
                     doc = class_type.__doc__
                     if doc and "Attributes:" in doc:
-                        click.echo(f"{class_name}: {doc}")
+                        click.echo(f"  {class_name}: {doc}")
             ctx.exit()
         super().parse_args(ctx, args)