diff --git a/.gitignore b/.gitignore
index 933a116..0c6bc44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ wandb
 report.json
 benchmark_data
 debug_data
+temp.md
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/marker/v2/converters/pdf.py b/marker/v2/converters/pdf.py
index 0248aeb..680e418 100644
--- a/marker/v2/converters/pdf.py
+++ b/marker/v2/converters/pdf.py
@@ -37,13 +37,11 @@ def __call__(self, filepath: str, page_range: List[int] | None = None):
         equation_processor = EquationProcessor(self.texify_model)
         equation_processor(document)
 
-        # TODO: re-enable once we add OCR method
-        # table_processor = TableProcessor(self.detection_model, self.recognition_model, self.table_rec_model)
-        # table_processor(document)
+        table_processor = TableProcessor(self.detection_model, self.recognition_model, self.table_rec_model)
+        table_processor(document)
 
         renderer = MarkdownRenderer()
-        document_output = document.render()
-        return renderer(document_output)
+        return renderer(document)
 
 
 if __name__ == "__main__":
@@ -57,4 +55,5 @@ def __call__(self, filepath: str, page_range: List[int] | None = None):
         converter = PdfConverter()
         rendered = converter(temp_pdf.name)
 
-        print(rendered)
+        with open("temp.md", "w+", encoding="utf-8") as f:  # converted text is Unicode; do not depend on the locale codec
+            f.write(rendered)
diff --git a/marker/v2/processors/table.py b/marker/v2/processors/table.py
index 5dffc23..6c330aa 100644
--- a/marker/v2/processors/table.py
+++ b/marker/v2/processors/table.py
@@ -33,6 +33,7 @@ def __call__(self, document: Document):
             for block in page.children:
                 if block.block_type != self.block_type:
                     continue
+
                 image_poly = block.polygon.rescale((page.polygon.width, page.polygon.height), page.highres_image.size)
                 image = page.highres_image.crop(image_poly.bbox).convert("RGB")
 
@@ -42,9 +43,9 @@ def __call__(self, document: Document):
                     text_lines = get_page_text_lines(
                         filepath,
                         [page.page_id],
-                        page.highres_image.size,
+                        [page.highres_image.size],
                         flatten_pdf=True
-                    )
+                    )[0]
 
                 table_data.append({
                     "block_id": block.id,
@@ -54,10 +55,7 @@ def __call__(self, document: Document):
                     "img_size": page.highres_image.size
                 })
 
-        lst_format = zip(*(
-            [t[key] for t in table_data]
-            for key in ["table_image", "table_bbox", "img_size", "text_lines"]
-        ))
+        lst_format = [[t[key] for t in table_data] for key in ["table_image", "table_bbox", "img_size", "text_lines"]]
 
         cells, needs_ocr = get_cells(
             *lst_format,
diff --git a/marker/v2/providers/pdf.py b/marker/v2/providers/pdf.py
index 6628106..3c9de38 100644
--- a/marker/v2/providers/pdf.py
+++ b/marker/v2/providers/pdf.py
@@ -163,7 +163,10 @@ def merge_lines(
 
         return page_lines, page_spans
 
-    def font_flags_to_format(self, flags: int) -> Set[str]:
+    def font_flags_to_format(self, flags: int | None) -> Set[str]:
+        if flags is None:
+            return {"plain"}
+
         flag_map = {
             1: "FixedPitch",
             2: "Serif",
@@ -200,8 +203,11 @@ def font_flags_to_format(self, flags: int) -> Set[str]:
                 formats.add("plain")
         return formats
 
-    def font_names_to_format(self, font_name: str) -> Set[str]:
+    def font_names_to_format(self, font_name: str | None) -> Set[str]:
         formats = set()
+        if font_name is None:
+            return formats
+
         if "bold" in font_name.lower():
             formats.add("bold")
         if "ital" in font_name.lower():
@@ -226,16 +232,19 @@ def pdftext_extraction(self) -> Tuple[PageLines, PageSpans]:
                 for line in block["lines"]:
                     spans: List[Span] = []
                     for span in line["spans"]:
-                        if not span["text"].strip():
+                        if not span["text"]:
                             continue
                         font_formats = self.font_flags_to_format(span["font"]["flags"]).union(self.font_names_to_format(span["font"]["name"]))
+                        font_name = span["font"]["name"] or "Unknown"
+                        font_weight = span["font"]["weight"] or 0
+                        font_size = span["font"]["size"] or 0
                         spans.append(
                             Span(
                                 polygon=PolygonBox.from_bbox(span["bbox"]),
                                 text=span["text"],
-                                font=span["font"]["name"],
-                                font_weight=span["font"]["weight"],
-                                font_size=span["font"]["size"],
+                                font=font_name,
+                                font_weight=font_weight,
+                                font_size=font_size,
                                 minimum_position=span["char_start_idx"],
                                 maximum_position=span["char_end_idx"],
                                 formats=list(font_formats),
diff --git a/marker/v2/renderers/__init__.py b/marker/v2/renderers/__init__.py
index 5dfb934..7f6297c 100644
--- a/marker/v2/renderers/__init__.py
+++ b/marker/v2/renderers/__init__.py
@@ -1,4 +1,3 @@
-from enum import Enum
 from typing import Optional
 
 from pydantic import BaseModel
@@ -6,11 +5,6 @@
 from marker.v2.schema import BlockTypes
 
 
-class RenderFormat(str, Enum):
-    json = "json"
-    markdown = "markdown"
-
-
 class BaseRenderer:
     block_type: BlockTypes | None = None
 
@@ -19,6 +13,6 @@ def __init__(self, config: Optional[BaseModel | dict] = None):
             for k in config.model_fields:
                 setattr(self, k, config[k])
 
-    def __call__(self, document_output):
+    def __call__(self, document):
         # Children are in reading order
         raise NotImplementedError
diff --git a/marker/v2/renderers/html.py b/marker/v2/renderers/html.py
new file mode 100644
index 0000000..c2f3743
--- /dev/null
+++ b/marker/v2/renderers/html.py
@@ -0,0 +1,33 @@
+from bs4 import BeautifulSoup
+from marker.v2.renderers import BaseRenderer
+from marker.v2.schema import BlockTypes
+
+
+class HTMLRenderer(BaseRenderer):
+    """Render a document to a single HTML string by inlining <content-ref> tags."""
+    remove_blocks: list = [BlockTypes.PageHeader, BlockTypes.PageFooter]
+    image_blocks: list = [BlockTypes.Picture, BlockTypes.Figure]
+
+    def extract_html(self, document, document_output):
+        """Return document_output.html with each <content-ref> replaced by its child's HTML."""
+        soup = BeautifulSoup(document_output.html, 'html.parser')
+        for ref in soup.find_all('content-ref'):
+            src = ref.get('src')
+            # Reset per reference so a previous match can never leak into this one.
+            content, ref_block_type = None, None
+            for item in document_output.children:
+                if item.id == src:
+                    content = self.extract_html(document, item)
+                    ref_block_type = item.id.block_type
+                    break
+
+            # Drop blocks listed in remove_blocks and references with no matching child.
+            if content is None or ref_block_type in self.remove_blocks:
+                ref.replace_with('')
+            else:
+                ref.replace_with(BeautifulSoup(f"<div>{content}</div>", 'html.parser'))
+        return str(soup)
+
+    def __call__(self, document):
+        """Render `document` and return its fully inlined HTML."""
+        return self.extract_html(document, document.render())
diff --git a/marker/v2/renderers/markdown.py b/marker/v2/renderers/markdown.py
index 950e863..8610e56 100644
--- a/marker/v2/renderers/markdown.py
+++ b/marker/v2/renderers/markdown.py
@@ -1,26 +1,17 @@
-from bs4 import BeautifulSoup
 from markdownify import markdownify
-from marker.v2.renderers import BaseRenderer
-
-
-class MarkdownRenderer(BaseRenderer):
-    def extract_html(self, document_output):
-        soup = BeautifulSoup(document_output.html, 'html.parser')
-
-        content_refs = soup.find_all('content-ref')
-        for ref in content_refs:
-            src = ref.get('src')
-            for item in document_output.children:
-                if item.id == src:
-                    content = self.extract_html(item)
-                    break
-
-            ref.replace_with(BeautifulSoup(content, 'html.parser'))
-
-        return str(soup)
-
-    def __call__(self, document_output):
-        full_html = self.extract_html(document_output)
-        return markdownify(full_html)
+from marker.v2.renderers.html import HTMLRenderer
+
+
+class MarkdownRenderer(HTMLRenderer):
+    """Render a document to Markdown by converting the inlined HTML."""
+
+    def __call__(self, document):
+        full_html = self.extract_html(document, document.render())
+        # Conversion settings handed to markdownify.
+        markdown_options = {
+            "heading_style": "ATX", "bullets": "-",
+            "escape_misc": False, "escape_underscores": False,
+        }
+        return markdownify(full_html, **markdown_options)
 
 
diff --git a/marker/v2/schema/blocks/base.py b/marker/v2/schema/blocks/base.py
index dbb8ee2..acbba98 100644
--- a/marker/v2/schema/blocks/base.py
+++ b/marker/v2/schema/blocks/base.py
@@ -103,21 +103,21 @@ def raw_text(self, document: Document) -> str:
                 text += "\n"
         return text
 
-    def assemble_html(self, child_blocks):
+    def assemble_html(self, child_blocks, parent_structure=None):
         template = ""
         for c in child_blocks:
             template += f"<content-ref src='{c.id}'></content-ref>"
         return template
 
-    def render(self, document):
+    def render(self, document, parent_structure):
         child_content = []
         if self.structure is not None and len(self.structure) > 0:
             for block_id in self.structure:
                 block = document.get_block(block_id)
-                child_content.append(block.render(document))
+                child_content.append(block.render(document, self.structure))
 
         return BlockOutput(
-            html=self.assemble_html(child_content),
+            html=self.assemble_html(child_content, parent_structure),
             polygon=self.polygon,
             id=self.id,
             children=child_content
diff --git a/marker/v2/schema/blocks/caption.py b/marker/v2/schema/blocks/caption.py
index ab3fd9f..a6fb68c 100644
--- a/marker/v2/schema/blocks/caption.py
+++ b/marker/v2/schema/blocks/caption.py
@@ -4,3 +4,8 @@
 
 class Caption(Block):
     block_type: BlockTypes = BlockTypes.Caption
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <p>, with newlines turned into spaces."""
+        template = super().assemble_html(child_blocks, parent_structure).replace("\n", " ")
+        return f"<p>{template}</p>"
diff --git a/marker/v2/schema/blocks/code.py b/marker/v2/schema/blocks/code.py
index 89100c9..ca8e6e1 100644
--- a/marker/v2/schema/blocks/code.py
+++ b/marker/v2/schema/blocks/code.py
@@ -4,3 +4,7 @@
 
 class Code(Block):
     block_type: BlockTypes = BlockTypes.Code
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <pre>."""
+        return f"<pre>{super().assemble_html(child_blocks, parent_structure)}</pre>"
diff --git a/marker/v2/schema/blocks/equation.py b/marker/v2/schema/blocks/equation.py
index 184013b..f3c577e 100644
--- a/marker/v2/schema/blocks/equation.py
+++ b/marker/v2/schema/blocks/equation.py
@@ -5,3 +5,6 @@
 class Equation(Block):
     block_type: BlockTypes = BlockTypes.Equation
     latex: str | None = None
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        return f"<div class='math'>{self.latex}</div>"  # NOTE(review): emits the text "None" while latex is unset; child_blocks are not used
diff --git a/marker/v2/schema/blocks/figure.py b/marker/v2/schema/blocks/figure.py
index e90e15b..acd4f7b 100644
--- a/marker/v2/schema/blocks/figure.py
+++ b/marker/v2/schema/blocks/figure.py
@@ -4,3 +4,6 @@
 
 class Figure(Block):
     block_type: BlockTypes = BlockTypes.Figure
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        return f"Image {self.block_id}"  # placeholder text; child blocks are not rendered
diff --git a/marker/v2/schema/blocks/footnote.py b/marker/v2/schema/blocks/footnote.py
index 1f99283..f775d54 100644
--- a/marker/v2/schema/blocks/footnote.py
+++ b/marker/v2/schema/blocks/footnote.py
@@ -4,3 +4,8 @@
 
 class Footnote(Block):
     block_type: BlockTypes = BlockTypes.Footnote
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <p>, with newlines turned into spaces."""
+        template = super().assemble_html(child_blocks, parent_structure).replace("\n", " ")
+        return f"<p>{template}</p>"
diff --git a/marker/v2/schema/blocks/inlinemath.py b/marker/v2/schema/blocks/inlinemath.py
index f74fe74..c0d564e 100644
--- a/marker/v2/schema/blocks/inlinemath.py
+++ b/marker/v2/schema/blocks/inlinemath.py
@@ -4,3 +4,8 @@
 
 class InlineMath(Block):
     block_type: BlockTypes = BlockTypes.TextInlineMath
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <p>, with newlines turned into spaces."""
+        template = super().assemble_html(child_blocks, parent_structure).replace("\n", " ")
+        return f"<p>{template}</p>"
diff --git a/marker/v2/schema/blocks/listitem.py b/marker/v2/schema/blocks/listitem.py
index 9927e1d..932254b 100644
--- a/marker/v2/schema/blocks/listitem.py
+++ b/marker/v2/schema/blocks/listitem.py
@@ -1,10 +1,21 @@
+import re
+
 from marker.v2.schema import BlockTypes
 from marker.v2.schema.blocks import Block
 
 
+def replace_bullets(text):
+    """Turn a bullet glyph at the start of text, or after a newline/space, into "-"."""
+    # Group 1 keeps the boundary before the glyph; group 2 keeps the space after it.
+    bullet_pattern = r"(^|[\n ])[•●○■▪▫–—]( )"
+    return re.sub(bullet_pattern, r"\1-\2", text)
+
+
 class ListItem(Block):
     block_type: BlockTypes = BlockTypes.ListItem
 
-    def assemble_html(self, child_blocks):
-        template = super().assemble_html(child_blocks)
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <li>, flattening newlines and bullet glyphs."""
+        template = super().assemble_html(child_blocks, parent_structure)
+        template = replace_bullets(template.replace("\n", " "))
         return f"<li>{template}</li>"
diff --git a/marker/v2/schema/blocks/picture.py b/marker/v2/schema/blocks/picture.py
index edb2328..b4e2e17 100644
--- a/marker/v2/schema/blocks/picture.py
+++ b/marker/v2/schema/blocks/picture.py
@@ -4,3 +4,6 @@
 
 class Picture(Block):
     block_type: BlockTypes = BlockTypes.Picture
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        return f"Image {self.block_id}"  # placeholder text; child blocks are not rendered
diff --git a/marker/v2/schema/blocks/sectionheader.py b/marker/v2/schema/blocks/sectionheader.py
index 7a5c85c..a367fc7 100644
--- a/marker/v2/schema/blocks/sectionheader.py
+++ b/marker/v2/schema/blocks/sectionheader.py
@@ -4,3 +4,8 @@
 
 class SectionHeader(Block):
     block_type: BlockTypes = BlockTypes.SectionHeader
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <h2>, with newlines turned into spaces."""
+        template = super().assemble_html(child_blocks, parent_structure).replace("\n", " ")
+        return f"<h2>{template}</h2>"
diff --git a/marker/v2/schema/blocks/table.py b/marker/v2/schema/blocks/table.py
index ea7bab2..810ccd4 100644
--- a/marker/v2/schema/blocks/table.py
+++ b/marker/v2/schema/blocks/table.py
@@ -1,5 +1,6 @@
 from typing import List
 
+from tabled.formats import html_format
 from tabled.schema import SpanTableCell
 
 from marker.v2.schema import BlockTypes
@@ -9,3 +10,6 @@
 class Table(Block):
     block_type: BlockTypes = BlockTypes.Table
     cells: List[SpanTableCell] | None = None
+
+    def assemble_html(self, child_blocks, parent_structure=None):
+        return html_format(self.cells)  # NOTE(review): cells defaults to None; confirm html_format accepts that
diff --git a/marker/v2/schema/blocks/text.py b/marker/v2/schema/blocks/text.py
index bcbe410..aaa9a3e 100644
--- a/marker/v2/schema/blocks/text.py
+++ b/marker/v2/schema/blocks/text.py
@@ -5,6 +5,7 @@
 class Text(Block):
     block_type: BlockTypes = BlockTypes.Text
 
-    def assemble_html(self, child_blocks):
-        template = super().assemble_html(child_blocks)
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Wrap the child references in <p>, with newlines turned into spaces."""
+        template = super().assemble_html(child_blocks, parent_structure).replace("\n", " ")
         return f"<p>{template}</p>"
diff --git a/marker/v2/schema/document.py b/marker/v2/schema/document.py
index 8aed380..7e96313 100644
--- a/marker/v2/schema/document.py
+++ b/marker/v2/schema/document.py
@@ -21,10 +21,10 @@ class Document(BaseModel):
     block_type: BlockTypes = BlockTypes.Document
 
     def get_block(self, block_id: BlockId):
-        for page in self.pages:
-            block = page.get_block(block_id)
-            if block:
-                return block
+        # next() with a default: an unknown page_id yields None, not IndexError.
+        page = next((p for p in self.pages if p.page_id == block_id.page_id), None)
+        if page is not None:
+            return page.get_block(block_id)
         return None
 
     def assemble_html(self, child_blocks):
@@ -36,7 +36,7 @@ def assemble_html(self, child_blocks):
     def render(self):
         child_content = []
         for page in self.pages:
-            child_content.append(page.render(self))
+            child_content.append(page.render(self, None))
 
         return DocumentOutput(
             children=child_content,
diff --git a/marker/v2/schema/groups/list.py b/marker/v2/schema/groups/list.py
index 3e45cab..0baa293 100644
--- a/marker/v2/schema/groups/list.py
+++ b/marker/v2/schema/groups/list.py
@@ -5,6 +5,6 @@
 class ListGroup(Block):
     block_type: BlockTypes = BlockTypes.ListGroup
 
-    def assemble_html(self, child_blocks):
-        template = super().assemble_html(child_blocks)
+    def assemble_html(self, child_blocks, parent_structure=None):  # default mirrors Block.assemble_html
+        template = super().assemble_html(child_blocks, parent_structure)
         return f"<ul>{template}</ul>"
diff --git a/marker/v2/schema/groups/page.py b/marker/v2/schema/groups/page.py
index ac23955..ddc1aeb 100644
--- a/marker/v2/schema/groups/page.py
+++ b/marker/v2/schema/groups/page.py
@@ -43,5 +43,5 @@ def add_full_block(self, block: Block) -> Block:
 
     def get_block(self, block_id: BlockId) -> Block | None:
         for block in self.children:
-            if block.id == block_id:
+            if block.block_id == block_id.block_id:  # compares the block_id field only; page_id is not checked here
                 return block
diff --git a/marker/v2/schema/groups/table.py b/marker/v2/schema/groups/table.py
index a5732e1..b1b1f2d 100644
--- a/marker/v2/schema/groups/table.py
+++ b/marker/v2/schema/groups/table.py
@@ -1,6 +1,5 @@
 from marker.v2.schema import BlockTypes
 from marker.v2.schema.blocks import Block
 
-
 class TableGroup(Block):
     block_type: BlockTypes = BlockTypes.TableGroup
diff --git a/marker/v2/schema/text/line.py b/marker/v2/schema/text/line.py
index cca4ca1..2ffb12e 100644
--- a/marker/v2/schema/text/line.py
+++ b/marker/v2/schema/text/line.py
@@ -1,28 +1,64 @@
+import re
 from typing import Literal, Optional
 
+import regex
+
 from marker.v2.schema import BlockTypes
 from marker.v2.schema.blocks import Block, BlockOutput
 
+HYPHENS = r'-—¬'
+
+
+def remove_tags(text):
+    return re.compile(r'<[^>]+>').sub('', text)  # delete each <...> run, keep the surrounding text
+
+
+def replace_last(string, old, new):
+    """Replace only the final match of regex `old` in `string` with the literal `new`."""
+    final = None
+    for final in re.finditer(old, string):
+        pass  # exhaust the iterator; `final` ends up as the last match
+    return string if final is None else string[:final.start()] + new + string[final.end():]
+
+
+def strip_trailing_hyphens(line_text, next_line_text, line_html) -> str:
+    """Return line_html minus its last hyphen when the next line continues the word."""
+    # No `|` between the classes: inside [...] it would match a literal pipe.
+    lowercase_letters = r'\p{Ll}\d'
+    hyphen_regex = regex.compile(rf'.*[{HYPHENS}]\s?$', regex.DOTALL)
+    next_line_starts_lowercase = regex.match(rf"^\s?[{lowercase_letters}]", next_line_text)
+    if hyphen_regex.match(line_text) and next_line_starts_lowercase:
+        return replace_last(line_html, rf'[{HYPHENS}]', "")
+    return line_html
+
 
 class Line(Block):
     block_type: BlockTypes = BlockTypes.Line
     origin: Optional[Literal["pdftext", "surya"]] = None
 
-    def assemble_html(self, child_blocks):
+    def assemble_html(self, document, child_blocks, parent_structure):
         template = ""
         for c in child_blocks:
             template += c.html
+
+        # Join a word hyphenated across the line break with the following line.
+        position = parent_structure.index(self.id)
+        following_ids = parent_structure[position + 1:position + 2]
+        if following_ids:
+            following_line = document.get_block(following_ids[0])
+            plain_text = remove_tags(template).strip()
+            template = strip_trailing_hyphens(plain_text, following_line.raw_text(document), template)
         return template
 
-    def render(self, document):
+    def render(self, document, parent_structure):
         child_content = []
         if self.structure is not None and len(self.structure) > 0:
             for block_id in self.structure:
                 block = document.get_block(block_id)
-                child_content.append(block.render(document))
+                child_content.append(block.render(document, parent_structure))
 
         return BlockOutput(
-            html=self.assemble_html(child_content),
+            html=self.assemble_html(document, child_content, parent_structure),
             polygon=self.polygon,
             id=self.id,
             children=[]
diff --git a/marker/v2/schema/text/span.py b/marker/v2/schema/text/span.py
index e30cec4..e9d74e9 100644
--- a/marker/v2/schema/text/span.py
+++ b/marker/v2/schema/text/span.py
@@ -23,10 +23,26 @@ def bold(self):
     def italic(self):
         return 'italic' in self.formats
 
-    def assemble_html(self, child_blocks):
-        if len(self.text) > 3:
+    def assemble_html(self, child_blocks, parent_structure=None):
+        """Return the span text as an HTML fragment, with markup characters escaped.
+
+        Hyphenated line breaks are joined and newlines at both ends are removed;
+        a removed trailing newline is replaced by a single space. Text longer
+        than three characters is wrapped in <i> or <b> when so formatted.
+        """
+        import html  # function-scope import keeps this edit self-contained
+
+        text = self.text.replace("-\n", "")  # Remove hyphenated line breaks
+        without_trailing = text.rstrip("\r\n")
+        replaced_newline = without_trailing != text
+        text = without_trailing.lstrip("\r\n")
+        if replaced_newline:
+            text += " "
+        # Escape &, < and > so PDF text cannot be parsed as tags downstream.
+        raw_length, text = len(text), html.escape(text, quote=False)
+        if raw_length > 3:
             if self.italic:
-                return f"<i>{self.text}</i>"
+                return f"<i>{text}</i>"
             elif self.bold:
-                return f"<b>{self.text}</b>"
-        return self.text
+                return f"<b>{text}</b>"
+        return text