diff --git a/Cargo.toml b/Cargo.toml
index 144d34d..fc53ae1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,15 +9,18 @@ description = "PDF text extraction"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+
 [dependencies]
-pdf = { git = "https://github.com/pdf-rs/pdf", features = ["cache"] }
-pdf_render = { git = "https://github.com/pdf-rs/pdf_render" }
-font = { git = "https://github.com/pdf-rs/font" }
+pdf_render= { git = "https://github.com/videni/pdf_render_with_vello.git", branch="vello_wip"}
+# pdf_render= { path = "../pdf_render/render"}
+pdf = { git = "https://github.com/pdf-rs/pdf", features = ["cache", "dump"], default-features = false, rev = "9002322822a3773d3d265dee81d855b40f5e0d0a"}
+
 itertools = "*"
 log = "*"
 ordered-float = "*"
 serde = { version = "*", features = ["derive"] }
 unicode-normalization = "0.1.19"
+font = { git = "https://github.com/videni/font", branch = "master", features=['cff']}
 
 pathfinder_geometry = { git = "https://github.com/servo/pathfinder" }
 pathfinder_color = { git = "https://github.com/servo/pathfinder" }
diff --git a/examples/text.rs b/examples/text.rs
index 0917053..3a24b30 100644
--- a/examples/text.rs
+++ b/examples/text.rs
@@ -6,15 +6,35 @@ fn main() {
     let file = FileOptions::cached().open(&input).expect("can't read PDF");
     let resolver = file.resolver();
     
-    for (page_nr, page) in file.pages().enumerate() {
-        let page = page.expect("can't read page");
-        let flow = pdf_text::run(&file, &page, &resolver).expect("can't render page");
-        println!("# page {}", page_nr + 1);
+    // for (page_nr, page) in file.pages().enumerate() {
+        let page: pdf::object::PageRc = file.get_page(0).unwrap();
+        let flow = pdf_text::run(&file, &page, &resolver, Default::default(), false).expect("can't render page");
         for run in flow.runs {
-            for line in run.lines {
-                println!("{}", line.words.iter().map(|w| &w.text).format(" "));
+            for line in &run.lines {
+                println!("{:?}",  line.rect);
+                for word in &line.words {
+                    println!("{}, {:?}", word.text.as_str(), word.rect);
+                    dbg!(&word.chars);
+
+                    let text = &word.text;
+                    let mut offset = 0;
+                    let mut chars = word.chars.iter().peekable();
+                    let mut texts = vec![];
+
+                    while let Some(_) = chars.next() {
+                        // Get text for current char
+                        let s = if let Some(next) = chars.peek() {
+                            let s = &text[offset..next.offset];
+                            offset = next.offset;
+                            s
+                        } else {
+                            &text[offset..]
+                        };
+        
+                        texts.push(s);
+                    }
+                }
             }
-            println!();
         }
-    }
+    // }
 }
diff --git a/src/classify.rs b/src/classify.rs
new file mode 100644
index 0000000..5033738
--- /dev/null
+++ b/src/classify.rs
@@ -0,0 +1,81 @@
+use std::sync::Arc;
+
+use font::Encoder;
+use pdf_render::TextSpan;
+
+use crate::util::is_number;
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Class { // coarse category that `classify` assigns to a group of text spans
+    Number, // every span's text is numeric and every font seen is the same one
+    Header, // every span that has a font uses a "Bold" font and every font seen is the same one
+    Paragraph, // none, or only some, of the fonts seen have "Bold" in their name (and not Number)
+    Mixed, // fallback for every remaining combination, e.g. no span carries a font
+}
+
+/// Classifies a group of spans by how numeric, bold and font-uniform they are;
+/// spans without a font only contribute to the numeric tally.
+pub fn classify<'a, E: Encoder + 'a>(spans: impl Iterator<Item=&'a TextSpan<E>>) -> Class {
+    use pdf_render::FontEntry;
+
+    let mut numeric = TriCount::new();
+    let mut bold = TriCount::new();
+    let mut uniform = TriCount::new();
+    let mut reference: *const FontEntry<E> = std::ptr::null(); // address of the first font seen
+
+    for span in spans {
+        numeric.add(is_number(&span.text));
+        if let Some(font) = &span.font {
+            bold.add(font.name.contains("Bold"));
+            let current = Arc::as_ptr(font);
+            if !reference.is_null() {
+                uniform.add(current == reference);
+            } else {
+                reference = current;
+            }
+        }
+    }
+    // Guarantees `uniform` is never `Unknown`, even when zero or one font was seen.
+    uniform.add(true);
+    match (numeric.count(), bold.count(), uniform.count()) {
+        (Tri::True, _, Tri::True) => Class::Number,
+        (_, Tri::True, Tri::True) => Class::Header,
+        (_, Tri::False | Tri::Maybe(_), _) => Class::Paragraph,
+        _ => Class::Mixed
+    }
+}
+
+pub enum Tri { // summary of a `TriCount` tally
+    False, // only `false` observations were recorded
+    True, // only `true` observations were recorded
+    Maybe(f32), // both kinds were recorded; holds the fraction that were `true`
+    Unknown, // nothing was recorded
+}
+
+/// Running tally of boolean observations, summarised on demand as a `Tri`.
+#[derive(Debug)]
+pub struct TriCount {
+    tru: usize, // number of `true` observations
+    fal: usize, // number of `false` observations
+}
+impl TriCount {
+    /// Creates an empty tally.
+    fn new() -> Self {
+        Self { tru: 0, fal: 0 }
+    }
+    /// Records one observation.
+    fn add(&mut self, b: bool) {
+        let slot = if b { &mut self.tru } else { &mut self.fal };
+        *slot += 1;
+    }
+    /// Summarises the tally: `Unknown` when empty, `True`/`False` when one-sided,
+    /// otherwise `Maybe` carrying the fraction of `true` observations.
+    fn count(&self) -> Tri {
+        match (self.tru, self.fal) {
+            (0, 0) => Tri::Unknown,
+            (_, 0) => Tri::True,
+            (0, _) => Tri::False,
+            (t, f) => Tri::Maybe(t as f32 / (t + f) as f32)
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/entry.rs b/src/entry.rs
deleted file mode 100644
index 9aeff6e..0000000
--- a/src/entry.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-use serde::{Serialize, Deserialize};
-use table::Table;
-
-use crate::util::{Rect, CellContent};
-
-#[derive(Serialize, Deserialize)]
-pub struct Word {
-    pub text: String,
-    pub rect: Rect,
-}
-#[derive(Serialize, Deserialize)]
-pub struct Line {
-    pub words: Vec<Word>,
-}
-#[derive(Serialize, Deserialize)]
-pub struct Run {
-    pub lines: Vec<Line>,
-    pub kind: RunType,
-}
-
-#[derive(Serialize, Deserialize)]
-pub struct Flow {
-    pub lines: Vec<Line>,
-    pub runs: Vec<Run>,
-}
-#[derive(Serialize, Deserialize)]
-pub enum RunType {
-    ParagraphContinuation,
-    Paragraph,
-    Header,
-    Cell,
-}
-
-impl Flow {
-    pub fn new() -> Self {
-        Flow { 
-            lines: vec![],
-            runs: vec![]
-        }
-    }
-    pub fn add_line(&mut self, words: Vec<Word>, kind: RunType) {
-        if words.len() > 0 {
-            self.runs.push(Run {
-                lines: vec![Line { words }], 
-                kind
-            });
-        }
-    }
-    pub fn add_table(&mut self, table: Table<CellContent>) {
-        
-    }
-}
diff --git a/src/flow.rs b/src/flow.rs
new file mode 100644
index 0000000..74b8dbd
--- /dev/null
+++ b/src/flow.rs
@@ -0,0 +1,278 @@
+use crate::classify::{classify, Class};
+use crate::node::{Node, NodeTag};
+use crate::util::avg;
+use crate::text::concat_text;
+use std::iter::once;
+use pathfinder_geometry::rect::RectF;
+use pdf_render::TextSpan;
+
+use std::mem::take;
+use font::Encoder;
+use serde::{Serialize, Deserialize};
+use table::Table;
+
+#[derive(Serialize, Deserialize)]
+pub struct Word { // one word of extracted text
+    pub text: String, // the word's text
+    pub rect: Rect, // rectangle recorded for the word (presumably its bounding box — confirm)
+    pub chars: Vec<Char> // per-character records for `text`
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Char {
+    // Byte offset of this character (presumably into the owning `Word::text` — confirm)
+    pub offset: usize,
+    pub pos: f32, // NOTE(review): looks like the character's position; axis and units not shown here
+    pub width: f32, // NOTE(review): presumably the character's width in the same units — confirm
+}
+
+#[derive(Serialize, Deserialize)]
+pub struct Line { // one line of text
+    pub words: Vec<Word>, // the words making up the line
+    pub rect: Rect, // rectangle recorded for the line; `build` passes the union of its spans' rects
+}
+#[derive(Serialize, Deserialize)]
+pub struct Run { // a group of lines emitted together under a single `RunType`
+    pub lines: Vec<Line>, // the lines of the run, in the order they were added
+    pub kind: RunType, // what the run was classified as
+}
+
+#[derive(Serialize, Deserialize)]
+pub enum RunType { // kind attached to every `Run`
+    ParagraphContinuation, // NOTE(review): never constructed in this module
+    Paragraph, // default kind: used for every class other than `Class::Header`
+    Header, // used when the spans were classified as `Class::Header`
+    Cell, // NOTE(review): never constructed in this module
+}
+
+
+#[derive(Copy, Clone, Debug)]
+#[derive(Serialize, Deserialize)]
+#[repr(C)]
+pub struct Rect { // plain, serializable rectangle: origin plus size
+    pub x: f32, // origin x
+    pub y: f32, // origin y
+    pub w: f32, // width
+    pub h: f32 // height
+}
+impl From<RectF> for Rect { // copies origin and size out of a pathfinder `RectF`
+    fn from(r: RectF) -> Self {
+        Rect {
+            x: r.origin_x(),
+            y: r.origin_y(),
+            w: r.width(),
+            h: r.height()
+        }
+    }
+}
+/// Content of one table cell: its concatenated text and the union of its spans' rectangles.
+#[derive(Clone, Debug, Serialize)]
+pub struct CellContent {
+    pub text: String,
+    pub rect: Rect,
+}
+
+/// Extraction result: the list of text runs in the order they were added.
+#[derive(Serialize, Deserialize)]
+pub struct Flow {
+    pub runs: Vec<Run>,
+}
+
+impl Flow {
+    /// Creates a flow with no runs.
+    pub fn new() -> Self {
+        Self { runs: Vec::new() }
+    }
+    /// Appends a one-line run of the given kind; does nothing when `words` is empty.
+    pub fn add_line(&mut self, words: Vec<Word>, kind: RunType, rect: Rect) {
+        if words.is_empty() {
+            return;
+        }
+        let line = Line { words, rect };
+        self.runs.push(Run { lines: vec![line], kind });
+    }
+    /// Currently a no-op: the table is dropped without being recorded.
+    pub fn add_table(&mut self, table: Table<CellContent>) {
+    }
+}
+/// Recursively converts `node` into runs appended to `flow`; node indices refer into `spans`.
+pub(crate) fn build<E: Encoder>(mut flow: &mut Flow, spans: &[TextSpan<E>], node: &Node, x_anchor: f32) {
+    match *node {
+        Node::Final { ref indices } => {
+            if indices.len() > 0 {
+                let node_spans = indices.iter()
+                    .flat_map(|&i| spans.get(i));
+                let bbox = node_spans.clone()
+                    .map(|s| s.rect)
+                    .reduce(|a, b| a.union_rect(b))
+                    .unwrap();
+                
+                let class = classify(node_spans.clone());
+                let mut text = String::new();
+                let words = concat_text(&mut text, node_spans);
+
+                let t = match class {
+                    Class::Header => RunType::Header,
+                    _ => RunType::Paragraph,
+                };
+                // `add_line` ignores the call when no words were produced.
+                flow.add_line(words, t, bbox.into());
+            }
+        }
+        Node::Grid { ref x, ref y, ref cells, tag } => {
+            match tag {
+                NodeTag::Singleton |
+                NodeTag::Line => {
+                    let mut indices = vec![];
+                    node.indices(&mut indices);
+
+                    let line_spans = indices.iter().flat_map(|&i| spans.get(i));
+                    let bbox: RectF = line_spans.clone().map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap().into();
+
+                    let class = classify(line_spans.clone());
+                    let mut text = String::new();
+                    let words = concat_text(&mut text, line_spans);
+
+                    let t = match class {
+                        Class::Header => RunType::Header,
+                        _ => RunType::Paragraph,
+                    };
+                
+                    flow.add_line(words, t, bbox.into());
+                }
+                NodeTag::Paragraph => {
+                    assert_eq!(x.len(), 0, "For paragraph x gaps must be empty");
+
+                    let mut lines: Vec<(RectF, usize)> = vec![];
+                    let mut indices = vec![];
+
+                    for n in cells {
+                        let start: usize = indices.len();
+                        n.indices(&mut indices);
+                        if indices.len() > start {
+                            let cell_spans = indices[start..].iter().flat_map(|&i| spans.get(i));
+                            let bbox = cell_spans.map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap().into();
+                            lines.push((bbox, indices.len()));
+                        }
+                    }
+
+                    let para_spans = indices.iter().flat_map(|&i| spans.get(i));
+                    let class = classify(para_spans.clone());
+                    // the bounding box of the paragraph
+                    let bbox = lines.iter().map(|t| t.0).reduce(|a, b| a.union_rect(b)).unwrap();
+                    let line_height = avg(para_spans.map(|s| s.rect.height())).unwrap();
+                    
+                    // classify the lines by this vertical line
+                    let left_margin = bbox.min_x() + 0.5 * line_height;
+
+                    // count how many are right and left of the split.
+                    let mut left = 0;
+                    let mut right = 0;
+
+                    for (line_bbox, _) in lines.iter() {
+                        if line_bbox.min_x() >= left_margin {
+                            right += 1;
+                        } else {
+                            left += 1;
+                        }
+                    }
+                    // Typically paragraphs are indented to the right and longer than 2 lines;
+                    // then there will be a higher left count than right count.
+                    let indent = left > right;
+
+                    // A paragraph with 3 lines, 3 cases:
+                    // case 1: outdented(right > left, will get 3 runs)
+                    // |-------
+                    // | ----
+                    // | ----
+                    // case 2: indented (left > right, one new run)
+                    // | ------
+                    // |-------
+                    // |-------
+                    // case 3: same x (no indentation, but left > right, right = 0, will be in the same run)
+                    // |------
+                    // |------
+                    // |------
+
+                    //TODO: A paragraph with two lines starts at the same x? then left = right.
+                    // the second line will be treated as another run, but actually it should be
+                    // in the same run.
+
+                    let mut para_start = 0;
+                    let mut line_start = 0;
+                    let mut text = String::new();
+                    let mut para_bbox = RectF::default(); // NOTE(review): only feeds its own updates below; never used otherwise
+                    let mut flow_lines = vec![];
+                    for &(line_bbox, end) in lines.iter() {
+                        if line_start != 0 {
+                            // Always add a line break for a new line; it is meant to act as whitespace in concat_text
+                            text.push('\n');
+
+                            // if a line is indented(indent = true) or outdented(indent = false), it marks a new paragraph
+                            // so here, save previous lines as a new run.
+                            if (line_bbox.min_x() >= left_margin) == indent {
+                                flow.runs.push(Run {
+                                    lines: take(&mut flow_lines),
+                                    kind: match class {
+                                        Class::Header => RunType::Header,
+                                        _ => RunType::Paragraph
+                                    },
+                                });
+                                para_start = line_start;
+                            }
+                        }
+                        if end > line_start {
+                            let words = concat_text(&mut text, indices[line_start..end].iter().flat_map(|&i| spans.get(i)));
+
+                            if words.len() > 0 {
+                                flow_lines.push(Line { words , rect: line_bbox.into()});
+                            }
+                        }
+                        if para_start == line_start {
+                            para_bbox = line_bbox;
+                        } else {
+                            para_bbox = para_bbox.union_rect(line_bbox);
+                        }
+                        line_start = end;
+                    }
+
+                    flow.runs.push(Run { // NOTE(review): pushed even when `flow_lines` is empty, unlike `add_line`
+                        lines: flow_lines,
+                        kind: match class {
+                            Class::Header => RunType::Header,
+                            _ => RunType::Paragraph
+                        }
+                    });
+                }
+                NodeTag::Complex => {
+                    let x_anchors = once(x_anchor).chain(x.iter().cloned()).cycle();
+                    for (node, x) in cells.iter().zip(x_anchors) {
+                        build(flow, spans, node, x);
+                    }
+                }
+            }
+        }
+        Node::Table { ref table } => {
+            if let Some(bbox) = table.values()
+                .flat_map(|v| v.value.iter().flat_map(|&i| spans.get(i).map(|s| s.rect)))
+                .reduce(|a, b| a.union_rect(b)) { // this outer `bbox` only proves the table references at least one span
+                let table = table.flat_map(|indices| {
+                    if indices.len() == 0 {
+                        None
+                    } else {
+                        let line_spans = indices.iter().flat_map(|&i| spans.get(i));
+                        let bbox: RectF = line_spans.clone().map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap().into();
+
+                        let mut text = String::new();
+                        concat_text(&mut text, line_spans.clone());
+                        Some(CellContent {
+                            text,
+                            rect: bbox.into(),
+                        })
+                    }
+                });
+                flow.add_table(table);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 8407487..f166c1b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,25 +1,28 @@
 use std::collections::HashSet;
 
-use entry::Flow;
+use flow::Flow;
+use pathfinder_geometry::transform2d::Transform2F;
 use pdf::{backend::Backend, object::{Page, Resolve}, PdfError};
-use pdf_render::{tracer::{TraceCache, Tracer, DrawItem}, Fill, render_pattern, render_page, FillMode};
+use pdf_render::{tracer::{TraceCache, Tracer, DrawItem}, Fill, render_pattern, render_page, FillMode, font::OutlineBuilder};
 
-mod tree;
+mod node;
 mod util;
 mod text;
-pub mod entry;
+mod classify;
+pub mod flow;
 
-pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &impl Resolve) -> Result<Flow, PdfError> {
-    let cache = TraceCache::new();
+pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &impl Resolve, transform: Transform2F, without_header_and_footer: bool) -> Result<Flow, PdfError> {
+    let mut cache = TraceCache::new(OutlineBuilder::default());
 
     let mut clip_paths = vec![];
-    let mut tracer = Tracer::new(&cache, &mut clip_paths);
+    let mut tracer = Tracer::new(&mut cache, &mut clip_paths);
 
-    render_page(&mut tracer, resolve, &page, Default::default())?;
+    //Get text, pattern, image by the Tracer backend.
+    render_page(&mut tracer, resolve, page, transform)?;
 
     let bbox = tracer.view_box();
-
-    let items = tracer.finish();
+    let items: Vec<DrawItem<OutlineBuilder>> = tracer.finish();
+    //Get all patterns which may have lines and texts inside.
     let mut patterns = HashSet::new();
     for item in items.iter() {
         if let DrawItem::Vector(ref v) = item {
@@ -34,6 +37,7 @@ pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &i
 
     let mut spans = vec![];
     let mut lines = vec![];
+
     let mut visit_item = |item| {
         match item {
             DrawItem::Text(t, _) if bbox.intersects(t.rect) => {
@@ -60,6 +64,7 @@ pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &i
         }
     };
 
+    // Analyze patterns to get lines and texts.
     for &p in patterns.iter() {
         let pattern = match resolve.get(p) {
             Ok(p) => p,
@@ -68,7 +73,7 @@ pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &i
                 continue;
             }
         };
-        let mut pat_tracer = Tracer::new(&cache, &mut clip_paths);
+        let mut pat_tracer = Tracer::new(&mut cache, &mut clip_paths);
 
         render_pattern(&mut pat_tracer, &*pattern, resolve)?;
         let pat_items = pat_tracer.finish();
@@ -77,12 +82,16 @@ pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &i
         }
     }
 
+    // After this loop, all the text and lines are ready for further processing.
     for item in items {
         visit_item(item);
     }
 
-    let root = tree::build(&spans, bbox, &lines);
+    let root = node::build(&spans, bbox, &lines, without_header_and_footer);
+
     let mut flow = Flow::new();
-    tree::items(&mut flow, &spans, &root, bbox.min_x());
+  
+    flow::build(&mut flow, &spans, &root, bbox.min_x());
+
     Ok(flow)
 }
\ No newline at end of file
diff --git a/src/node.rs b/src/node.rs
new file mode 100644
index 0000000..20300a4
--- /dev/null
+++ b/src/node.rs
@@ -0,0 +1,265 @@
+mod gap;
+mod line;
+mod render;
+mod table;
+
+use gap::{dist_x, dist_y, gaps, left_right_gap, top_bottom_gap};
+use line::{analyze_lines, overlapping_lines, Lines};
+use pdf_render::TextSpan;
+use pathfinder_geometry::rect::RectF;
+
+
+use crate::classify::{classify, Class};
+use crate::util::avg;
+
+#[cfg(feature="ocr")]
+use tesseract_plumbing::Text;
+
+use std::mem::take;
+use font::Encoder;
+
+/// Builds the layout tree for a page's spans; a page without spans yields an empty `Final` node.
+pub fn build<E: Encoder>(spans: &[TextSpan<E>], bbox: RectF, lines: &[[f32; 4]], without_header_and_footer: bool) -> Node {
+    if spans.is_empty() {
+        return Node::singleton(&[]);
+    }
+
+    // Pair each span's rectangle with its index into `spans`.
+    let mut owned: Vec<(RectF, usize)> = spans.iter().map(|s| s.rect).zip(0..).collect();
+    let mut boxes = owned.as_mut_slice();
+    if without_header_and_footer {
+        boxes = exclude_header_and_footer(boxes, bbox, spans);
+    }
+
+    split(boxes, spans, &analyze_lines(lines))
+}
+/// Trims probable header, footer and side-margin boxes; returns the remaining sub-slice of `boxes`, which is re-sorted in place.
+pub fn exclude_header_and_footer<'a, E: Encoder>(boxes: &'a mut [(RectF, usize)], bbox: RectF, spans: &[TextSpan<E>]) -> &'a mut [(RectF, usize)]
+{
+    let avg_font_size: f32 = avg(spans.iter().map(|s| s.font_size)).unwrap();
+    // A group is header-like when classified as Header/Number, or when its average font size exceeds the page average.
+    let probably_header = |boxes: &[(RectF, usize)]| {
+        let class = classify(boxes.iter().filter_map(|&(_, i)| spans.get(i)));
+        if matches!(class, Class::Header | Class::Number) {
+            return true;
+        }
+        let f = avg(boxes.iter().filter_map(|&(_, i)| spans.get(i)).map(|s| s.font_size)).unwrap();
+        f > avg_font_size
+    };
+    let probably_footer = |boxes: &mut [(RectF, usize)]| {
+        sort_x(boxes);
+        let x_gaps: Vec<f32> = gap::gaps(avg_font_size, boxes, |r| (r.min_x(), r.max_x()))
+            .collect();
+        // Split at horizontal gaps at least one average font size wide; every piece must be header-like.
+        let is_footer = split_by(boxes, x_gaps.as_slice(), |r| r.min_x())
+            .all(|cell| probably_header(cell));
+
+        is_footer
+    };
+    // Vertical pass: `top_bottom_gap` scans the boxes in `min_y` order.
+    sort_y(boxes);
+
+    let mut boxes = boxes;
+    let (top, bottom) = top_bottom_gap(boxes, bbox);
+    if let Some(bottom) = bottom {
+        if probably_footer(&mut boxes[bottom..]) {
+            boxes = &mut boxes[..bottom];
+        }
+    }
+    if let Some(top) = top {
+        if probably_header(&mut boxes[..top]) {
+            boxes = &mut boxes[top..];
+        }
+    }
+    sort_x(boxes); // horizontal pass: `left_right_gap` scans the boxes in `min_x` order
+    let (left, right) = left_right_gap(boxes, bbox);
+    if let Some(right) = right {
+        if probably_header(&boxes[right..]) {
+            boxes = &mut boxes[..right];
+        }
+    }
+    if let Some(left) = left {
+        if probably_header(&boxes[..left]) {
+            boxes = &mut boxes[left..];
+        }
+    }
+
+    boxes
+}
+
+/// A node of the layout tree produced by `split`.
+#[derive(Debug)]
+pub enum Node {
+    Final { indices: Vec<usize> }, // leaf: the index part of each `(RectF, usize)` box it was built from
+    Grid { 
+        // x positions (gap midpoints) at which the boxes were split into columns
+        x: Vec<f32>, 
+        // y positions (gap midpoints) at which the boxes were split into rows
+        y: Vec<f32>, 
+        cells: Vec<Node>,
+        tag: NodeTag 
+    },
+    Table { table: table::Table<Vec<usize>> },
+}
+impl Node {
+    pub fn tag(&self) -> NodeTag { // a grid reports its stored tag; tables count as Complex, leaves as Singleton
+        match *self {
+            Node::Grid { tag, .. } => tag,
+            Node::Table { .. } => NodeTag::Complex,
+            Node::Final { .. } => NodeTag::Singleton,
+        }
+    }
+    pub fn indices(&self, out: &mut Vec<usize>) { // appends every index held in this subtree to `out`
+        match *self {
+            Node::Final { ref indices } => out.extend_from_slice(&indices),
+            Node::Grid { ref cells, .. } => {
+                for n in cells {
+                    n.indices(out);
+                }
+            }
+            Node::Table { ref table } => {
+                out.extend(
+                    table.values()
+                        .flat_map(|v| v.value.iter())
+                        .cloned()
+                );
+            }
+        }
+    }
+    pub fn singleton(nodes: &[(RectF, usize)]) -> Self { // builds a leaf from the index of every given box
+        Node::Final { indices: nodes.iter().map(|t| t.1).collect() }
+    }
+}
+/// Kind of a layout node. Variant order matters: `split` compares tags with `<= NodeTag::Line`.
+#[derive(PartialOrd, Ord, Eq, PartialEq, Clone, Copy, Debug)]
+pub enum NodeTag {
+    Singleton, // reported for `Node::Final`
+    Line, // grid without y gaps whose cells are all Singleton or Line
+    Paragraph, // grid without x gaps whose cells are all Singleton or Line
+    Complex, // anything else, including tables
+}
+/// Recursively partitions `boxes` at large horizontal/vertical gaps into a tree of `Node`s.
+fn split<E: Encoder>(boxes: &mut [(RectF, usize)], spans: &[TextSpan<E>], lines: &Lines) -> Node {
+    let num_boxes = boxes.len();
+    if num_boxes < 2 {
+        return Node::singleton(boxes);
+    }
+
+    sort_x(boxes);
+    let max_x_gap = dist_x(boxes);
+
+    sort_y(boxes);
+    let max_y_gap = dist_y(boxes);
+
+    let x_y_ratio = 1.0;
+
+    let max_gap = match (max_x_gap, max_y_gap) {
+        (Some((x, _)), Some((y, _))) => x.max(y * x_y_ratio),
+        (Some((x, _)), None) => x,
+        (None, Some((y, _))) => y * x_y_ratio,
+        (None, None) => {
+            sort_x(boxes);
+            return Node::singleton(boxes);
+        }
+    };
+    let x_threshold = (max_gap * 0.5).max(1.0);
+    let y_threshold = (max_gap * 0.5 / x_y_ratio).max(0.1);
+
+    let y_gaps: Vec<f32> = gaps(y_threshold, boxes, |r| (r.min_y(), r.max_y()))
+        .collect();
+    
+    sort_x(boxes);
+    let x_gaps: Vec<f32> = gaps(x_threshold, boxes, |r| (r.min_x(), r.max_x()))
+        .collect();
+
+    if x_gaps.len() == 0 && y_gaps.len() == 0 {
+        return overlapping_lines(boxes);
+    }
+
+    //TODO: Disable the table::split for now, because it is not accurate 
+    // if x_gaps.len() > 1 && y_gaps.len() > 1 {
+    //     return table::split(boxes, spans, lines);
+    // }
+
+    assert!(
+        x_gaps.len() > 0 || y_gaps.len() > 0, 
+        "At least one of x_gaps and y_gaps must be non-empty, otherwise the memory will be exhausted"
+    );
+    sort_y(boxes);
+
+    let mut cells = vec![];
+    for row in split_by(boxes, &y_gaps, |r| r.min_y()) {
+        if x_gaps.len() > 0 {
+            sort_x(row);
+            for cell in split_by(row, &x_gaps, |r| r.min_x()) {
+                sort_y(cell);
+                assert!(cell.len() < num_boxes);
+                cells.push(split(cell, spans, lines));
+            }
+        } else {
+            cells.push(split(row, spans, lines));
+        }
+    }
+
+    let tag = match (y_gaps.is_empty(), x_gaps.is_empty()) {
+        // No y gaps: whatever the x gaps are, if every cell is at most a line, this is a line
+        (true, _) if cells.iter().all(|n| n.tag() <= NodeTag::Line) => NodeTag::Line,
+        // No x gaps: whatever the y gaps are, if every cell is at most a line, this is a paragraph
+        (_, true) if cells.iter().all(|n| n.tag() <= NodeTag::Line) => NodeTag::Paragraph,
+        // Otherwise it is a complex node
+        _ => NodeTag::Complex
+    };
+
+    Node::Grid {
+        x: x_gaps,
+        y: y_gaps,
+        cells,
+        tag,
+    }
+}
+/// Sorts `boxes` in place by ascending left edge (`min_x`).
+fn sort_x(boxes: &mut [(RectF, usize)]) {
+    boxes.sort_unstable_by(|a, b| a.0.min_x().total_cmp(&b.0.min_x())); // total order: a NaN coordinate cannot panic
+}
+fn sort_y(boxes: &mut [(RectF, usize)]) { // sorts `boxes` in place by ascending top edge (`min_y`)
+    boxes.sort_unstable_by(|a, b| a.0.min_y().total_cmp(&b.0.min_y())); // total order: a NaN coordinate cannot panic
+}
+/// Lazily cuts `list` into consecutive sub-slices at the positions in `at`, measured per box with `by`.
+fn split_by<'a>(list: &'a mut [(RectF, usize)], at: &'a [f32], by: impl Fn(&RectF) -> f32) -> impl Iterator<Item=&'a mut [(RectF, usize)]> {
+    SplitBy {
+        data: list,
+        points: at.iter().cloned(),
+        by,
+        end: false
+    }
+}
+/// Iterator behind `split_by`: yields one sub-slice per split point, then the remainder.
+struct SplitBy<'a, I, F> {
+    data: &'a mut [(RectF, usize)], // boxes not yet handed out
+    points: I, // remaining split positions
+    by: F, // maps a box to the coordinate compared against a split position
+    end: bool, // set once the final remainder has been yielded
+}
+impl<'a, I, F> Iterator for SplitBy<'a, I, F> where
+    I: Iterator<Item=f32>,
+    F: Fn(&RectF) -> f32
+{
+    type Item = &'a mut [(RectF, usize)];
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.end {
+            return None;
+        }
+        match self.points.next() {
+            Some(p) => {
+                let idx = self.data.iter().position(|(ref r, _)| (self.by)(r) > p).unwrap_or(self.data.len()); // first box past the split point
+                let (head, tail) = take(&mut self.data).split_at_mut(idx); // `take` leaves an empty slice behind so the borrow can be split
+                self.data = tail;
+                Some(head)
+            },
+            None => {
+                self.end = true; // no split points left: hand out whatever remains exactly once
+                Some(take(&mut self.data))
+            }
+        }
+    }
+}
diff --git a/src/node/gap.rs b/src/node/gap.rs
new file mode 100644
index 0000000..189580e
--- /dev/null
+++ b/src/node/gap.rs
@@ -0,0 +1,133 @@
+use ordered_float::NotNan;
+use pathfinder_geometry::rect::RectF;
+
+/// Lists every gap between consecutive boxes as `(start, end, index)`, where `index` is the
+/// position in `boxes` of the box that follows the gap. The scan assumes `boxes` is already
+/// sorted along the axis measured by `span`. An empty slice yields no gaps instead of panicking.
+pub fn gap_list<'a>(boxes: &'a [(RectF, usize)], span: impl Fn(&RectF) -> (f32, f32) + 'a) -> impl Iterator<Item=(f32, f32, usize)> + 'a {
+    let mut last_max = boxes.first().map_or(f32::NEG_INFINITY, |(r, _)| span(r).1);
+    boxes.iter().enumerate().skip(1).filter_map(move |(idx, (r, _))| {
+        let (min, max) = span(r);
+        let gap = if min > last_max {
+            Some((last_max, min, idx))
+        } else {
+            None
+        };
+        // Keep the running maximum so a box nested inside an earlier one opens no gap.
+        last_max = max.max(last_max);
+        gap
+    })
+}
+
+/// Yields the midpoint of every gap between consecutive boxes whose width is at least
+/// `threshold`. The scan assumes `boxes` is already sorted along the axis measured by `span`.
+/// An empty slice yields nothing instead of panicking.
+pub fn gaps<'a>(threshold: f32, boxes: &'a [(RectF, usize)], span: impl Fn(&RectF) -> (f32, f32) + 'a) -> impl Iterator<Item=f32> + 'a {
+    let mut last_max = boxes.first().map_or(f32::NEG_INFINITY, |(r, _)| span(r).1);
+    boxes.iter().skip(1).filter_map(move |(r, _)| {
+        let (min, max) = span(r);
+        let mid = if min - last_max >= threshold {
+            // The middle position of the gap
+            Some(0.5 * (last_max + min))
+        } else {
+            None
+        };
+        last_max = max.max(last_max);
+        mid
+    })
+}
+
+/// Returns the width of the widest gap and that gap's midpoint, or `None` when there is no gap.
+pub fn max_gap(boxes: &[(RectF, usize)], span: impl Fn(&RectF) -> (f32, f32)) -> Option<(f32, f32)> {
+    let widest = gap_list(boxes, span).max_by_key(|&(start, end, _)| NotNan::new(end - start).unwrap());
+    let (start, end, _) = widest?;
+    Some((end - start, 0.5 * (start + end)))
+}
+/// Widest horizontal gap in `boxes` as `(width, midpoint)`; callers sort by `min_x` first.
+pub fn dist_x(boxes: &[(RectF, usize)]) -> Option<(f32, f32)> {
+    max_gap(boxes, |r| (r.min_x(), r.max_x()))
+}
+pub fn dist_y(boxes: &[(RectF, usize)]) -> Option<(f32, f32)> { // widest vertical gap as (height, midpoint)
+    max_gap(boxes, |r| (r.min_y(), r.max_y()))
+}
+
+/// Locates the gaps that separate a header and a footer from the page body.
+///
+/// `boxes` is expected to be sorted by `min_y`. Returns `(top, bottom)`: `top` is the index of
+/// the box following the first gap when that gap starts in the upper 20% of `bbox`; `bottom` is
+/// the index of the box following a gap that starts in the lower 20% of `bbox`.
+pub fn top_bottom_gap(boxes: &mut [(RectF, usize)], bbox: RectF) -> (Option<usize>, Option<usize>) {
+    if boxes.len() < 2 {
+        return (None, None);
+    }
+
+    let mut gaps = gap_list(boxes, |r| (r.min_y(), r.max_y()));
+    let top_limit = bbox.min_y() + bbox.height() * 0.2;
+    let bottom_limit = bbox.min_y() + bbox.height() * 0.8;
+
+    match gaps.next() {
+        None => (None, None),
+        // The very first gap already starts in the footer region.
+        Some((y, _, first)) if !(y < top_limit) && y > bottom_limit => (None, Some(first)),
+        Some((y, _, first)) => {
+            // Examine the last gap even when the first one lies in the body; otherwise a page
+            // with a footer but no header would never have its footer detected.
+            let bottom = gaps.last().filter(|&(y, _, _)| y > bottom_limit).map(|(_, _, i)| i);
+            ((y < top_limit).then(|| first), bottom)
+        }
+    }
+}
+
+/// Locates the gaps that separate left and right margin content from the page body. `boxes` is
+/// expected to be sorted by `min_x`; the limits are the outer 20% of `bbox` on each side.
+pub fn left_right_gap(boxes: &mut [(RectF, usize)], bbox: RectF) -> (Option<usize>, Option<usize>) {
+    if boxes.len() < 2 {
+        return (None, None);
+    }
+
+    let mut gaps = gap_list(boxes, |r| (r.min_x(), r.max_x()));
+    let left_limit = bbox.min_x() + bbox.width() * 0.2;
+    let right_limit = bbox.min_x() + bbox.width() * 0.8;
+    match gaps.next() {
+        None => (None, None),
+        Some((x, _, first)) if !(x < left_limit) && x > right_limit => (None, Some(first)),
+        Some((x, _, first)) => {
+            // Examine the last gap even when the first one lies in the body (it used to be skipped).
+            let right = gaps.last().filter(|&(x, _, _)| x > right_limit).map(|(_, _, i)| i);
+            ((x < left_limit).then(|| first), right)
+        }
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pathfinder_geometry::rect::RectF;
+    use pathfinder_geometry::vector::Vector2F;
+
+    #[test]
+    fn test_the_gaps_method() {
+        // Three rectangles laid out left to right, already sorted by min_x
+        let boxes = vec![
+            (RectF::from_points(Vector2F::new(0.0, 0.0), Vector2F::new(10.0, 10.0)), 1), // Rectangle 1
+            (RectF::from_points(Vector2F::new(12.0, 0.0), Vector2F::new(22.0, 10.0)), 2), // Rectangle 2 (gap from 10 to 12)
+            (RectF::from_points(Vector2F::new(25.0, 0.0),Vector2F::new( 35.0, 10.0)), 3), // Rectangle 3 (gap from 22 to 25)
+        ];
+
+        // Minimum gap width to report; the 2-wide gap qualifies because the comparison is `>=`
+        let threshold = 2.0;
+
+        // Define the span function (maps rectangles to their min and max x-coordinates)
+        let span = |rect: &RectF| (rect.min_x(), rect.max_x());
+
+        // Call the gaps function
+        let gaps: Vec<f32> = gaps(threshold, &boxes, span).collect();
+
+        // Expected gaps are the midpoints of the gaps: [(10+12)/2 = 11, (22+25)/2 = 23.5]
+        let expected_gaps = vec![11.0, 23.5];
+
+        // Assert that the results match the expected values
+        assert_eq!(gaps, expected_gaps);
+    }
+}
\ No newline at end of file
diff --git a/src/node/line.rs b/src/node/line.rs
new file mode 100644
index 0000000..46cc0d0
--- /dev/null
+++ b/src/node/line.rs
@@ -0,0 +1,159 @@
+
+use std::collections::BTreeSet;
+use ordered_float::NotNan;
+use pathfinder_geometry::rect::RectF;
+
+use crate::util::avg;
+
+use super::{sort_x, sort_y, Node, NodeTag};
+
+/// Groups the axis-aligned segments in `lines` into buckets and records which
+/// bucket combinations each segment touches.
+///
+/// Every entry of `lines` is `[x1, y1, x2, y2]`. A segment with `x1 == x2` counts as
+/// vertical, otherwise one with `y1 == y2` counts as horizontal; all other segments are
+/// ignored. The distinct x positions of vertical segments and y positions of horizontal
+/// segments are merged by `dedup` into ascending `(first, last)` buckets.
+///
+/// `line_grid` holds `vlines.len() * hlines.len()` flags, addressed as
+/// `v * hlines.len() + h`.
+///
+/// NOTE(review): the range starts (`h_start`, `v_start`) pick the first bucket whose lower
+/// bound is not above the segment's first endpoint. Because the buckets are ascending,
+/// that is either bucket 0 or "no bucket" — confirm this is the intended test.
+pub fn analyze_lines(lines: &[[f32; 4]]) -> Lines {
+    // Index of the first bucket accepted by `accept`, or the bucket count when none is.
+    fn first_bucket(buckets: &[(f32, f32)], accept: impl Fn(f32, f32) -> bool) -> usize {
+        buckets.iter().position(|&(lo, hi)| accept(lo, hi)).unwrap_or(buckets.len())
+    }
+
+    // Pass 1: collect the distinct coordinates, sorted by the sets.
+    let mut h_coords = BTreeSet::new();
+    let mut v_coords = BTreeSet::new();
+    for &[x1, y1, x2, y2] in lines {
+        if x1 == x2 {
+            v_coords.insert(NotNan::new(x1).unwrap());
+        } else if y1 == y2 {
+            h_coords.insert(NotNan::new(y1).unwrap());
+        }
+    }
+
+    let hlines = dedup(h_coords.into_iter());
+    let vlines = dedup(v_coords.into_iter());
+    let h_count = hlines.len();
+
+    // Pass 2: flag the grid entries each segment runs through.
+    let mut line_grid = vec![false; vlines.len() * h_count];
+    for &[x1, y1, x2, y2] in lines {
+        if x1 == x2 {
+            // vertical line
+            let v_idx = first_bucket(&vlines, |lo, hi| lo <= x1 && x1 <= hi);
+            let h_start = first_bucket(&hlines, |lo, _| y1 >= lo);
+            let h_end = first_bucket(&hlines, |_, hi| y2 <= hi);
+            for h in h_start..h_end {
+                line_grid[v_idx * h_count + h] = true;
+            }
+        } else if y1 == y2 {
+            // horizontal line
+            let h_idx = first_bucket(&hlines, |lo, hi| lo <= y1 && y1 <= hi);
+            let v_start = first_bucket(&vlines, |lo, _| x1 >= lo);
+            let v_end = first_bucket(&vlines, |_, hi| x2 <= hi);
+            for v in v_start..v_end {
+                line_grid[v * h_count + h_idx] = true;
+            }
+        }
+    }
+
+    Lines { hlines, vlines, line_grid }
+}
+
+/// Collapses a sequence of coordinates into `(first, last)` runs.
+///
+/// Values are consumed in the order given. A value extends the current run when it is
+/// less than `10.0` above the run's latest value; otherwise it closes that run and opens
+/// a new one. A run of a single value is reported as `(v, v)`.
+fn dedup(lines: impl Iterator<Item=NotNan<f32>>) -> Vec<(f32, f32)> {
+    const THRESHOLD: f32 = 10.0;
+
+    let mut runs = Vec::new();
+    // The run currently being extended, as (first value, latest value).
+    let mut open_run: Option<(f32, f32)> = None;
+
+    for value in lines.map(NotNan::into_inner) {
+        open_run = match open_run {
+            Some((first, latest)) if latest + THRESHOLD > value => Some((first, value)),
+            Some(finished) => {
+                runs.push(finished);
+                Some((value, value))
+            }
+            None => Some((value, value)),
+        };
+    }
+
+    // Flush the run that was still open when the input ended.
+    runs.extend(open_run);
+    runs
+}
+
+/// Result of `analyze_lines`: the horizontal and vertical line buckets it found and a
+/// grid of flags relating them.
+#[derive(Debug)]
+pub struct Lines {
+    /// `(first, last)` y ranges of the horizontal line buckets, in ascending order.
+    pub hlines: Vec<(f32, f32)>,
+    /// `(first, last)` x ranges of the vertical line buckets, in ascending order.
+    pub vlines: Vec<(f32, f32)>,
+    /// `vlines.len() * hlines.len()` flags; the entry for vertical bucket `v` and
+    /// horizontal bucket `h` is at `v * hlines.len() + h`.
+    pub line_grid: Vec<bool>,
+}
+
+/// Splits a group of boxes into text lines by vertical position.
+///
+/// Deals with things like superscript and subscript, which fall outside the usual bounds
+/// but need to be assigned to the correct line.
+///
+/// Example, two lines:
+/// hello world
+/// m³2 test a number℡
+///
+/// The boxes are ordered with `sort_y` and walked in that order. A new line starts at the
+/// first box whose vertical center is more than half the average box height beyond the
+/// center of the box that opened the current line. The boxes of every line are then
+/// ordered with `sort_x`.
+///
+/// Returns
+/// * the empty singleton node when `boxes` is empty,
+/// * the singleton node of the only line when no split was found,
+/// * otherwise a `Node::Grid` tagged `NodeTag::Paragraph` with one cell per line and a
+///   y split at the `max_y` of every line except the last.
+pub fn overlapping_lines(boxes: &mut [(RectF, usize)]) -> Node {
+    // Guard: the code below indexes `boxes[0]` and unwraps the average height, both of
+    // which would panic on empty input.
+    if boxes.is_empty() {
+        return Node::singleton(&[]);
+    }
+
+    sort_y(boxes);
+    let avg_height = avg(boxes.iter().map(|(r, _)| r.height())).unwrap();
+
+    // Vertical center of the box that opened the current line.
+    let mut y_center = boxes[0].0.center().y();
+    let mut lines = vec![];
+    let mut y_splits = vec![];
+
+    let mut start = 0;
+    loop {
+        // First box (relative to `start`) that sits clearly beyond the current line.
+        let split_at = boxes[start..]
+            .iter()
+            .position(|(r, _)| r.center().y() > 0.5 * avg_height + y_center);
+        let end = match split_at {
+            Some(i) => start + i,
+            None => break,
+        };
+
+        // Read the next line's reference center before reordering the finished line.
+        let next_center = boxes[end].0.center().y();
+
+        sort_x(&mut boxes[start..end]);
+        let bbox = boxes[start..end].iter().map(|&(r, _)| r).reduce(|a, b| a.union_rect(b)).unwrap();
+
+        y_splits.push(bbox.max_y());
+        lines.push(Node::singleton(&boxes[start..end]));
+
+        y_center = next_center;
+        start = end;
+    }
+
+    // Whatever remains forms the last line.
+    sort_x(&mut boxes[start..]);
+    lines.push(Node::singleton(&boxes[start..]));
+
+    // `lines` holds at least the last line pushed above.
+    match lines.len() {
+        1 => lines.pop().unwrap(),
+        _ => Node::Grid {
+            x: vec![],
+            y: y_splits,
+            cells: lines,
+            tag: NodeTag::Paragraph
+        }
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use ordered_float::NotNan;
+
+    /// `dedup` must chain values that follow each other closely into one run and start a
+    /// new run at a larger jump.
+    #[test]
+    fn test_dedup() {
+        // Ascending coordinates. Each of the first four is less than 10.0 above its
+        // predecessor; the jump from 12.0 to 25.0 is not.
+        let coords = [1.0_f32, 5.0, 8.0, 12.0, 25.0, 28.0];
+        let input = coords.iter().map(|&c| NotNan::new(c).unwrap());
+
+        let result = dedup(input);
+
+        // Two runs, each reported as (first value, last value).
+        assert_eq!(result, vec![(1.0, 12.0), (25.0, 28.0)]);
+    }
+}
\ No newline at end of file
diff --git a/src/node/render.rs b/src/node/render.rs
new file mode 100644
index 0000000..618e581
--- /dev/null
+++ b/src/node/render.rs
@@ -0,0 +1,71 @@
+use font::Encoder;
+use itertools::Itertools;
+use pathfinder_geometry::{rect::RectF, vector::Vector2F};
+use pdf_render::TextSpan;
+
+use crate::classify::classify;
+
+use super::Node;
+
+/// Appends markup describing the layout tree rooted at `node` to `w`.
+///
+/// `spans` is the list that the indices stored in `Node::Final` refer to and `bbox` is
+/// the rectangle assigned to `node`. Delegates to `_render` with nesting level 0.
+pub fn render<E: Encoder>(w: &mut String, spans: &[TextSpan<E>], node: &Node, bbox: RectF) {
+    _render(w, spans, node, bbox, 0)
+}
+
+/// Recursive worker behind `render`: appends `<rect>` / `<line>` elements for `node` to `w`.
+///
+/// * `Node::Final` – one `<line>` per span index, drawn at the span rect's `max_y` from
+///   `min_x` to `max_x`; the `classify` result for the whole node is written as `class`.
+///   Nothing is written for an empty index list.
+/// * `Node::Grid` – a `<rect>` for `bbox` with the tag as `class`, a separator `<line>`
+///   before every row and every column except the first, and each cell rendered
+///   recursively inside its own sub-rectangle.
+/// * `Node::Table` – nothing is written.
+///
+/// `level` is the nesting depth; it ends up in the `level` attribute of separator lines.
+/// Results of the formatting macros are discarded.
+fn _render<E: Encoder>(w: &mut String, spans: &[TextSpan<E>], node: &Node, bbox: RectF, level: usize) {
+    use std::fmt::Write;
+    use std::iter::once;
+
+    match node {
+        Node::Final { indices } => {
+            if indices.is_empty() {
+                return;
+            }
+
+            // Classified once for the whole node; indices without a span are skipped here.
+            let class = classify(indices.iter().cloned().filter_map(|i| spans.get(i)));
+
+            for &i in indices.iter() {
+                let r = spans[i].rect;
+                let _ = write!(w, r#"<line x1="{}" x2="{}" y1="{}" y2="{}" class="{:?}" />"#,
+                    r.min_x(), r.max_x(), r.max_y(), r.max_y(),
+                    class
+                );
+            }
+        }
+        Node::Grid { x, y, cells, tag } => {
+            let columns = x.len() + 1;
+            let _ = write!(w, r#"<rect x="{}" y="{}" width="{}" height="{}" class="{:?}" />"#,
+                bbox.min_x(), bbox.min_y(), bbox.width(), bbox.height(), tag
+            );
+
+            // Row boundaries: top of bbox, the y splits, bottom of bbox, taken pairwise.
+            let row_edges = once(bbox.min_y())
+                .chain(y.iter().cloned())
+                .chain(once(bbox.max_y()))
+                .tuple_windows::<(f32, f32)>();
+
+            for (row_nr, ((top, bottom), row)) in row_edges.zip(cells.chunks_exact(columns)).enumerate() {
+                if row_nr > 0 {
+                    let _ = writeln!(w, r#"<line x1="{}" x2="{}" y1="{}" y2="{}" level="{level}"></line>"#,
+                        bbox.min_x(), bbox.max_x(), top, top);
+                }
+
+                // Column boundaries, built the same way from the x splits.
+                let col_edges = once(bbox.min_x())
+                    .chain(x.iter().cloned())
+                    .chain(once(bbox.max_x()))
+                    .tuple_windows::<(f32, f32)>();
+
+                for (col_nr, ((left, right), cell)) in col_edges.zip(row).enumerate() {
+                    if col_nr > 0 {
+                        let _ = writeln!(w, r#"<line x1="{}" x2="{}" y1="{}" y2="{}" level="{level}"></line>"#,
+                            left, left, bbox.min_y(), bbox.max_y());
+                    }
+
+                    let cell_bbox = RectF::from_points(Vector2F::new(left, top), Vector2F::new(right, bottom));
+                    _render(w, spans, cell, cell_bbox, level + 1);
+                }
+            }
+        }
+        Node::Table { .. } => {}
+    }
+}
diff --git a/src/node/table.rs b/src/node/table.rs
new file mode 100644
index 0000000..4d433f3
--- /dev/null
+++ b/src/node/table.rs
@@ -0,0 +1,253 @@
+use font::Encoder;
+use pathfinder_geometry::rect::RectF;
+use pdf_render::TextSpan;
+use itertools::Itertools;
+use ordered_float::NotNan;
+use crate::{node::{sort_x, sort_y, NodeTag}, util::avg};
+use super::{gap::{dist_y, gaps}, line::Lines, split_by, Node};
+
+pub use table::Table;
+
+/// Splits `boxes` into lines and turns runs of table-like lines into `Node::Table`s.
+///
+/// Steps, as implemented below:
+/// 1. Order the boxes with `sort_y` and group consecutive boxes into one line for as long
+///    as their vertical spans keep a common overlap. Each line is ordered with `sort_x`
+///    and handed to `build_line`.
+/// 2. Starting at each line tagged `Unknown` or `Table`, treat everything up to (not
+///    including) the next line tagged `Text` as one table. Lines in front of it become
+///    `Node::Final` entries.
+/// 3. For every table, derive the columns from the horizontal spans of its parts, then
+///    fill a `Table`, merging a line into the current row when it follows the previous
+///    line closely and no horizontal ruling line from `lines_info` lies between them.
+///
+/// Returns the only resulting part, or a `Node::Grid` tagged `NodeTag::Complex` that
+/// stacks all parts, split halfway between consecutive parts.
+///
+/// Panics if `boxes` is empty (it indexes `boxes[0]`).
+pub fn split<E: Encoder>(boxes: &mut [(RectF, usize)], spans: &[TextSpan<E>], lines_info: &Lines) -> Node {
+    sort_y(boxes);
+    let mut lines = vec![];
+    // Running intersection of the vertical spans of the boxes in the current line.
+    let mut y = Span::vert(&boxes[0].0).unwrap();
+    // NOTE(review): `items` is never read in this function.
+    let mut items = vec![boxes[0]];
+    
+    let mut line = vec![boxes[0]];
+    for &(rect, i) in &boxes[1..] {
+        let y2 = Span::vert(&rect).unwrap();
+        if let Some(overlap) = y.intersect(y2) {
+            // Still overlaps everything in the line: narrow the common span.
+            y = overlap;
+        } else {
+            // No common overlap left: finish the line and start a new one at this box.
+            sort_x(&mut line);
+            lines.push(build_line(&line, spans));
+            line.clear();
+            y = y2
+        }
+        line.push((rect, i));
+    }
+    sort_x(&mut line);
+    lines.push(build_line(&line, spans));
+
+
+    // Vertical parts of the result as (vertical span, node), in line order.
+    let mut vparts = vec![];
+    let mut start = 0;
+    // Find the next line that may start a table.
+    while let Some(p) = lines[start..].iter().position(|(tag, _, line)| matches!(tag, LineTag::Unknown | LineTag::Table)) {
+        let table_start = start + p;
+        // The table runs up to the next line tagged `Text`, or to the end.
+        let table_end = lines[table_start+1..].iter().position(|(tag, _, _)| matches!(tag, LineTag::Text)).map(|e| table_start+1+e).unwrap_or(lines.len());
+        
+        // Lines skipped before the table are emitted as plain `Final` nodes.
+        for &(_, y, ref line) in &lines[start..table_start] {
+            vparts.push((y, Node::Final { indices: line.iter().flat_map(|(_, indices)| indices.iter().cloned()).collect() }));
+        }
+
+        // From here on `lines` is only the slice belonging to this table.
+        let lines = &lines[table_start..table_end];
+        start = table_end;
+
+        // Columns: every part narrows each column it overlaps to the shared range; a part
+        // that overlaps no existing column opens a new one.
+        let mut columns: Vec<Span> = vec![];
+        for (_, _, line) in lines.iter() {
+            for &(x, ref parts) in line.iter() {
+                // find any column that is contained in this
+                let mut found = 0;
+                for span in columns.iter_mut() {
+                    if let Some(overlap) = span.intersect(x) {
+                        *span = overlap;
+                        found += 1;
+                    }
+                }
+                if found == 0 {
+                    columns.push(x);
+                }
+            }
+        }
+        // Average vertical distance between the end of one table line and the start of the next.
+        let avg_vgap = avg(lines.iter().map(|(_, y, _)| y).tuple_windows().map(|(a, b)| *(b.start - a.end)));
+
+        columns.sort_by_key(|s| s.start);
+
+        // NOTE(review): `buf` is never used.
+        let mut buf = String::new();
+
+        let d_threshold = avg_vgap.unwrap_or(0.0);
+        let mut prev_end = None;
+
+        let mut table: Table<Vec<usize>> = Table::empty(lines.len() as u32, columns.len() as u32);
+
+        let mut row = 0;
+        for (_, span, line) in lines {
+            // NOTE(review): `col` is written below but never read.
+            let mut col = 0;
+            
+            // Merge into the current row when this line starts less than `d_threshold`
+            // after the previous line's end and no horizontal line bucket midpoint lies
+            // strictly between the two.
+            let combine = prev_end.map(|y: NotNan<f32>| {
+                if *(span.start - y) < d_threshold {
+                    !lines_info.hlines.iter().map(|(a, b)| 0.5 * (a+b)).any(|l| *y < l && *span.start > l)
+                } else {
+                    false
+                }
+            }).unwrap_or(false);
+
+            // NOTE(review): `row` is incremented before the first line is stored, so the
+            // first line is written to row 1 — confirm the row indexing `Table` expects.
+            if !combine {
+                row += 1;
+            }
+
+            for &(x, ref parts) in line {
+                // Indices of all columns this part overlaps.
+                let mut cols = columns.iter().enumerate()
+                    .filter(|&(_, &x2)| x.intersect(x2).is_some())
+                    .map(|(i, _)| i);
+
+                let first_col = cols.next().unwrap();
+                let last_col = cols.last().unwrap_or(first_col);
+
+                // When combining, extend the cell already at (row, first_col) if there is
+                // one; in every other case store a new cell spanning the overlapped columns.
+                if let Some(cell) = combine.then(|| table.get_cell_value_mut(row, first_col as u32)).flatten() {
+                    // append to previous line
+                    cell.extend_from_slice(parts);
+                } else {
+                    let colspan = (last_col - first_col) as u32 + 1;
+                    let rowspan = 1;
+                    table.set_cell(parts.clone(), row, first_col as u32, rowspan, colspan);
+                }
+                col = last_col + 1;
+            }
+            prev_end = Some(span.end);
+        }
+        // Vertical extent of the table: start of its first line to end of its last line.
+        let y = Span { start: lines[0].1.start, end: lines.last().unwrap().1.end };
+        vparts.push((y, Node::Table { table }));
+    }
+    // Lines after the last table are emitted as plain `Final` nodes.
+    for &(_, y, ref line) in &lines[start..] {
+        vparts.push((y, Node::Final { indices: line.iter().flat_map(|(_, indices)| indices.iter().cloned()).collect() }));
+    }
+
+    if vparts.len() > 1 {
+        // Split halfway between the end of one part and the start of the next.
+        let y = vparts.iter().tuple_windows().map(|(a, b)| 0.5 * (a.0.end + b.0.start).into_inner()).collect();
+        Node::Grid {
+            tag: NodeTag::Complex,
+            x: vec![],
+            y,
+            cells: vparts.into_iter().map(|(_, n)| n).collect()
+        }
+    } else {
+        vparts.pop().unwrap().1
+    }
+}
+
+/// Classification that `build_line` assigns to a line from the gaps between its parts.
+#[derive(Debug)]
+enum LineTag {
+    /// The line has a single part, so there is no gap to judge.
+    Unknown,
+    /// The widest gap between neighbouring parts is below 0.3 times the average font size.
+    Text,
+    /// The widest gap between neighbouring parts is at least that large.
+    Table,
+}
+
+/// Merges the boxes of one line into horizontally separated parts and classifies the line.
+///
+/// `boxes` must be non-empty and is walked in the order given. A box whose horizontal
+/// span overlaps the current part is merged into it; otherwise it opens a new part.
+///
+/// Returns `(tag, y, parts)`:
+/// * `tag` – `Unknown` for a single part, `Text` when the widest gap between neighbouring
+///   parts is below `0.3 *` the average font size of the line's spans, `Table` otherwise;
+/// * `y` – union of the vertical spans of all boxes;
+/// * `parts` – every part's horizontal span together with the span indices it contains.
+///
+/// Panics on empty `boxes`, on NaN coordinates, and when the font size average is `None`.
+fn build_line<E: Encoder>(boxes: &[(RectF, usize)], spans: &[TextSpan<E>]) -> (LineTag, Span, Vec<(Span, Vec<usize>)>) {
+    let (first_rect, first_idx) = boxes[0];
+    let mut part_x = Span::horiz(&first_rect).unwrap();
+    let mut y = Span::vert(&first_rect).unwrap();
+    let mut part_items = vec![first_idx];
+    let mut parts = Vec::new();
+
+    for &(rect, idx) in &boxes[1..] {
+        y = y.union(Span::vert(&rect).unwrap()).unwrap();
+        let x = Span::horiz(&rect).unwrap();
+        match part_x.union(x) {
+            Some(merged) => {
+                part_x = merged;
+                part_items.push(idx);
+            }
+            None => {
+                // Close the finished part and start the next one with this box.
+                parts.push((part_x, std::mem::replace(&mut part_items, vec![idx])));
+                part_x = x;
+            }
+        }
+    }
+    parts.push((part_x, part_items));
+
+    let avg_font_size = avg(boxes.iter().filter_map(|&(_, i)| spans.get(i)).map(|s| s.font_size)).unwrap();
+
+    // Largest distance from the end of one part to the start of the next; `None` for one part.
+    let widest_gap = parts.iter().tuple_windows().map(|(l, r)| r.0.start - l.0.end).max();
+    let tag = match widest_gap {
+        None => LineTag::Unknown,
+        Some(gap) => {
+            if gap.into_inner() < 0.3 * avg_font_size {
+                LineTag::Text
+            } else {
+                LineTag::Table
+            }
+        }
+    };
+
+    (tag, y, parts)
+}
+
+/// A one-dimensional interval from `start` to `end`, used for both horizontal and
+/// vertical extents. `Span::new` orders the endpoints so that `start <= end`.
+#[derive(Copy, Clone, Debug)]
+struct Span {
+    /// Lower endpoint.
+    start: NotNan<f32>,
+    /// Upper endpoint.
+    end: NotNan<f32>,
+}
+impl Span {
+    /// Horizontal extent of `rect`, from `min_x` to `max_x`.
+    fn horiz(rect: &RectF) -> Option<Self> {
+        Self::new(rect.min_x(), rect.max_x())
+    }
+
+    /// Vertical extent of `rect`, from `min_y` to `max_y`.
+    fn vert(rect: &RectF) -> Option<Self> {
+        Self::new(rect.min_y(), rect.max_y())
+    }
+
+    /// Builds a span from two endpoints given in either order.
+    /// Returns `None` when an endpoint is NaN.
+    fn new(start: f32, end: f32) -> Option<Self> {
+        let (lo, hi) = if start > end { (end, start) } else { (start, end) };
+        let start = NotNan::new(lo).ok()?;
+        let end = NotNan::new(hi).ok()?;
+        Some(Span { start, end })
+    }
+
+    /// True when the two spans share at least one point (touching endpoints count).
+    fn overlaps(self, other: Span) -> bool {
+        self.start <= other.end && other.start <= self.end
+    }
+
+    /// The range common to both spans, or `None` when they do not overlap.
+    fn intersect(self, other: Span) -> Option<Span> {
+        if !self.overlaps(other) {
+            return None;
+        }
+        Some(Span {
+            start: self.start.max(other.start),
+            end: self.end.min(other.end),
+        })
+    }
+
+    /// The smallest span covering both spans, or `None` when they do not overlap.
+    fn union(self, other: Span) -> Option<Span> {
+        if !self.overlaps(other) {
+            return None;
+        }
+        Some(Span {
+            start: self.start.min(other.start),
+            end: self.end.max(other.end),
+        })
+    }
+}
+
+/// Recursively splits `boxes` into rows at their larger vertical gaps.
+///
+/// Fewer than two boxes yield a singleton node. When `dist_y` reports no gap, the boxes
+/// are ordered with `sort_x` and returned as a singleton. Otherwise every gap found by
+/// `gaps` with a threshold of 0.8 times the value reported by `dist_y` becomes a y split,
+/// and each resulting row is split again. The grid is tagged `Paragraph` when every cell
+/// has a tag of at most `Line`, `Complex` otherwise.
+///
+/// Panics (assertion) if a row contains all of the boxes, which would recurse forever.
+#[allow(dead_code)]
+fn split_v(boxes: &mut [(RectF, usize)]) -> Node {
+    let num_boxes = boxes.len();
+    if num_boxes < 2 {
+        return Node::singleton(boxes);
+    }
+
+    let largest = dist_y(boxes);
+    let threshold = match largest {
+        Some((max_gap, _)) => max_gap * 0.8,
+        None => {
+            sort_x(boxes);
+            return Node::singleton(boxes);
+        }
+    };
+
+    let y_gaps: Vec<f32> = gaps(threshold, boxes, |r| (r.min_y(), r.max_y())).collect();
+
+    let cells: Vec<Node> = split_by(boxes, &y_gaps, |r| r.min_y())
+        .map(|row| {
+            assert!(row.len() < num_boxes);
+            split_v(row)
+        })
+        .collect();
+
+    let only_lines = cells.iter().all(|n| n.tag() <= NodeTag::Line);
+    let tag = if only_lines { NodeTag::Paragraph } else { NodeTag::Complex };
+
+    Node::Grid {
+        x: vec![],
+        y: y_gaps,
+        cells,
+        tag,
+    }
+}
\ No newline at end of file
diff --git a/src/text.rs b/src/text.rs
index 2f6e6cd..e31fe76 100644
--- a/src/text.rs
+++ b/src/text.rs
@@ -1,15 +1,202 @@
+use std::mem::take;
+
+use font::Encoder;
 use pathfinder_geometry::vector::Vector2F;
 use pdf_render::TextSpan;
-use itertools::{Itertools};
+use itertools::Itertools;
 use unicode_normalization::UnicodeNormalization;
-use crate::{util::avg, entry::Word, util::Rect};
+use crate::{flow::{Char, Rect, Word}, util::avg};
+
+/// Appends the text of `items` to `out` and returns the words found in it.
+///
+/// Characters are processed span by span. A word ends
+/// * at a whitespace character — a single `' '` is written to `out` for it, and
+///   whitespace that directly follows other whitespace is dropped; or
+/// * where a character starts more than the `analyze_word_gap` result beyond the end of
+///   the previous character — nothing is written to `out` between the two words.
+///
+/// Each returned `Word` holds its text, its bounding `Rect` and one `Char` per character.
+/// Character text is copied to `out` unchanged (no normalization).
+///
+/// NOTE(review): if `out` already ends in a non-whitespace character, the first character
+/// is handled with the initial empty builder (start position 0.0): a whitespace or
+/// large-gap character then pushes a `Word` with empty text, and a character that
+/// continues the "word" gives it a left edge of 0.0 — confirm callers never pass such an `out`.
+pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<Item=&'a TextSpan<E>> + Clone) -> Vec<Word> {
+    let word_gap = analyze_word_gap(items.clone());
+    let mut words = Vec::new();
+    let mut current_word = WordBuilder::new(out.len(), 0.0);
+    
+    // Whether the last processed TextChar is a whitespace
+    // ' '        Space
+    // '\t'       Tab
+    // '\n'       Line feed
+    // '\r'       Carriage return
+    // '\u{00A0}' Non-breaking space
+    // An empty `out` counts as ending in whitespace.
+    let mut trailing_space = out.chars().last().map_or(true, |c| c.is_whitespace());
+
+    let mut end = 0.; // trailing edge of the last char
+
+    for span in items {
+        // Byte index into `span.text` where the current char's text starts.
+        let mut offset = 0;
+        let tr_inv = span.transform.matrix.inverse();
+        // Device space to em space
+        let x_off = (tr_inv * span.transform.vector).x();
+
+        let mut chars = span.chars.iter().peekable();
 
-pub fn concat_text<'a>(out: &mut String, items: impl Iterator<Item=&'a TextSpan> + Clone) -> Vec<Word> {
-    let mut words = vec![];
+        while let Some(current) = chars.next() {
+            // Get text for current char
+            // It runs up to the next char's offset, or to the end of the span text.
+            let text = if let Some(next) = chars.peek() {
+                let s = &span.text[offset..next.offset];
+                offset = next.offset;
+                s
+            } else {
+                &span.text[offset..]
+            };
+
+            // Calculate char positions in device space
+            let char_start = (span.transform * Vector2F::new(current.pos, 0.0)).x();
+            let char_end = (span.transform * Vector2F::new(current.pos + current.width, 0.0)).x();
+            
+            // Note: empty text also counts as whitespace (`all` on an empty iterator).
+            let is_whitespace = text.chars().all(|c| c.is_whitespace());
+           
+            // Byte length of this char's text
+            let bytes_offset = text.len();
+            // Handle word boundaries
+            if trailing_space && !is_whitespace {
+                // Start new word after space
+                current_word = WordBuilder::new(out.len(),char_start);
+                current_word.add_char(bytes_offset, char_start, char_end);
+
+                out.push_str(text);
+            } else if !trailing_space {
+                if is_whitespace {
+                    // End word at space
+                    words.push(current_word.build(out));
+
+                    out.push(' ');
+                    current_word = WordBuilder::new(out.len(),char_start);
+                } else if current.pos + x_off > end + word_gap {
+                  
+                    // End word at large gap
+                    words.push(current_word.build(out));
+
+                    current_word = WordBuilder::new(out.len(), char_start);
+                    current_word.add_char(bytes_offset, char_start, char_end);
+
+                    out.push_str(text);
+                } else {
+                    // Continue current word
+                    current_word.add_char(bytes_offset, char_start, char_end);
+
+                    // out.extend(text.nfkc()); // nfkc will change the bytes length of a char.
+                    out.push_str(text);
+                }
+            }
+            // Remaining case (whitespace after whitespace): nothing is written.
+            trailing_space = is_whitespace;
+
+            // Same units as the gap test above (`pos + x_off`).
+            end = current.pos + x_off + current.width;
+
+            // Applied to whichever builder is current, including for whitespace chars.
+            current_word.update_bounds(span.rect.min_y(), span.rect.max_y());
+        }
+    }
+
+    // Add final word if any
+    if !current_word.is_empty() {
+        words.push(current_word.build(out));
+    }
+
+    words
+}
+
+// Helper struct to build up words
+// Collects the chars and extents of one word until `build` turns it into a `Word`.
+struct WordBuilder {
+    // Byte index into the output string at which this word's text starts
+    word_start_idx: usize,
+
+    // For calculating the layout(position, width , height) of a word
+    // start_pos: left edge, fixed at construction
+    // end_pos: right edge of the most recently added char (0.0 until one is added)
+    // y_min / y_max: vertical extent accumulated by `update_bounds`
+    start_pos: f32,
+    end_pos: f32, 
+    y_min: f32,
+    y_max: f32,
+
+    // Chars added so far, with offsets relative to the start of the word
+    chars: Vec<Char>,
+    // Sum of the byte lengths of the chars added so far, i.e. the offset of the next char
+    bytes_offset: usize,
+
+    // New word
+    // True until the first `update_bounds` call
+    new: bool,
+}
+
+impl WordBuilder {
+    /// Starts an empty word whose text begins at byte `word_start_idx` of the output
+    /// string and whose left edge is `start_pos`.
+    fn new(word_start_idx: usize, start_pos: f32) -> Self {
+        WordBuilder {
+            word_start_idx,
+            start_pos,
+            end_pos: 0.0,
+            y_min: f32::INFINITY,
+            y_max: f32::NEG_INFINITY,
+            chars: Vec::new(),
+            bytes_offset: 0,
+            new: true,
+        }
+    }
+
+    /// Records one char spanning `start..end`. `bytes_offset` is the byte length of the
+    /// char's text; it advances the offset assigned to the next char.
+    fn add_char(&mut self, bytes_offset: usize, start: f32, end: f32) {
+        let ch = Char {
+            offset: self.bytes_offset,
+            pos: start,
+            width: end - start,
+        };
+        self.chars.push(ch);
+        self.bytes_offset += bytes_offset;
+        self.end_pos = end;
+    }
+
+    /// Grows the vertical extent to include `min_y..max_y`; the first call sets it outright.
+    fn update_bounds(&mut self, min_y: f32, max_y: f32) {
+        if !self.new {
+            self.y_min = self.y_min.min(min_y);
+            self.y_max = self.y_max.max(max_y);
+            return;
+        }
+
+        self.y_min = min_y;
+        self.y_max = max_y;
+        self.new = false;
+    }
+
+    /// True while no char has been added.
+    fn is_empty(&self) -> bool {
+        self.chars.is_empty()
+    }
+
+    /// Finishes the word. Its text is everything in `out` from the word's start index to
+    /// the end, so this must be called before anything else is appended to `out`.
+    fn build(mut self, out: &str) -> Word {
+        let chars = take(&mut self.chars);
+        let rect = Rect {
+            x: self.start_pos,
+            y: self.y_min,
+            h: self.y_max - self.y_min,
+            w: self.end_pos - self.start_pos,
+        };
+
+        Word {
+            text: out[self.word_start_idx..].into(),
+            rect,
+            chars,
+        }
+    }
+}
+
+/// Calculate gaps between each char, the return value unit is em
+
+/// The most important thing here is to make sure the gap is bigger than char gap, and less than word gap.
+/// for example: 
+/// think of something like "ab____________c de"
+/// 
+/// a-b has a zero space (or 0.01)
+/// b-c has a huge space of 10
+/// c-d has 0.2
+/// d-e has 0.01
+/// if we just take the average = 10.2 and divide that by 4 we get 2.5
+/// and now c-d is smaller than that and not classified as a space
+/// but if b-c is capped by the threshold of 0.5, the sum is 0.7, and the avg is 0.7/4 ~ 0.18
+/// and everything is fine.
+
+/// 0 + min(0.5, 10) + 0.2 + 0
+/// 10 capped at 0.5 is 0.5
+/// min(0, 0.5) + min(10, 0.5) + min(0.2, 0.5) + min(0, 0.5)
+/// 0 + 0.5 + 0.2 + 0
+/// every value is limited to be at least 0.01 and not more than 0.5.
+/// the 0.5 is 0.25 * font size of the left char and 0.25 * font size of the right char
+/// if they are the same font size it is 0.5
+fn analyze_word_gap<'a, E: Encoder + 'a>(items: impl Iterator<Item=&'a TextSpan<E>> + Clone) -> f32 {
     let gaps = items.clone()
         .flat_map(|s| {
+            // the transform matrix is from em space to device space
+            // so we need to invert it, becoming device space to em space
             let tr_inv = s.transform.matrix.inverse();
             let pos = (tr_inv * s.transform.vector).x();
+
             s.chars.iter()
                 .filter(|c| !s.text[c.offset..].chars().next().unwrap().is_whitespace())
                 .map(move |c| (c.pos + pos, c.pos + pos + c.width, s.font_size))
@@ -17,78 +204,82 @@ pub fn concat_text<'a>(out: &mut String, items: impl Iterator<Item=&'a TextSpan>
         .tuple_windows()
         .filter(|(a, b)| b.0 > a.0)
         .map(|(a, b)| (b.0 - a.1).max(0.01).min(0.25 * (a.2 + b.2)));
-    
-    let font_size = avg(items.clone().map(|s| s.font_size)).unwrap();
+
+    let avg_font_size = avg(items.clone().map(|s| s.font_size)).unwrap();
     //gaps.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
-    let space_gap = (0.5 * font_size).min(2.0 * avg(gaps).unwrap_or(0.0)); //2.0 * gaps[gaps.len()/2];
-    let mut end = 0.; // trailing edge of the last char
-    let mut trailing_space = out.chars().last().map(|c| c.is_whitespace()).unwrap_or(true);
-    let mut word_start_pos = 0.0;
-    let mut word_start_idx = out.len();
-    let mut y_min = f32::INFINITY;
-    let mut y_max = -f32::INFINITY;
-    let mut word_start = true;
-    let mut word_end = 0.0;
 
-    for span in items {
-        let mut pos = 0; // byte index of last char into span.text
-        let tr_inv = span.transform.matrix.inverse();
-        let x_off = (tr_inv * span.transform.vector).x();
-        for c in span.chars.iter() {
+    (0.5 * avg_font_size).min(2.0 * avg(gaps).unwrap_or(0.0)) //2.0 * gaps[gaps.len()/2];
+}
 
-            let s = &span.text[pos..c.offset];
-            if c.offset > 0 {
-                let is_whitespace = s.chars().all(|c| c.is_whitespace());
-                if !trailing_space || !is_whitespace {
-                    out.extend(s.nfkc());
-                }
-                trailing_space = is_whitespace;
-            }
-            if !trailing_space && c.pos + x_off > end + space_gap {
-                words.push(Word {
-                    text: out[word_start_idx..].into(),
-                    rect: Rect {
-                        x: word_start_pos,
-                        y: y_min,
-                        h: y_max - y_min,
-                        w: word_end - word_start_pos
-                    }
-                });
-                
-                out.push(' ');
-                trailing_space = true;
-                word_start = true;
-                word_start_idx = out.len();
-            }
-            pos = c.offset;
-            end = c.pos + x_off + c.width;
-            if c.offset == 0 || !trailing_space {
-                word_end = (span.transform.matrix * Vector2F::new(end, 0.0)).x();
-            }
+#[cfg(test)]
+mod tests {
+    use pathfinder_geometry::{rect::RectF, transform2d::Transform2F};
+    use pdf_render::{font::OutlineBuilder, Fill, TextChar};
 
-            if word_start {
-                y_min = span.rect.min_y();
-                y_max = span.rect.max_y();
-                word_start_pos = (span.transform.matrix * Vector2F::new(c.pos + x_off, 0.0)).x();
-                word_start = false;
-            } else {
-                y_min = y_min.min(span.rect.min_y());
-                y_max = y_max.max(span.rect.max_y());
+    use super::*;
+
+    #[test]
+    fn test_concat_text() {
+        let text_span: TextSpan<OutlineBuilder> = TextSpan {
+            rect: RectF::from_points(Vector2F::new(56.8, 55.85077), Vector2F::new(136.26399, 67.85077)),
+            width: 79.464,
+            bbox: None,
+            font_size: 12.0,
+            font: None,
+            text: "hello world".to_string(),
+            chars: vec![
+                TextChar { offset: 0, pos: 0.0, width: 7.224001 },
+                TextChar { offset: 1, pos: 7.224001, width: 7.224001 },
+                TextChar { offset: 2, pos: 14.448002, width: 7.224001 },
+                TextChar { offset: 3, pos: 21.672003, width: 7.224001 },
+                TextChar { offset: 4, pos: 28.896004, width: 7.224001 },
+                TextChar { offset: 5, pos: 36.120003, width: 7.224001 },
+                TextChar { offset: 6, pos: 43.344, width: 7.224001 },
+                TextChar { offset: 7, pos: 50.568, width: 7.224001 },
+                TextChar { offset: 8, pos: 57.792, width: 7.224001 },
+                TextChar { offset: 9, pos: 65.016, width: 7.224001 },
+                TextChar { offset: 10, pos: 72.24, width: 7.224001 },
+            ],
+            color: Fill::Solid(0.0, 0.5019608, 0.0),
+            alpha: 1.0,
+            transform: Transform2F::row_major(1.0, 0.0, 56.8, 0.0, 1.0, 67.85077),
+            mode: pdf::content::TextMode::Fill,
+            op_nr: 18,
+        };
+
+        let mut output = String::new();
+        let words = concat_text(&mut output, vec![&text_span].into_iter());
+
+        // Assert the concatenated text
+        assert_eq!(output, "hello world");
+
+        // Assert the words
+        // Expect two words: "hello" and "world"
+        assert_eq!(words.len(), 2); 
+        assert_eq!(words[0].text, "hello");
+        assert_eq!(words[1].text, "world");
+
+        // Assert chars positions
+        for w in words {
+            let text = &w.text;
+            let mut offset = 0;
+            
+            let mut texts = vec![];
+
+            let mut chars = w.chars.iter().peekable();
+
+            while let Some(_) = chars.next() {
+                // Get text for current char
+                let s = if let Some(next) = chars.peek() {
+                    let s = &text[offset..next.offset];
+                    offset = next.offset;
+                    s
+                } else {
+                    &text[offset..]
+                };
+
+                texts.push(s);
             }
         }
-        trailing_space = span.text[pos..].chars().all(|c| c.is_whitespace());
-
-        out.extend(span.text[pos..].nfkc());
     }
-    words.push(Word {
-        text: out[word_start_idx..].into(),
-        rect: Rect {
-            x: word_start_pos,
-            y: y_min,
-            h: y_max - y_min,
-            w: word_end - word_start_pos
-        }
-    });
-    
-    words
 }
\ No newline at end of file
diff --git a/src/tree.rs b/src/tree.rs
deleted file mode 100644
index 2eb696c..0000000
--- a/src/tree.rs
+++ /dev/null
@@ -1,958 +0,0 @@
-use pdf_render::TextSpan;
-use pathfinder_geometry::{
-    vector::Vector2F,
-    rect::RectF
-};
-#[cfg(feature="ocr")]
-use tesseract_plumbing::Text;
-
-use std::collections::BTreeSet;
-use std::iter::once;
-use std::sync::Arc;
-use itertools::{Itertools};
-use ordered_float::NotNan;
-use crate::entry::{Flow, Line, Run, RunType, Word};
-use crate::util::{is_number, avg, CellContent};
-use crate::text::{concat_text};
-use std::mem::take;
-use table::Table;
-
-pub fn build(spans: &[TextSpan], bbox: RectF, lines: &[[f32; 4]]) -> Node {
-    if spans.len() == 0 {
-        return Node::singleton(&[]);
-    }
-
-    let mut boxes: Vec<(RectF, usize)> = spans.iter().enumerate().map(|(i, t)| (t.rect, i)).collect();
-    let mut boxes = boxes.as_mut_slice();
-    
-    let avg_font_size = avg(spans.iter().map(|s| s.font_size)).unwrap();
-    let probaby_header = |boxes: &[(RectF, usize)]| {
-        let class = classify(boxes.iter().filter_map(|&(_, i)| spans.get(i)));
-        if matches!(class, Class::Header | Class::Number) {
-            return true;
-        }
-        let f = avg(boxes.iter().filter_map(|&(_, i)| spans.get(i)).map(|s| s.font_size)).unwrap();
-        f > avg_font_size
-    };
-    let probably_footer = |boxes: &mut [(RectF, usize)]| {
-        sort_x(boxes);
-        let x_gaps: Vec<f32> = gaps(avg_font_size, boxes, |r| (r.min_x(), r.max_x()))
-            .collect();
-        
-        let count = split_by(boxes, &x_gaps, |r| r.min_x()).filter(|cell| probaby_header(cell)).count();
-        count == x_gaps.len() + 1
-    };
-
-    sort_y(boxes);
-    let (top, bottom) = top_bottom_gap(boxes, bbox);
-    if let Some(bottom) = bottom {
-        if probably_footer(&mut boxes[bottom..]) {
-            boxes = &mut boxes[..bottom];
-        }
-    }
-    if let Some(top) = top {
-        if probaby_header(&mut boxes[..top]) {
-            boxes = &mut boxes[top..];
-        }
-    }
-    sort_x(boxes);
-    let (left, right) = left_right_gap(boxes, bbox);
-    if let Some(right) = right {
-        if probaby_header(&boxes[right..]) {
-            boxes = &mut boxes[..right];
-        }
-    }
-    if let Some(left) = left {
-        if probaby_header(&boxes[..left]) {
-            boxes = &mut boxes[left..];
-        }
-    }
-    let lines = analyze_lines(lines);
-    split(boxes, &spans, &lines)
-}
-
-fn analyze_lines(lines: &[[f32; 4]]) -> Lines {
-    let mut hlines = BTreeSet::new();
-    let mut vlines = BTreeSet::new();
-
-    for &[x1, y1, x2, y2] in lines {
-        if x1 == x2 {
-            vlines.insert(NotNan::new(x1).unwrap());
-        } else if y1 == y2 {
-            hlines.insert(NotNan::new(y1).unwrap());
-        }
-    }
-
-    fn dedup(lines: impl Iterator<Item=NotNan<f32>>) -> Vec<(f32, f32)> {
-        let threshold = 10.0;
-        let mut out = vec![];
-        let mut lines = lines.map(|f| *f).peekable();
-        while let Some(start) = lines.next() {
-            let mut last = start;
-            while let Some(&p) = lines.peek() {
-                if last + threshold > p {
-                    last = p;
-                    lines.next();
-                } else {
-                    break;
-                }
-            }
-            out.push((start, last));
-        }
-        out
-    }
-
-    let hlines = dedup(hlines.iter().cloned());
-    let vlines = dedup(vlines.iter().cloned());
-
-    let mut line_grid = vec![false; vlines.len() * hlines.len()];
-    for &[x1, y1, x2, y2] in lines {
-        if x1 == x2 {
-            let v_idx = vlines.iter().position(|&(a, b)| a <= x1 && x1 <= b).unwrap_or(vlines.len());
-            let h_start = hlines.iter().position(|&(a, b)| y1 >= a).unwrap_or(hlines.len());
-            let h_end = hlines.iter().position(|&(a, b)| y2 <= b).unwrap_or(hlines.len());
-            for h in h_start .. h_end {
-                line_grid[v_idx * hlines.len() + h] = true;
-            }
-        } else if y1 == y2 {
-            let h_idx = hlines.iter().position(|&(a, b)| a <= y1 && y1 <= b).unwrap_or(hlines.len());
-            let v_start = vlines.iter().position(|&(a, b)| x1 >= a).unwrap_or(vlines.len());
-            let v_end = vlines.iter().position(|&(a, b)| x2 <= b).unwrap_or(vlines.len());
-            for v in v_start .. v_end {
-                line_grid[v * hlines.len() + h_idx] = true;
-            }
-        }
-    }
-
-
-    //println!("hlines: {:?}", hlines);
-    //println!("vlines: {:?}", vlines);
-
-    Lines { hlines, vlines, line_grid }
-}
-
-pub struct Lines {
-    hlines: Vec<(f32, f32)>,
-    vlines: Vec<(f32, f32)>,
-    line_grid: Vec<bool>,
-}
-
-#[derive(Copy, Clone, Debug)]
-struct Span {
-    start: NotNan<f32>,
-    end: NotNan<f32>,
-}
-impl Span {
-    fn horiz(rect: &RectF) -> Option<Self> {
-        Self::new(rect.min_x(), rect.max_x())
-    }
-    fn vert(rect: &RectF) -> Option<Self> {
-        Self::new(rect.min_y(), rect.max_y())
-    }
-    fn new(mut start: f32, mut end: f32) -> Option<Self> {
-        if start > end {
-            std::mem::swap(&mut start, &mut end);
-        }
-        Some(Span {
-            start: NotNan::new(start).ok()?,
-            end: NotNan::new(end).ok()?,
-        })
-    }
-    fn intersect(self, other: Span) -> Option<Span> {
-        if self.start <= other.end && other.start <= self.end {
-            Some(Span {
-                start: self.start.max(other.start),
-                end: self.end.min(other.end),
-            })
-        } else {
-            None
-        }
-    }
-    fn union(self, other: Span) -> Option<Span> {
-        if self.start <= other.end && other.start <= self.end {
-            Some(Span {
-                start: self.start.min(other.start),
-                end: self.end.max(other.end)
-            })
-        } else {
-            None
-        }
-    }
-}
-
-pub fn split2(boxes: &mut [(RectF, usize)], spans: &[TextSpan], lines_info: &Lines) -> Node {
-    use std::mem::replace;
-
-    #[derive(Debug)]
-    enum LineTag {
-        Unknown,
-        Text,
-        Table,
-    }
-
-    sort_y(boxes);
-    let mut lines = vec![];
-    let mut y = Span::vert(&boxes[0].0).unwrap();
-    let mut items = vec![boxes[0]];
-
-    let build_line = |boxes: &[(RectF, usize)]| -> (LineTag, Span, Vec<(Span, Vec<usize>)>) {
-        let mut line = vec![];
-        let mut x = Span::horiz(&boxes[0].0).unwrap();
-        let mut y = Span::vert(&boxes[0].0).unwrap();
-        let mut items = vec![boxes[0].1];
-
-        for &(rect, i) in &boxes[1..] {
-            y = y.union(Span::vert(&rect).unwrap()).unwrap();
-            let x2 = Span::horiz(&rect).unwrap();
-            if let Some(u) = x.union(x2) {
-                x = u;
-                items.push(i);
-            } else {
-                line.push((x, replace(&mut items, vec![i])));
-                x = x2;
-            }
-        }
-        line.push((x, items));
-
-        let f = avg(boxes.iter().filter_map(|&(_, i)| spans.get(i)).map(|s| s.font_size)).unwrap();
-
-        let max_gap = line.iter().tuple_windows().map(|(l, r)| r.0.start - l.0.end).max();
-        let tag = match max_gap {
-            None => LineTag::Unknown,
-            Some(x) if x.into_inner() < 0.3 * f => LineTag::Text,
-            Some(_) => LineTag::Table,
-        };
-
-        (tag, y, line)
-    };
-
-    let mut line = vec![boxes[0]];
-    for &(rect, i) in &boxes[1..] {
-        let y2 = Span::vert(&rect).unwrap();
-        if let Some(overlap) = y.intersect(y2) {
-            y = overlap;
-        } else {
-            sort_x(&mut line);
-            lines.push(build_line(&line));
-            line.clear();
-            y = y2
-        }
-        line.push((rect, i));
-    }
-    sort_x(&mut line);
-    lines.push(build_line(&line));
-
-
-    let mut vparts = vec![];
-    let mut start = 0;
-    while let Some(p) = lines[start..].iter().position(|(tag, _, line)| matches!(tag, LineTag::Unknown | LineTag::Table)) {
-        let table_start = start + p;
-        let table_end = lines[table_start+1..].iter().position(|(tag, _, _)| matches!(tag, LineTag::Text)).map(|e| table_start+1+e).unwrap_or(lines.len());
-        
-        for &(_, y, ref line) in &lines[start..table_start] {
-            vparts.push((y, Node::Final { indices: line.iter().flat_map(|(_, indices)| indices.iter().cloned()).collect() }));
-        }
-
-        let lines = &lines[table_start..table_end];
-        start = table_end;
-
-        let mut columns: Vec<Span> = vec![];
-        for (_, _, line) in lines.iter() {
-            for &(x, ref parts) in line.iter() {
-                // find any column that is contained in this
-                let mut found = 0;
-                for span in columns.iter_mut() {
-                    if let Some(overlap) = span.intersect(x) {
-                        *span = overlap;
-                        found += 1;
-                    }
-                }
-                if found == 0 {
-                    columns.push(x);
-                }
-            }
-        }
-        let avg_vgap = avg(lines.iter().map(|(_, y, _)| y).tuple_windows().map(|(a, b)| *(b.start - a.end)));
-
-        columns.sort_by_key(|s| s.start);
-
-        let mut buf = String::new();
-
-        let d_threshold = avg_vgap.unwrap_or(0.0);
-        let mut prev_end = None;
-
-        let mut table: Table<Vec<usize>> = Table::empty(lines.len() as u32, columns.len() as u32);
-
-        let mut row = 0;
-        for (_, span, line) in lines {
-            let mut col = 0;
-            
-            let combine = prev_end.map(|y: NotNan<f32>| {
-                if *(span.start - y) < d_threshold {
-                    !lines_info.hlines.iter().map(|(a, b)| 0.5 * (a+b)).any(|l| *y < l && *span.start > l)
-                } else {
-                    false
-                }
-            }).unwrap_or(false);
-
-            if !combine {
-                row += 1;
-            }
-
-            for &(x, ref parts) in line {
-                let mut cols = columns.iter().enumerate()
-                    .filter(|&(_, &x2)| x.intersect(x2).is_some())
-                    .map(|(i, _)| i);
-
-                let first_col = cols.next().unwrap();
-                let last_col = cols.last().unwrap_or(first_col);
-
-                if let Some(cell) = combine.then(|| table.get_cell_value_mut(row, first_col as u32)).flatten() {
-                    // append to previous line
-                    cell.extend_from_slice(parts);
-                } else {
-                    let colspan = (last_col - first_col) as u32 + 1;
-                    let rowspan = 1;
-                    table.set_cell(parts.clone(), row, first_col as u32, rowspan, colspan);
-                }
-                col = last_col + 1;
-            }
-            prev_end = Some(span.end);
-        }
-        let y = Span { start: lines[0].1.start, end: lines.last().unwrap().1.end };
-        vparts.push((y, Node::Table { table }));
-    }
-    for &(_, y, ref line) in &lines[start..] {
-        vparts.push((y, Node::Final { indices: line.iter().flat_map(|(_, indices)| indices.iter().cloned()).collect() }));
-    }
-
-    if vparts.len() > 1 {
-        let y = vparts.iter().tuple_windows().map(|(a, b)| 0.5 * (a.0.end + b.0.start).into_inner()).collect();
-        Node::Grid {
-            tag: NodeTag::Complex,
-            x: vec![],
-            y,
-            cells: vparts.into_iter().map(|(_, n)| n).collect()
-        }
-    } else {
-        vparts.pop().unwrap().1
-    }
-}
-
-#[derive(Debug)]
-pub enum Node {
-    Final { indices: Vec<usize> },
-    Grid { x: Vec<f32>, y: Vec<f32>, cells: Vec<Node>, tag: NodeTag },
-    Table { table: Table<Vec<usize>> },
-}
-impl Node {
-    fn tag(&self) -> NodeTag {
-        match *self {
-            Node::Grid { tag, .. } => tag,
-            Node::Table { .. } => NodeTag::Complex,
-            Node::Final { .. } => NodeTag::Singleton,
-        }
-    }
-    fn indices(&self, out: &mut Vec<usize>) {
-        match *self {
-            Node::Final { ref indices } => out.extend_from_slice(&indices),
-            Node::Grid { ref cells, .. } => {
-                for n in cells {
-                    n.indices(out);
-                }
-            }
-            Node::Table { ref table } => {
-                out.extend(
-                    table.values()
-                        .flat_map(|v| v.value.iter())
-                        .cloned()
-                );
-            }
-        }
-    }
-    fn singleton(nodes: &[(RectF, usize)]) -> Self {
-        Node::Final { indices: nodes.iter().map(|t| t.1).collect() }
-    }
-}
-
-#[derive(PartialOrd, Ord, Eq, PartialEq, Clone, Copy, Debug)]
-pub enum NodeTag {
-    Singleton,
-    Line,
-    Paragraph,
-    Complex,
-}
-
-pub fn items(mut flow: &mut Flow, spans: &[TextSpan], node: &Node, x_anchor: f32) {
-    match *node {
-        Node::Final { ref indices } => {
-            if indices.len() > 0 {
-                let node_spans = indices.iter().flat_map(|&i| spans.get(i));
-                let bbox = node_spans.clone().map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap();
-                let class = classify(node_spans.clone());
-                let mut text = String::new();
-                let words = concat_text(&mut text, node_spans);
-                
-                let t = match class {
-                    Class::Header => RunType::Header,
-                    _ => RunType::Paragraph,
-                };
-                flow.add_line(words, t);
-            }
-        }
-        Node::Grid { ref x, ref y, ref cells, tag } => {
-            match tag {
-                NodeTag::Singleton |
-                NodeTag::Line => {
-                    let mut indices = vec![];
-                    node.indices(&mut indices);
-                    let line_spans = indices.iter().flat_map(|&i| spans.get(i));
-                    let bbox: RectF = line_spans.clone().map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap().into();
-
-                    let mut text = String::new();
-                    let words = concat_text(&mut text, line_spans.clone());
-                    let class = classify(line_spans.clone());
-
-                    let t = match class {
-                        Class::Header => RunType::Header,
-                        _ => RunType::Paragraph,
-                    };
-                    flow.add_line(words, t);
-                }
-                NodeTag::Paragraph => {
-                    assert_eq!(x.len(), 0);
-                    let mut lines: Vec<(RectF, usize)> = vec![];
-                    let mut indices = vec![];
-                    for n in cells {
-                        let start = indices.len();
-                        n.indices(&mut indices);
-                        if indices.len() > start {
-                            let cell_spans = indices[start..].iter().flat_map(|&i| spans.get(i));
-                            let bbox = cell_spans.map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap().into();
-                            lines.push((bbox, indices.len()));
-                        }
-                    }
-
-                    let para_spans = indices.iter().flat_map(|&i| spans.get(i));
-                    let class = classify(para_spans.clone());
-                    let bbox = lines.iter().map(|t| t.0).reduce(|a, b| a.union_rect(b)).unwrap();
-                    let line_height = avg(para_spans.map(|s| s.rect.height())).unwrap();
-                    // classify the lines by this vertical line
-                    let left_margin = bbox.min_x() + 0.5 * line_height;
-
-                    // count how many are right and left of the split.
-                    let mut left = 0;
-                    let mut right = 0;
-
-                    for (line_bbox, _) in lines.iter() {
-                        if line_bbox.min_x() >= left_margin {
-                            right += 1;
-                        } else {
-                            left += 1;
-                        }
-                    }
-
-                    // typically paragraphs are indented to the right and longer than 2 lines.
-                    // then there will be a higher left count than right count.
-                    let indent = left > right;
-
-                    let mut para_start = 0;
-                    let mut line_start = 0;
-                    let mut text = String::new();
-                    let mut para_bbox = RectF::default();
-                    let mut flow_lines = vec![];
-                    for &(line_bbox, end) in lines.iter() {
-                        if line_start != 0 {
-                            // if a line is indented (or outdented), it marks a new paragraph
-                            if (line_bbox.min_x() >= left_margin) == indent {
-                                flow.runs.push(Run {
-                                    lines: take(&mut flow_lines),
-                                    kind: match class {
-                                        Class::Header => RunType::Header,
-                                        _ => RunType::Paragraph
-                                    }
-                                });
-                                para_start = line_start;
-                            } else {
-                                text.push('\n');
-                            }
-                        }
-                        if end > line_start {
-                            let words = concat_text(&mut text, indices[line_start..end].iter().flat_map(|&i| spans.get(i)));
-
-                            if words.len() > 0 {
-                                flow_lines.push(Line { words });
-                            }
-                        }
-                        if para_start == line_start {
-                            para_bbox = line_bbox;
-                        } else {
-                            para_bbox = para_bbox.union_rect(line_bbox);
-                        }
-                        line_start = end;
-                    }
-
-                    flow.runs.push(Run {
-                        lines: flow_lines,
-                        kind: match class {
-                            Class::Header => RunType::Header,
-                            _ => RunType::Paragraph
-                        }
-                    });
-                }
-                NodeTag::Complex => {
-                    let x_anchors = once(x_anchor).chain(x.iter().cloned()).cycle();
-                    for (node, x) in cells.iter().zip(x_anchors) {
-                        items(flow, spans, node, x);
-                    }
-                }
-            }
-        }
-        Node::Table { ref table } => {
-            if let Some(bbox) = table.values()
-                .flat_map(|v| v.value.iter().flat_map(|&i| spans.get(i).map(|s| s.rect)))
-                .reduce(|a, b| a.union_rect(b)) {
-                let table = table.flat_map(|indices| {
-                    if indices.len() == 0 {
-                        None
-                    } else {
-                        let line_spans = indices.iter().flat_map(|&i| spans.get(i));
-                        let bbox: RectF = line_spans.clone().map(|s| s.rect).reduce(|a, b| a.union_rect(b)).unwrap().into();
-
-                        let mut text = String::new();
-                        concat_text(&mut text, line_spans.clone());
-                        Some(CellContent {
-                            text,
-                            rect: bbox.into(),
-                        })
-                    }
-                });
-                flow.add_table(table);
-            }
-        }
-    }
-}
-
-
-pub fn render(w: &mut String, spans: &[TextSpan], node: &Node, bbox: RectF) {
-    _render(w, spans, node, bbox, 0)
-}
-fn _render(w: &mut String, spans: &[TextSpan], node: &Node, bbox: RectF, level: usize) {
-    use std::fmt::Write;
-
-    match *node {
-        Node::Final { ref indices } => {
-            /*
-            for i in start..end {
-                if let Span::Text(ref t) = spans[i] {
-                    write!(w, r#"<text"#).unwrap();
-                    write!(w, r#" font-size="{}""#, t.font_size).unwrap();
-                    write!(w, r#" transform="{}""#, Transform::from(t.transform)).unwrap();
-                    write_text_span(w, t);
-                    write!(w, "</text>").unwrap();
-                }
-            }
-            */
-            
-            if indices.len() > 0 {
-                let class = classify(indices.iter().cloned().filter_map(|i| spans.get(i)));
-
-                for &i in indices.iter() {
-                    let r = spans[i].rect;
-                    write!(w, r#"<line x1="{}" x2="{}" y1="{}" y2="{}" class="{:?}" />"#,
-                        r.min_x(), r.max_x(), r.max_y(), r.max_y(),
-                        class
-                    );
-                }
-            }
-        }
-        Node::Grid { ref x, ref y, ref cells, tag } => {
-            use std::iter::once;
-            let columns = x.len() + 1;
-            write!(w, r#"<rect x="{}" y="{}" width="{}" height="{}" class="{:?}" />"#,
-                bbox.min_x(), bbox.min_y(), bbox.width(), bbox.height(), tag
-            );
-
-            for (j, ((min_y, max_y), row)) in once(bbox.min_y()).chain(y.iter().cloned()).chain(once(bbox.max_y())).tuple_windows().zip(cells.chunks_exact(columns)).enumerate() {
-                if j > 0 {
-                    writeln!(w, r#"<line x1="{}" x2="{}" y1="{}" y2="{}" level="{level}"></line>"#,
-                        bbox.min_x(), bbox.max_x(), min_y, min_y);
-                }
-
-                for (i, ((min_x, max_x), cell)) in once(bbox.min_x()).chain(x.iter().cloned()).chain(once(bbox.max_x())).tuple_windows().zip(row).enumerate() {
-                    if i > 0 {
-                        writeln!(w, r#"<line x1="{}" x2="{}" y1="{}" y2="{}" level="{level}"></line>"#,
-                            min_x, min_x, bbox.min_y(), bbox.max_y());
-                    }
-
-                    let bbox = RectF::from_points(Vector2F::new(min_x, min_y), Vector2F::new(max_x, max_y));
-                    _render(w, spans, cell, bbox, level+1);
-                }
-            }
-        }
-        Node::Table { .. } => {
-            
-        }
-    }
-}
-
-fn split(boxes: &mut [(RectF, usize)], spans: &[TextSpan], lines: &Lines) -> Node {
-    let num_boxes = boxes.len();
-    if num_boxes < 2 {
-        return Node::singleton(boxes);
-    }
-
-    sort_x(boxes);
-    let max_x_gap = dist_x(boxes);
-    sort_y(boxes);
-    let max_y_gap = dist_y(boxes);
-
-    let x_y_ratio = 1.0;
-
-    let max_gap = match (max_x_gap, max_y_gap) {
-        (Some((x, _)), Some((y, _))) => x.max(y * x_y_ratio),
-        (Some((x, _)), None) => x,
-        (None, Some((y, _))) => y * x_y_ratio,
-        (None, None) => {
-            sort_x(boxes);
-            return Node::singleton(boxes);
-        }
-    };
-    let x_threshold = (max_gap * 0.5).max(1.0);
-    let y_threshold = (max_gap * 0.5 / x_y_ratio).max(0.1);
-    let mut cells = vec![];
-
-    let y_gaps: Vec<f32> = gaps(y_threshold, boxes, |r| (r.min_y(), r.max_y()))
-        .collect();
-    
-    sort_x(boxes);
-    let x_gaps: Vec<f32> = gaps(x_threshold, boxes, |r| (r.min_x(), r.max_x()))
-        .collect();
-
-    if x_gaps.len() == 0 && y_gaps.len() == 0 {
-        return overlapping_lines(boxes);
-    }
-
-    if x_gaps.len() > 1 && y_gaps.len() > 1 {
-        return split2(boxes, spans, lines);
-    }
-
-    sort_y(boxes);
-    for row in split_by(boxes, &y_gaps, |r| r.min_y()) {
-
-        if x_gaps.len() > 0 {
-            sort_x(row);
-            for cell in split_by(row, &x_gaps, |r| r.min_x()) {
-                sort_y(cell);
-                assert!(cell.len() < num_boxes);
-                cells.push(split(cell, spans, lines));
-            }
-        } else {
-            cells.push(split(row, spans, lines));
-        }
-    }
-
-    assert!(x_gaps.len() > 0 || y_gaps.len() > 0);
-    let tag = if y_gaps.len() == 0 {
-        if cells.iter().all(|n| n.tag() <= NodeTag::Line) {
-            NodeTag::Line
-        } else {
-            NodeTag::Complex
-        }
-    } else if x_gaps.len() == 0 {
-        if cells.iter().all(|n| n.tag() <= NodeTag::Line) {
-            NodeTag::Paragraph
-        } else {
-            NodeTag::Complex
-        }
-    } else {
-        NodeTag::Complex
-    };
-
-    Node::Grid {
-        x: x_gaps,
-        y: y_gaps,
-        cells,
-        tag,
-    }
-}
-#[allow(dead_code)]
-fn split_v(boxes: &mut [(RectF, usize)]) -> Node {
-    let num_boxes = boxes.len();
-    if num_boxes < 2 {
-        return Node::singleton(boxes)
-    }
-
-    let max_y_gap = dist_y(boxes);
-
-    let max_gap = match max_y_gap {
-        Some((y, _)) => y,
-        None => {
-            sort_x(boxes);
-            return Node::singleton(boxes);
-        }
-    };
-    let threshold = max_gap * 0.8;
-    let mut cells = vec![];
-
-    let y_gaps: Vec<f32> = gaps(threshold, boxes, |r| (r.min_y(), r.max_y()))
-        .collect();
-    
-    for row in split_by(boxes, &y_gaps, |r| r.min_y()) {
-        assert!(row.len() < num_boxes);
-        cells.push(split_v(row));
-    }
-
-    let tag = if cells.iter().all(|n| n.tag() <= NodeTag::Line) {
-        NodeTag::Paragraph
-    } else {
-        NodeTag::Complex
-    };
-
-    Node::Grid {
-        x: vec![],
-        y: y_gaps,
-        cells,
-        tag,
-    }
-}
-
-fn top_bottom_gap(boxes: &mut [(RectF, usize)], bbox: RectF) -> (Option<usize>, Option<usize>) {
-    let num_boxes = boxes.len();
-    if num_boxes < 2 {
-        return (None, None);
-    }
-
-    let mut gaps = gap_list(boxes, |r| (r.min_y(), r.max_y()));
-    let top_limit = bbox.min_y() + bbox.height() * 0.2;
-    let bottom_limit = bbox.min_y() + bbox.height() * 0.8;
-    match gaps.next() {
-        Some((y, _, top)) if y < top_limit => {
-            match gaps.last() {
-                Some((y, _, bottom)) if y > bottom_limit => (Some(top), Some(bottom)),
-                _ => (Some(top), None)
-            }
-        }
-        Some((y, _, bottom)) if y > bottom_limit => (None, Some(bottom)),
-        _ => (None, None)
-    }
-}
-fn left_right_gap(boxes: &mut [(RectF, usize)], bbox: RectF) -> (Option<usize>, Option<usize>) {
-    let num_boxes = boxes.len();
-    if num_boxes < 2 {
-        return (None, None);
-    }
-
-    let mut gaps = gap_list(boxes, |r| (r.min_x(), r.max_x()));
-    let left_limit = bbox.min_x() + bbox.width() * 0.2;
-    let right_limit = bbox.min_x() + bbox.width() * 0.8;
-    match gaps.next() {
-        Some((x, _, left)) if x < left_limit  => {
-            match gaps.last() {
-                Some((x, _, right)) if x > right_limit => (Some(left), Some(right)),
-                _ => (Some(left), None)
-            }
-        }
-        Some((x, _, right)) if x > right_limit => (None, Some(right)),
-        _ => (None, None)
-    }
-}
-
-fn sort_x(boxes: &mut [(RectF, usize)]) {
-    boxes.sort_unstable_by(|a, b| a.0.min_x().partial_cmp(&b.0.min_x()).unwrap());
-}
-fn sort_y(boxes: &mut [(RectF, usize)]) {
-    boxes.sort_unstable_by(|a, b| a.0.min_y().partial_cmp(&b.0.min_y()).unwrap());
-}
-fn overlapping_lines(boxes: &mut [(RectF, usize)]) -> Node {
-    sort_y(boxes);
-    let avg_height = avg(boxes.iter().map(|(r, _)| r.height())).unwrap();
-    
-    let mut y_center = boxes[0].0.center().y();
-    let mut lines = vec![];
-    let mut y_splits = vec![];
-
-    let mut start = 0;
-    'a: loop {
-        for (i, &(r, _)) in boxes[start..].iter().enumerate() {
-            if r.center().y() > 0.5 * avg_height + y_center {
-                let end = start + i;
-                sort_x(&mut boxes[start..end]);
-                let bbox = boxes[start..end].iter().map(|&(r, _)| r).reduce(|a, b| a.union_rect(b)).unwrap();
-
-                y_splits.push(bbox.max_y());
-                lines.push(Node::singleton(&boxes[start..end]));
-                y_center = r.center().y();
-
-                start = end;
-                continue 'a;
-            }
-        }
-
-        sort_x(&mut boxes[start..]);
-        lines.push(Node::singleton(&boxes[start..]));
-
-        break;
-    }
-    match lines.len() {
-        0 => Node::singleton(&[]),
-        1 => lines.pop().unwrap(),
-        _ => Node::Grid {
-            x: vec![],
-            y: y_splits,
-            cells: lines,
-            tag: NodeTag::Paragraph
-        }
-    }
-}
-
-fn gap_list<'a>(boxes: &'a [(RectF, usize)], span: impl Fn(&RectF) -> (f32, f32) + 'a) -> impl Iterator<Item=(f32, f32, usize)> + 'a {
-    let mut boxes = boxes.iter();
-    let &(ref r, _) = boxes.next().unwrap();
-    let (_, mut last_max) = span(r);
-    boxes.enumerate().filter_map(move |(idx, &(ref r, _))| {
-        let (min, max) = span(&r);
-        let r = if min > last_max {
-            Some((last_max, min, idx+1))
-        } else {
-            None
-        };
-        last_max = max.max(last_max);
-        r
-    })
-}
-
-fn gaps<'a>(threshold: f32, boxes: &'a [(RectF, usize)], span: impl Fn(&RectF) -> (f32, f32) + 'a) -> impl Iterator<Item=f32> + 'a {
-    let mut boxes = boxes.iter();
-    let &(ref r, _) = boxes.next().unwrap();
-    let (_, mut last_max) = span(r);
-    boxes.filter_map(move |&(ref r, _)| {
-        let (min, max) = span(&r);
-        let r = if min - last_max >= threshold {
-            Some(0.5 * (last_max + min))
-        } else {
-            None
-        };
-        last_max = max.max(last_max);
-        r
-    })
-}
-
-fn max_gap(boxes: &[(RectF, usize)], span: impl Fn(&RectF) -> (f32, f32)) -> Option<(f32, f32)> {
-    gap_list(boxes, span)
-    .max_by_key(|&(a, b, _)| NotNan::new(b - a).unwrap())
-    .map(|(a, b, _)| (b - a, 0.5 * (a + b)))
-}
-
-fn dist_x(boxes: &[(RectF, usize)]) -> Option<(f32, f32)> {
-    max_gap(boxes, |r| (r.min_x(), r.max_x()))
-}
-fn dist_y(boxes: &[(RectF, usize)]) -> Option<(f32, f32)> {
-    max_gap(boxes, |r| (r.min_y(), r.max_y()))
-}
-fn split_by<'a>(list: &'a mut [(RectF, usize)], at: &'a [f32], by: impl Fn(&RectF) -> f32) -> impl Iterator<Item=&'a mut [(RectF, usize)]> {
-    SplitBy {
-        data: list,
-        points: at.iter().cloned(),
-        by,
-        end: false
-    }
-}
-
-struct SplitBy<'a, I, F> {
-    data: &'a mut [(RectF, usize)],
-    points: I,
-    by: F,
-    end: bool,
-}
-impl<'a, I, F> Iterator for SplitBy<'a, I, F> where
-    I: Iterator<Item=f32>,
-    F: Fn(&RectF) -> f32
-{
-    type Item = &'a mut [(RectF, usize)];
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.end {
-            return None;
-        }
-        match self.points.next() {
-            Some(p) => {
-                let idx = self.data.iter().position(|(ref r, _)| (self.by)(r) > p).unwrap_or(self.data.len());
-                let (head, tail) = take(&mut self.data).split_at_mut(idx);
-                self.data = tail;
-                Some(head)
-            },
-            None => {
-                self.end = true;
-                Some(take(&mut self.data))
-            }
-        }
-    }
-}
-
-use super::util::Tri;
-#[derive(Copy, Clone, Debug, PartialEq)]
-enum Class {
-    Number,
-    Header,
-    Paragraph,
-    Mixed,
-}
-
-#[derive(Debug)]
-struct TriCount {
-    tru: usize,
-    fal: usize,
-}
-impl TriCount {
-    fn new() -> Self {
-        TriCount {
-            tru: 0,
-            fal: 0
-        }
-    }
-    fn add(&mut self, b: bool) {
-        match b {
-            false => self.fal += 1,
-            true => self.tru += 1,
-        }
-    }
-    fn count(&self) -> Tri {
-        match (self.fal, self.tru) {
-            (0, 0) => Tri::Unknown,
-            (0, _) => Tri::True,
-            (_, 0) => Tri::False,
-            (f, t) => Tri::Maybe(t as f32 / (t + f) as f32)
-        }
-    }
-}
-fn classify<'a>(spans: impl Iterator<Item=&'a TextSpan>) -> Class {
-    use pdf_render::FontEntry;
-
-    let mut bold = TriCount::new();
-    let mut numeric = TriCount::new();
-    let mut uniform = TriCount::new();
-    let mut first_font: *const FontEntry = std::ptr::null();
-
-    for s in spans {
-        numeric.add(is_number(&s.text));
-        if let Some(ref font) = s.font {
-            bold.add(font.name.contains("Bold"));
-            let font_ptr = Arc::as_ptr(font);
-            if first_font.is_null() {
-                first_font = font_ptr;
-            } else {
-                uniform.add(font_ptr == first_font);
-            }
-        }
-    }
-    uniform.add(true);
-
-    match (numeric.count(), bold.count(), uniform.count()) {
-        (Tri::True, _, Tri::True) => Class::Number,
-        (_, Tri::True, Tri::True) => Class::Header,
-        (_, Tri::False, Tri::True) => Class::Paragraph,
-        (_, Tri::False, _) => Class::Paragraph,
-        (_, Tri::Maybe(_), _) => Class::Paragraph,
-        _ => Class::Mixed
-    }
-}
\ No newline at end of file
diff --git a/src/util.rs b/src/util.rs
index a68d533..adf68cc 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,7 +1,3 @@
-use pathfinder_geometry::rect::RectF;
-use serde::{Serialize, Deserialize};
-
-
 pub fn is_number(s: &str) -> bool {
     s.len() > 0 && s.chars().all(|c| ('0' ..= '9').contains(&c))
 }
@@ -18,37 +14,4 @@ pub fn avg(iter: impl Iterator<Item=f32>) -> Option<f32> {
     } else {
         None
     }
-}
-
-pub enum Tri {
-    False,
-    True,
-    Maybe(f32),
-    Unknown,
-}
-
-#[derive(Copy, Clone, Debug)]
-#[derive(Serialize, Deserialize)]
-#[repr(C)]
-pub struct Rect {
-    pub x: f32,
-    pub y: f32,
-    pub w: f32,
-    pub h: f32
-}
-impl From<RectF> for Rect {
-    fn from(r: RectF) -> Self {
-        Rect {
-            x: r.origin_x(),
-            y: r.origin_y(),
-            w: r.width(),
-            h: r.height()
-        }
-    }
-}
-
-#[derive(Clone, Debug, Serialize)]
-pub struct CellContent {
-    pub text: String,
-    pub rect: Rect,
 }
\ No newline at end of file