Skip to content

Commit

Permalink
Support font encoder
Browse files (browse the repository at this point in the history)
  • Loading branch information
vidy committed Nov 19, 2024
1 parent 03d4789 commit a000d21
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 16 deletions.
11 changes: 8 additions & 3 deletions Cargo.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,15 +9,20 @@ description = "PDF text extraction"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html


[dependencies.pdf]
default-features=false
features = ["cache", "dump"]
git = "https://github.com/pdf-rs/pdf"

[dependencies]
pdf = { git = "https://github.com/pdf-rs/pdf", features = ["cache"] }
pdf_render = { git = "https://github.com/pdf-rs/pdf_render" }
font = { git = "https://github.com/pdf-rs/font" }
pdf_render= { git = "https://github.com/videni/pdf_render_with_vello", rev="2aae6fbec9e8276b24e6a38595c50e181dda0141"}
itertools = "*"
log = "*"
ordered-float = "*"
serde = { version = "*", features = ["derive"] }
unicode-normalization = "0.1.19"
font = { git = "https://github.com/pdf-rs/font", branch = "vello", features=['cff']}

pathfinder_geometry = { git = "https://github.com/servo/pathfinder" }
pathfinder_color = { git = "https://github.com/servo/pathfinder" }
Expand Down
8 changes: 4 additions & 4 deletions src/lib.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,18 +2,18 @@ use std::collections::HashSet;

use entry::Flow;
use pdf::{backend::Backend, object::{Page, Resolve}, PdfError};
use pdf_render::{tracer::{TraceCache, Tracer, DrawItem}, Fill, render_pattern, render_page, FillMode};
use pdf_render::{tracer::{TraceCache, Tracer, DrawItem}, Fill, render_pattern, render_page, FillMode, font::OutlineBuilder};

mod tree;
mod util;
mod text;
pub mod entry;

pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &impl Resolve) -> Result<Flow, PdfError> {
let cache = TraceCache::new();
let mut cache = TraceCache::new(OutlineBuilder::default());

let mut clip_paths = vec![];
let mut tracer = Tracer::new(&cache, &mut clip_paths);
let mut tracer = Tracer::new(&mut cache, &mut clip_paths);

render_page(&mut tracer, resolve, &page, Default::default())?;

Expand Down Expand Up @@ -68,7 +68,7 @@ pub fn run<B: Backend>(file: &pdf::file::CachedFile<B>, page: &Page, resolve: &i
continue;
}
};
let mut pat_tracer = Tracer::new(&cache, &mut clip_paths);
let mut pat_tracer = Tracer::new(&mut cache, &mut clip_paths);

render_pattern(&mut pat_tracer, &*pattern, resolve)?;
let pat_items = pat_tracer.finish();
Expand Down
3 changes: 2 additions & 1 deletion src/text.rs
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
use font::Encoder;
use pathfinder_geometry::vector::Vector2F;
use pdf_render::TextSpan;
use itertools::{Itertools};
use unicode_normalization::UnicodeNormalization;
use crate::{util::avg, entry::Word, util::Rect};

pub fn concat_text<'a>(out: &mut String, items: impl Iterator<Item=&'a TextSpan> + Clone) -> Vec<Word> {
pub fn concat_text<'a, E: Encoder + 'a>(out: &mut String, items: impl Iterator<Item=&'a TextSpan<E>> + Clone) -> Vec<Word> {
let mut words = vec![];
let gaps = items.clone()
.flat_map(|s| {
Expand Down
17 changes: 9 additions & 8 deletions src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ use crate::util::{is_number, avg, CellContent};
use crate::text::{concat_text};
use std::mem::take;
use table::Table;
use font::{Encoder, Glyph};

pub fn build(spans: &[TextSpan], bbox: RectF, lines: &[[f32; 4]]) -> Node {
pub fn build<E: Encoder>(spans: &[TextSpan<E>], bbox: RectF, lines: &[[f32; 4]]) -> Node {
if spans.len() == 0 {
return Node::singleton(&[]);
}
Expand Down Expand Up @@ -180,7 +181,7 @@ impl Span {
}
}

pub fn split2(boxes: &mut [(RectF, usize)], spans: &[TextSpan], lines_info: &Lines) -> Node {
pub fn split2<E: Encoder>(boxes: &mut [(RectF, usize)], spans: &[TextSpan<E>], lines_info: &Lines) -> Node {
use std::mem::replace;

#[derive(Debug)]
Expand Down Expand Up @@ -383,7 +384,7 @@ pub enum NodeTag {
Complex,
}

pub fn items(mut flow: &mut Flow, spans: &[TextSpan], node: &Node, x_anchor: f32) {
pub fn items<E: Encoder>(mut flow: &mut Flow, spans: &[TextSpan<E>], node: &Node, x_anchor: f32) {
match *node {
Node::Final { ref indices } => {
if indices.len() > 0 {
Expand Down Expand Up @@ -534,10 +535,10 @@ pub fn items(mut flow: &mut Flow, spans: &[TextSpan], node: &Node, x_anchor: f32
}


pub fn render(w: &mut String, spans: &[TextSpan], node: &Node, bbox: RectF) {
pub fn render<E: Encoder>(w: &mut String, spans: &[TextSpan<E>], node: &Node, bbox: RectF) {
_render(w, spans, node, bbox, 0)
}
fn _render(w: &mut String, spans: &[TextSpan], node: &Node, bbox: RectF, level: usize) {
fn _render<E: Encoder>(w: &mut String, spans: &[TextSpan<E>], node: &Node, bbox: RectF, level: usize) {
use std::fmt::Write;

match *node {
Expand Down Expand Up @@ -596,7 +597,7 @@ fn _render(w: &mut String, spans: &[TextSpan], node: &Node, bbox: RectF, level:
}
}

fn split(boxes: &mut [(RectF, usize)], spans: &[TextSpan], lines: &Lines) -> Node {
fn split<E: Encoder>(boxes: &mut [(RectF, usize)], spans: &[TextSpan<E>], lines: &Lines) -> Node {
let num_boxes = boxes.len();
if num_boxes < 2 {
return Node::singleton(boxes);
Expand Down Expand Up @@ -925,13 +926,13 @@ impl TriCount {
}
}
}
fn classify<'a>(spans: impl Iterator<Item=&'a TextSpan>) -> Class {
fn classify<'a, E: Encoder + 'a>(spans: impl Iterator<Item=&'a TextSpan<E>>) -> Class {
use pdf_render::FontEntry;

let mut bold = TriCount::new();
let mut numeric = TriCount::new();
let mut uniform = TriCount::new();
let mut first_font: *const FontEntry = std::ptr::null();
let mut first_font: *const FontEntry<E> = std::ptr::null();

for s in spans {
numeric.add(is_number(&s.text));
Expand Down

0 comments on commit a000d21

Please sign in to comment.