Skip to content

Commit

Permalink
Fix unicode handling
Browse files Browse the repository at this point in the history
Fixes some of the issues reported in #5, but not all
  • Loading branch information
osa1 committed Sep 24, 2021
1 parent e59fe3c commit 5acecb3
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 13 deletions.
40 changes: 27 additions & 13 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,23 +268,35 @@ fn walk_ast<W: Write>(
}
}

/// Translates a byte offset inside `token` into a line/column position.
///
/// Scans `token` from its start and accounts for every character that begins before `byte_idx`.
///
/// # Parameters
/// - `token`: text of the token being scanned.
/// - `column0`: column at which the token starts; used as the initial value of the returned
///   column and discarded once a line break is crossed.
/// - `byte_idx`: byte index into `token` of the position to locate.
///
/// # Returns
/// `(line, col, col_byte_idx)`:
/// - `line`: number of line breaks (`"\n"`, `"\r"`, or `"\r\n"` counted once) before `byte_idx`.
/// - `col`: `column0` plus the number of characters scanned when no line break was crossed,
///   otherwise the number of characters since the last line break.
/// - `col_byte_idx`: number of bytes scanned since the last line break, or since the start of
///   `token` when no line break was crossed.
///
/// Never panics: a `byte_idx` past the end of `token` yields the position of the end of the
/// token, and a `byte_idx` that falls inside a multi-byte character or between the two halves of
/// a `"\r\n"` pair is rounded up past that character / pair.
// NOTE(review): `col_byte_idx` starts at 0 while `col` starts at `column0`, so for a position on
// the token's first line the byte count is relative to the token start, not the line start.
// Confirm this is what callers slicing source lines expect.
fn get_token_line_col(token: &str, column0: usize, byte_idx: usize) -> (usize, usize, usize) {
    let mut line = 0;
    let mut col = column0;
    let mut col_byte_idx = 0;

    let mut chars = token.char_indices().peekable();

    while let Some((idx, c)) = chars.next() {
        // Only characters that start before the target offset contribute to the position.
        if idx >= byte_idx {
            break;
        }

        match c {
            '\r' => {
                // Treat "\r\n" as a single line break.
                if let Some(&(_, '\n')) = chars.peek() {
                    let _ = chars.next(); // consume '\n'
                }
                line += 1;
                col = 0;
                col_byte_idx = 0;
            }
            '\n' => {
                line += 1;
                col = 0;
                col_byte_idx = 0;
            }
            _ => {
                col += 1;
                col_byte_idx += c.len_utf8();
            }
        }
    }

    (line, col, col_byte_idx)
}

fn check_word_bounds(text: &str, match_begin: usize, match_end: usize) -> bool {
Expand All @@ -303,6 +315,7 @@ fn check_word_bounds(text: &str, match_begin: usize, match_end: usize) -> bool {
true
}

/// Returns byte indices of matches of `pattern` in `token`
fn match_token(
token: &str,
pattern: &str,
Expand Down Expand Up @@ -343,23 +356,24 @@ fn match_token(
.collect()
}

// NB. `match_byte_idx` is a byte index into `token_str`
fn report_match<W: Write>(
stdout: &mut W,
cfg: &Cfg,
path: &Path,
node: &Node,
token_str: &str,
lines: &[&str],
match_: usize,
match_byte_idx: usize,
header_printed: &mut bool,
first: &mut bool,
) {
let pos = node.start_position();

let (token_line, token_col) = get_token_line_col(token_str.as_ref(), pos.column, match_);
let (token_line, column, column_byte) =
get_token_line_col(token_str, pos.column, match_byte_idx);

let line = pos.row + token_line;
let column = token_col;

// Print header (if grouping)
if !*header_printed && cfg.group {
Expand Down Expand Up @@ -429,9 +443,9 @@ fn report_match<W: Write>(
}
};

let before_match = &line[0..column];
let match_ = &line[column..column + cfg.pattern.len()];
let after_match = &line[column + cfg.pattern.len()..];
let before_match = &line[0..column_byte];
let match_ = &line[column_byte..column_byte + cfg.pattern.len()];
let after_match = &line[column_byte + cfg.pattern.len()..];
let _ = write!(stdout, "{}", before_match);
if cfg.color {
let _ = write!(
Expand Down
19 changes: 19 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,22 @@ fn simple_word_id() {
1:fn test() {\n"
);
}

/// Regression test for the first case of issue #5: searching OCaml identifiers for `1` in
/// `test_files/issue_5_1.ml` with colors disabled must print the file header followed by the
/// single match line `4:1`.
#[test]
fn issue_5_1() {
    // Command line as the binary would receive it, program name first.
    let args = [
        "sg",
        "--ocaml",
        "-k",
        "identifier",
        "1",
        "test_files/issue_5_1.ml",
        "--nocolor",
    ];

    let output = run_args(&args);

    let expected = "test_files/issue_5_1.ml\n\
                    4:1\n";
    assert_eq!(output, expected);
}
6 changes: 6 additions & 0 deletions test_files/issue_5_1.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
(*
¹
¹
1
*)

0 comments on commit 5acecb3

Please sign in to comment.