This repository has been archived by the owner on May 6, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 2 changed files with 150 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT | ||
// file at the top-level directory of this distribution and at | ||
// http://rust-lang.org/COPYRIGHT. | ||
// | ||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | ||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | ||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | ||
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
use std::cmp; | ||
|
||
/// Computes the Levenshtein edit distance between `me` and `t`.
///
/// The distance is measured in `char`s (Unicode scalar values), not bytes,
/// so a multi-byte character counts as a single edit. If either string is
/// empty the result is the number of `char`s in the other one.
///
/// Only a single row of the dynamic-programming table is kept in memory.
pub fn lev_distance(me: &str, t: &str) -> usize {
    if me.is_empty() { return t.chars().count(); }
    if t.is_empty() { return me.chars().count(); }

    // The row is indexed by `char` position in `t`, so size it by the
    // number of `char`s rather than by the byte length.
    let t_len = t.chars().count();
    let mut dcol: Vec<usize> = (0..t_len + 1).collect();

    for (i, sc) in me.chars().enumerate() {
        // `current` holds the diagonal cell (previous row, previous column).
        let mut current = i;
        dcol[0] = current + 1;

        for (j, tc) in t.chars().enumerate() {
            // Cell directly above (previous row, same column); it becomes
            // the diagonal for the next column.
            let next = dcol[j + 1];

            dcol[j + 1] = if sc == tc {
                // Matching characters cost nothing: carry the diagonal over.
                current
            } else {
                // Cheapest of substitution, deletion and insertion, plus one.
                cmp::min(cmp::min(current, next), dcol[j]) + 1
            };

            current = next;
        }
    }

    dcol[t_len]
}
|
||
/// Returns the largest edit distance accepted when suggesting a replacement
/// for the (presumably mistyped) `name`.
///
/// To avoid obviously wrong suggestions the limit is one third of the name's
/// length, and never less than 1. The length used is `name.len()`, i.e. the
/// length in bytes.
pub fn max_suggestion_distance(name: &str) -> usize {
    let byte_len = name.len();
    // Names shorter than three bytes are treated as if they were three long,
    // which yields a limit of exactly one edit.
    if byte_len < 3 { 1 } else { byte_len / 3 }
}
|
||
#[test]
fn test_lev_distance() {
    use std::char::{from_u32, MAX};

    // Byte-length agnosticity: every valid scalar value below `MAX`, turned
    // into a one-character string, must be at distance zero from itself.
    for code in 0..MAX as u32 {
        if let Some(ch) = from_u32(code) {
            let s = ch.to_string();
            assert_eq!(lev_distance(&s[..], &s[..]), 0);
        }
    }

    let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
    let c = "Mary häd ä little lämb\n\nLittle lämb\n";

    // Each pair is checked in both directions: the distance is symmetric.
    let cases = [(a, b, 1), (a, c, 2), (b, c, 1)];
    for &(left, right, expected) in cases.iter() {
        assert_eq!(lev_distance(left, right), expected);
        assert_eq!(lev_distance(right, left), expected);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT | ||
// file at the top-level directory of this distribution and at | ||
// http://rust-lang.org/COPYRIGHT. | ||
// | ||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | ||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | ||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | ||
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
use owned_slice::OwnedSlice; | ||
|
||
use std::ptr; | ||
|
||
/// Mapping over an owned container by value, producing a container of the
/// same type.
pub trait MoveMap<T>: Sized {
    /// Replaces every element `e` with `f(e)`.
    ///
    /// Implemented on top of `move_flat_map` by yielding exactly one item
    /// per input element.
    fn move_map<F>(self, mut f: F) -> Self where F: FnMut(T) -> T {
        // An `Option` iterates over zero or one items, so `Some(..)` is a
        // one-element iterator.
        self.move_flat_map(|e| Some(f(e)))
    }

    /// Replaces every element `e` with the items produced by `f(e)`, in
    /// order. `f` may yield zero, one or several items per element.
    fn move_flat_map<F, I>(self, f: F) -> Self
        where F: FnMut(T) -> I,
              I: IntoIterator<Item=T>;
}

impl<T> MoveMap<T> for Vec<T> {
    /// Flat-maps the vector in place, reusing its buffer.
    ///
    /// Results are written back over slots that have already been read. Only
    /// when `f` has produced more items than were consumed so far does the
    /// implementation fall back to `Vec::insert`.
    ///
    /// If `f` (or the iterator it returns) panics, the elements still stored
    /// in the buffer are leaked rather than dropped.
    fn move_flat_map<F, I>(mut self, mut f: F) -> Self
        where F: FnMut(T) -> I,
              I: IntoIterator<Item=T>
    {
        let mut read_i = 0;
        let mut write_i = 0;
        // SAFETY: `write_i <= read_i <= old_len <= capacity` holds throughout.
        // Slots `[0, write_i)` and `[read_i, old_len)` hold initialized
        // values; slots `[write_i, read_i)` have been moved out. Every raw
        // read targets an initialized slot below `old_len` and every raw
        // write targets a moved-out slot, so nothing is read twice or
        // overwritten without being consumed. Raw pointers are used instead
        // of slice indexing because the length is 0 while the loop runs,
        // which would make any slice index out of bounds.
        unsafe {
            let mut old_len = self.len();
            self.set_len(0); // make sure we just leak elements in case of panic

            while read_i < old_len {
                // move the read_i'th item out of the vector and map it
                // to an iterator
                let e = ptr::read(self.as_ptr().add(read_i));
                let iter = f(e).into_iter();
                read_i += 1;

                for e in iter {
                    if write_i < read_i {
                        // There is a moved-out slot available: fill it.
                        ptr::write(self.as_mut_ptr().add(write_i), e);
                        write_i += 1;
                    } else {
                        // If this is reached we ran out of space
                        // in the middle of the vector.
                        // However, the vector is in a valid state here
                        // (`write_i == read_i`, so there are no holes),
                        // so we just do a somewhat inefficient insert.
                        self.set_len(old_len);
                        self.insert(write_i, e);

                        old_len = self.len();
                        self.set_len(0);

                        // The insert shifted the unread tail by one slot.
                        read_i += 1;
                        write_i += 1;
                    }
                }
            }

            // write_i tracks the number of actually written new items.
            self.set_len(write_i);
        }

        self
    }
}
|
||
impl<T> MoveMap<T> for OwnedSlice<T> { | ||
fn move_flat_map<F, I>(self, f: F) -> Self | ||
where F: FnMut(T) -> I, | ||
I: IntoIterator<Item=T> | ||
{ | ||
OwnedSlice::from_vec(self.into_vec().move_flat_map(f)) | ||
} | ||
} |