Skip to content

Commit

Permalink
refactor: remove debug prints
Browse files Browse the repository at this point in the history
  • Loading branch information
kod-kristoff committed May 3, 2024
1 parent bcc6d79 commit 516c418
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 109 deletions.
85 changes: 7 additions & 78 deletions src/parallel_corpus/graph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools
import itertools
import logging
import re
Expand Down Expand Up @@ -55,7 +54,6 @@ def copy_with_updated_side_and_edges(
return Graph(source=source, target=target, edges=edges, comment=self.comment)

def copy_with_edges(self, edges: Edges) -> Self:
    """Return a copy of this graph with its edges replaced by `edges`.

    Source, target and comment are carried over unchanged.
    """
    # Debug print removed; graph reprs can be large and this runs on every edit.
    return Graph(source=self.source, target=self.target, edges=edges, comment=self.comment)


Expand Down Expand Up @@ -86,7 +84,6 @@ def edge_record(es: List[Edge]) -> Dict[str, Edge]:


def init(s: str, *, manual: bool = False) -> Graph:
    """Build a graph from the raw text `s` by tokenizing it.

    `manual` is forwarded to `init_from` (presumably flags the initial
    edges as manually aligned — confirm against `init_from`).
    """
    # Debug print removed: this is a hot entry point and `s` may be long.
    return init_from(token.tokenize(s), manual=manual)


Expand Down Expand Up @@ -116,7 +113,6 @@ def merge_edges(*es) -> Edge:
manual = False
comments = []
for e in es:
print(f"{e=}")
ids.extend(iter(e.ids))
labels.extend(iter(e.labels))
manual = manual or e.manual
Expand All @@ -134,7 +130,6 @@ def merge_edges(*es) -> Edge:


def align(g: Graph) -> Graph:
print(f"align start; graph={g}")
# Use a union-find to group characters into edges.
uf = parallel_corpus.shared.union_find.poly_union_find(lambda u: u)
em = edge_map(g)
Expand All @@ -147,9 +142,7 @@ def align(g: Graph) -> Graph:
),
)
char_diff = diffs.hdiff(chars.source, chars.target, lambda u: u.char, lambda u: u.char)
print(f"{char_diff=}")
for c in char_diff:
# print(f"{c=}")
# these undefined makes the alignment skip spaces.
# they originate from to_char_ids
if c.change == diffs.ChangeType.CONSTANT and (c.a.id is not None and c.b.id is not None):
Expand All @@ -163,24 +156,12 @@ def update_edges(tokens, _side):
if not e_repr.manual:
labels = e_repr.labels if first(e_repr.id) else []
e_token = edge([tok.id], labels, manual=False, comment=e_repr.comment)
# print(f"{e_repr.comment=}")
dicts.modify(
proto_edges, uf.find(tok.id), zero_edge, lambda e: merge_edges(e, e_token)
)
# key = uf.find(tok.id)
# print(f"{key=}")
# e1 = proto_edges.get(key) or zero_edge
# proto_edges[key] = merge_edges(e1, e_token)
# print(f"{proto_edges[key]=}")
# k = uf.find(token.id)
# if k is None or k not in proto_edges:
# raise NotImplementedError("?")
# else:

map_sides(g, update_edges)
print(f"align after map_sides; graph={g}")
edges = edge_record(dicts.traverse(proto_edges, lambda e, _: e))
print(f"{edges=}")
return g.copy_with_edges(edges)


Expand Down Expand Up @@ -214,22 +195,19 @@ def edge_map(g: Graph) -> Dict[str, Edge]:


def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph:
    """Replace the full text of one side of `g` with `text`.

    Instead of rewriting the whole side, compute the minimal edited span
    between the current side text and `text`, then apply only that
    replacement via `unaligned_modify`.
    """
    # Debug prints removed throughout; they fired on every keystroke-level edit.
    text0 = get_side_text(g, side)
    edits = parallel_corpus.shared.ranges.edit_range(text0, text)

    from_, to = edits["from"], edits["to"]
    # Slice out only the changed middle of `text`: the unchanged suffix of the
    # old text (len(text0) - to chars) is trimmed from the end of `text`.
    new_text = text[from_ : (len(text) - (len(text0) - to))]
    return unaligned_modify(g, from_, to, new_text, side)


def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "target") -> Graph:
"""Replace the text at some position, merging the spans it touches upon.
>>> show = lambda g: list(map(lambda t: t["text"], g["target"]))
>>> ids = lambda g: " ".join(map(lambda t: t["id"], g["target"]))
>>> show = lambda g: [t.text for t in g.target]
>>> ids = lambda g: " ".join(t.id for t in g.target)
>>> g = init('test graph hello')
>>> assert show(g) == ['test ', 'graph ', 'hello ']
>>> show(unaligned_modify(g, 0, 0, 'new'))
Expand All @@ -253,14 +231,14 @@ def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "tar
>>> show(unaligned_modify(g, 0, 15, '_'))
['_o ']
>>> show(unaligned_modify(g, 0, 16, '_')) /
> ['_ ']
>>> show(unaligned_modify(g, 0, 16, '_'))
['_ ']
>>> show(unaligned_modify(g, 0, 17, '_')) /
> ['_ ']
>>> show(unaligned_modify(g, 0, 17, '_'))
['_ ']
>>> show(unaligned_modify(g, 16, 16, ' !'))
=> ['test ', 'graph ', 'hello ', '! ']
['test ', 'graph ', 'hello ', '! ']
Indexes are character offsets (use CodeMirror's doc.posFromIndex and doc.indexFromPos to convert)
Expand All @@ -269,18 +247,12 @@ def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "tar
tokens = get_side_texts(g, side)
token_at = token.token_at(tokens, from_)
from_token, from_ix = token_at["token"], token_at["offset"]
# const {token: from_token, offset: from_ix} = T.token_at(tokens, from)
# const pre = (tokens[from_token] || '').slice(0, from_ix)
pre = (tokens[from_token] or "")[:from_ix]
if to == len(get_side_text(g, side)):
# return unaligned_modify_tokens(g, from_token, g[side].length, pre + text, side)
return unaligned_modify_tokens(g, from_token, len(g.get_side(side)), pre + text, side)
# const {token: to_token, offset: to_ix} = T.token_at(tokens, to)
to_token_at = token.token_at(tokens, to)
to_token, to_ix = to_token_at["token"], to_token_at["offset"]
# const post = (tokens[to_token] || '').slice(to_ix)
post = (tokens[to_token] or "")[to_ix:]
# return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side)
return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side)


Expand Down Expand Up @@ -334,84 +306,46 @@ def unaligned_modify_tokens(
):
raise ValueError(f"Invalid coordinates {g} {from_} {to} {text}")

# if (from < 0 || to < 0 || from > g[side].length || to > g[side].length || from > to) {
# throw new Error('Invalid coordinates ' + Utils.show({g, from, to, text}))
# }
# if (text.match(/^\s+$/)) {
if _ := ALL_WHITESPACE.fullmatch(text):
# replacement text is only whitespace: need to find some token to put it on
# if (from > 0) {
if from_ > 0:
# return unaligned_modify_tokens(g, from - 1, to, g[side][from - 1].text + text, side)
return unaligned_modify_tokens(
g, from_ - 1, to, g.get_side(side)[from_ - 1].text + text, side
)
elif to < len(g.get_side(side)):
# } else if (to < g[side].length) {
# return unaligned_modify_tokens(g, from, to + 1, text + g[side][to].text, side)
return unaligned_modify_tokens(
g, from_, to + 1, text + g.get_side(side)[to].text, side
)

# } else {
else:
# // console.warn('Introducing whitespace into empty graph')
logger.warn("Introducing whitespace into empty graph")

# }
# }
# if (text.match(/\S$/) && to < g[side].length) {
if NO_WHITESPACE_AT_END.match(text[-1:]) is not None and to < len(g.get_side(side)):
# if replacement text does not end with whitespace, grab the next word as well
# return unaligned_modify_tokens(g, from, to + 1, text + g[side][to].text, side)
return unaligned_modify_tokens(g, from_, to + 1, text + g.get_side(side)[to].text, side)

# }

# if (from > 0 && from == g[side].length && to === g[side].length) {
if from_ > 0 and from_ == len(g.get_side(side)) and to == len(g.get_side(side)):
# we're adding a word at the end but the last token might not end in whitespace:
# glue them together

# return unaligned_modify_tokens(g, from - 1, to, g[side][from - 1].text + text, side)
return unaligned_modify_tokens(
g, from_ - 1, to, g.get_side(side)[from_ - 1].text + text, side
)

# }

# const id_offset = next_id(g)
id_offset = next_id(g)

# const tokens = T.tokenize(text).map((t, i) => Token(t, side[0] + (id_offset + i)))
tokens = [
Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(token.tokenize(text))
]

# const [new_tokens, removed] = Utils.splice(g[side], from, to - from, ...tokens)
new_tokens, removed = lists.splice(g.get_side(side), from_, to - from_, *tokens)

# const ids_removed = new Set(removed.map(t => t.id))
ids_removed = {t.id for t in removed}
print(ids_removed)

# const new_edge_ids = new Set<string>(tokens.map(t => t.id))
new_edge_ids = {t.id for t in tokens}
# const new_edge_labels = new Set<string>()
new_edge_labels = set()
# let new_edge_manual = false
new_edge_manual = False

# const edges = record.filter(g.edges, e => {
# if (e.ids.some(id => ids_removed.has(id))) {
# e.ids.forEach(id => ids_removed.has(id) || new_edge_ids.add(id))
# e.labels.forEach(lbl => new_edge_labels.add(lbl))
# new_edge_manual = new_edge_manual || e.manual === true
# return false
# } else {
# return true
# }
# })
def fun(e: Edge, _id: str) -> bool:
if any(id_ in ids_removed for id_ in e.ids):
for id_ in e.ids:
Expand All @@ -424,15 +358,10 @@ def fun(e: Edge, _id: str) -> bool:

edges = dicts.filter_dict(g.edges, fun)

# if (new_edge_ids.size > 0) {
# const e = Edge([...new_edge_ids], [...new_edge_labels], new_edge_manual)
# edges[e.id] = e
# }
if new_edge_ids:
e = edge(list(new_edge_ids), list(new_edge_labels), manual=new_edge_manual)
edges[e.id] = e

# return {...g, [side]: new_tokens, edges}
return g.copy_with_updated_side_and_edges(side, new_tokens, edges)


Expand Down
11 changes: 0 additions & 11 deletions src/parallel_corpus/shared/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,8 @@
def end_with_space(s: str) -> str:
    """Return `s` guaranteed to end in whitespace.

    The empty string is returned unchanged; otherwise a single space is
    appended unless the last character already matches ENDING_WHITESPACE.
    """
    if not s:
        return s
    # Append a space only when the final character is not already whitespace
    # (as defined by the module-level ENDING_WHITESPACE pattern).
    return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s


def uniq(xs: List[str]) -> List[str]:
used = set()
Expand Down
2 changes: 1 addition & 1 deletion src/parallel_corpus/shared/diffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def inserted(cls, b: B) -> Self:
return cls(ChangeType.INSERTED, b=b)

def model_dump(self) -> dict[str, Union[int, A, B]]:
out = {
out: Dict[str, Union[int, A, B]] = {
"change": int(self.change),
}
if self.a is not None:
Expand Down
20 changes: 3 additions & 17 deletions src/parallel_corpus/shared/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,9 @@


def take_last_while(predicate, xs: List) -> List:
    """Return the longest suffix of `xs` whose elements all satisfy `predicate`.

    >>> take_last_while(lambda x: x > 0, [1, -2, 3, 4])
    [3, 4]
    >>> take_last_while(lambda x: x > 0, [-1, -2])
    []
    """
    # Debug prints and dead commented-out code removed; count the matching
    # suffix as a negative offset so a single slice produces the result.
    start = 0
    for e in reversed(xs):
        if not predicate(e):
            break
        start -= 1
    return xs[start:] if start < 0 else []
1 change: 0 additions & 1 deletion src/parallel_corpus/shared/union_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def __init__(self, *, rev: Optional[List[int]] = None) -> None:
def find(self, x: int) -> int:
while x >= len(self._rev):
self._rev.append(None)
print(f"{self._rev=}")
if self._rev[x] is None:
self._rev[x] = x
elif self._rev[x] != x:
Expand Down
1 change: 0 additions & 1 deletion tests/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def test_graph_case1() -> None:
second = "Jonat han saknades , emedan han , med sin vapendragare , redan på annat håll sökt och anträffat fienden ." # noqa: E501

g = graph.init(first)
print(f"{g=}")

gm = graph.set_target(g, second)
print(f"{gm=}")
Expand Down

0 comments on commit 516c418

Please sign in to comment.