Skip to content

Commit

Permalink
fix: added removal of null characters!
Browse files Browse the repository at this point in the history
  • Loading branch information
amindadgar committed Sep 26, 2024
1 parent 71d8e76 commit ad5186a
Showing 1 changed file with 5 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import logging
from typing import Any

Expand Down Expand Up @@ -158,7 +159,7 @@ def prepare_document(
content_url_updated, url_reference = prepare_raw_message_urls(content)

# always has length 1
assert len(author_name) == 1
assert len(author_name) == 1, "Either None or multiple authors!"

msg_meta_data = {
"channel": message["channelName"],
Expand Down Expand Up @@ -209,6 +210,9 @@ def prepare_document(
if check_no_content_only_links(content_url_updated):
raise ValueError("Message just did have urls")

# removing null and all other ASCII control characters (\x00-\x1F and \x7F); note this also strips tabs and newlines
content_url_updated = re.sub(r'[\x00-\x1F\x7F]', '', content_url_updated)

doc: Document
if not exclude_metadata:
doc = Document(text=content_url_updated, metadata=msg_meta_data)
Expand Down

0 comments on commit ad5186a

Please sign in to comment.