Skip to content

Commit

Permalink
Merge pull request #23 from crypdick/feature/ignore-frontmatter
Browse files Browse the repository at this point in the history
Ignore frontmatter for RAG
  • Loading branch information
eugeneyan authored Feb 29, 2024
2 parents 5c47fb2 + 564cb30 commit 9a6d256
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/prep/build_vault_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,19 @@ def create_vault_dict(vault_path: str, paths: List[str]) -> dict[str, dict[str,
if "<%" in lines[0]:
logger.debug(f"Skipping templater template in {filename}")
continue

# Detect YAML front matter (a leading "---" line closed by a later "---"
# line) and drop it, including both delimiters, so that only the note body
# is passed on for chunking.
if lines[0] == "---\n":
    # Index of the closing delimiter in `lines`. None means "not found";
    # 0 cannot be used as the sentinel because a match position is a real
    # index and the search must be able to distinguish the two.
    closing_idx = None
    # start=1 keeps idx aligned with positions in `lines` rather than in
    # the `lines[1:]` slice, so the slice below removes the closing
    # delimiter as well instead of leaving it at the top of the body.
    for idx, line in enumerate(lines[1:], start=1):
        # Strip only line terminators so a closing delimiter that is the
        # last line of the file (no trailing newline) is still recognised.
        if line.rstrip("\r\n") == "---":
            closing_idx = idx
            break
    if closing_idx is None:
        raise ValueError(f"YAML front matter not closed in {filename}")
    # Keep everything after the closing delimiter; this may be empty when
    # the file contains nothing but front matter.
    lines = lines[closing_idx + 1 :]

chunks = chunk_doc_to_dict(lines)

if len(chunks) > 0: # Only add docs with chunks to dict
Expand Down

0 comments on commit 9a6d256

Please sign in to comment.