diff --git a/querybook/server/datasources/github.py b/querybook/server/datasources/github.py index 847eb76d3..3dd607136 100644 --- a/querybook/server/datasources/github.py +++ b/querybook/server/datasources/github.py @@ -4,6 +4,7 @@ from env import QuerybookSettings from lib.github.github import github_manager from typing import Dict, List, Optional +from lib.github.serializers import serialize_datadoc_to_markdown from logic import github as logic from logic import datadoc as datadoc_logic from const.datasources import RESOURCE_NOT_FOUND_STATUS_CODE @@ -113,3 +114,35 @@ def get_datadoc_versions( page = offset // limit + 1 versions = github_client.get_datadoc_versions(page=page) return versions + + +@register("/github/datadocs//compare/", methods=["GET"]) +@with_github_client +def compare_datadoc_versions( + github_client: GitHubClient, datadoc_id: int, commit_sha: str +) -> Dict: + """ + Compare the current DataDoc with a specific commit. + """ + assert_can_read(datadoc_id) + verify_data_doc_permission(datadoc_id) + current_datadoc = datadoc_logic.get_data_doc_by_id(datadoc_id) + api_assert( + current_datadoc is not None, + "Current DataDoc not found", + status_code=RESOURCE_NOT_FOUND_STATUS_CODE, + ) + current_markdown = serialize_datadoc_to_markdown( + current_datadoc, exclude_metadata=True + ) + + # Get the DataDoc content at the specified commit and re-serialize with metadata excluded + commit_datadoc = github_client.get_datadoc_at_commit(commit_sha) + commit_markdown = serialize_datadoc_to_markdown( + commit_datadoc, exclude_metadata=True + ) + + return { + "current_content": current_markdown, + "commit_content": commit_markdown, + } diff --git a/querybook/server/lib/github/serializers.py b/querybook/server/lib/github/serializers.py index 2f9fa4bab..73b146264 100644 --- a/querybook/server/lib/github/serializers.py +++ b/querybook/server/lib/github/serializers.py @@ -17,74 +17,108 @@ def parse_datetime_as_utc(date_str: Optional[str]) -> datetime: return datetime.now(timezone.utc).replace(tzinfo=timezone.utc) -def serialize_datadoc_to_markdown(datadoc: DataDoc) -> str: +def serialize_datadoc_to_markdown( + datadoc: DataDoc, exclude_metadata: bool = False +) -> str: """ Serialize a DataDoc instance to a Markdown string with YAML front matter. """ - datadoc_metadata = { - "id": datadoc.id, - "environment_id": datadoc.environment_id, - "public": datadoc.public, - "archived": datadoc.archived, - "owner_uid": datadoc.owner_uid, - "created_at": datadoc.created_at.isoformat() if datadoc.created_at else None, - "updated_at": datadoc.updated_at.isoformat() if datadoc.updated_at else None, - "meta": datadoc.meta, - "title": datadoc.title, - } - try: - front_matter = ( - f"---\n{yaml.dump(datadoc_metadata, default_flow_style=False)}---\n\n" - ) - except yaml.YAMLError as e: - raise ValueError(f"Error serializing DataDoc metadata to YAML: {e}") + markdown_parts = [] + + if not exclude_metadata: + datadoc_metadata = { + "id": datadoc.id, + "environment_id": datadoc.environment_id, + "public": datadoc.public, + "archived": datadoc.archived, + "owner_uid": datadoc.owner_uid, + "created_at": ( + datadoc.created_at.isoformat() if datadoc.created_at else None + ), + "updated_at": ( + datadoc.updated_at.isoformat() if datadoc.updated_at else None + ), + "meta": datadoc.meta, + "title": datadoc.title, + } + try: + front_matter = ( + f"---\n{yaml.dump(datadoc_metadata, default_flow_style=False)}---\n\n" + ) + markdown_parts.append(front_matter) + + except yaml.YAMLError as e: + raise ValueError(f"Error serializing DataDoc metadata to YAML: {e}") title = f"# {datadoc.title}\n\n" - content = serialize_datacells(datadoc.cells) - markdown_content = front_matter + title + content - return markdown_content + markdown_parts.append(title) + + content = serialize_datacells( + cells=datadoc.cells, exclude_metadata=exclude_metadata + ) + markdown_parts.append(content) + return "".join(markdown_parts) -def serialize_datacells(cells: List[DataCell]) -> str: + +def serialize_datacells(cells: List[DataCell], exclude_metadata: bool = False) -> str: """ Serialize a list of DataCell instances to a Markdown string. """ - lines = [] + cell_strings = [] for cell in cells: - # Since GitHub's Markdown renderer does not recognize multiple --- blocks as separate YAML sections, - # we serialize cell metadata in HTML comment to hide it from rendered view - cell_metadata = { - "id": cell.id, - "cell_type": cell.cell_type.name.lower(), - "created_at": cell.created_at.isoformat() if cell.created_at else None, - "updated_at": cell.updated_at.isoformat() if cell.updated_at else None, - "meta": cell.meta, - } - try: - cell_metadata_yaml = yaml.dump(cell_metadata, default_flow_style=False) - except yaml.YAMLError as e: - raise ValueError(f"Error serializing cell metadata to YAML: {e}") - - cell_metadata_comment = f"\n" + cell_content = serialize_cell_content( + cell=cell, exclude_metadata=exclude_metadata + ) - cell_content = serialize_cell_content(cell) - lines.append(cell_metadata_comment + cell_content) + if not exclude_metadata: + # Since GitHub's Markdown renderer does not recognize multiple --- blocks as separate YAML sections, + # we serialize cell metadata in HTML comment to hide it from rendered view + cell_metadata = { + "id": cell.id, + "cell_type": cell.cell_type.name.lower(), + "created_at": cell.created_at.isoformat() if cell.created_at else None, + "updated_at": cell.updated_at.isoformat() if cell.updated_at else None, + "meta": cell.meta, + } + try: + cell_metadata_yaml = yaml.dump(cell_metadata, default_flow_style=False) + except yaml.YAMLError as e: + raise ValueError(f"Error serializing cell metadata to YAML: {e}") + + cell_metadata_comment = f"\n" + cell_strings.append(cell_metadata_comment + cell_content) + else: + cell_strings.append(cell_content) - return "\n\n".join(lines) + return "\n\n".join(cell_strings) -def serialize_cell_content(cell: DataCell) -> str: +def serialize_cell_content(cell: DataCell, exclude_metadata: bool = False) -> str: """ - Serialize a single DataCell instance to a Markdown string based on its type. + Serialize a single DataCell instance to a Markdown string based on its type """ cell_meta = cell.meta or {} + if cell.cell_type == DataCellType.query: query_title = cell_meta.get("title", "Query") - return f"## Query: {query_title}\n\n```sql\n{cell.context.strip()}\n```\n" + header = f"## Query: {query_title}\n\n" + if exclude_metadata: # Exclude code fences + content = f"{cell.context.strip()}\n" + else: + content = f"```sql\n{cell.context.strip()}\n```\n" + return header + content + elif cell.cell_type == DataCellType.text: - return f"## Text\n\n{cell.context.strip()}\n" + header = "## Text\n\n" + content = f"{cell.context.strip()}\n" + return header + content + elif cell.cell_type == DataCellType.chart: - return "## Chart\n\n*Chart generated from the metadata.*\n" + header = "## Chart\n\n" + content = "*Chart generated from the metadata.*\n" + return header + content + else: raise ValueError(f"Unknown cell type: {cell.cell_type}") diff --git a/querybook/webapp/components/DataDocGitHub/GitHub.scss b/querybook/webapp/components/DataDocGitHub/GitHub.scss index 86df3d822..05ea86cd9 100644 --- a/querybook/webapp/components/DataDocGitHub/GitHub.scss +++ b/querybook/webapp/components/DataDocGitHub/GitHub.scss @@ -82,7 +82,7 @@ flex-direction: column; height: 100%; background-color: var(--bg); - padding: 16px; + padding: 8px; box-sizing: border-box; overflow-y: auto; } diff --git a/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx b/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx index df12a6ac6..dec48794f 100644 --- a/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx +++ b/querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx @@ -2,11 +2,13 @@ import React, { useCallback, useState } from 'react'; import { QueryComparison } from 'components/TranspileQueryModal/QueryComparison'; import { usePaginatedResource } from 'hooks/usePaginatedResource'; +import { useResource } from 'hooks/useResource'; import { GitHubResource, ICommit } from 'resource/github'; import { AsyncButton } from 'ui/AsyncButton/AsyncButton'; import { IconButton } from 'ui/Button/IconButton'; import { FeatureDisabledMessage } from 'ui/DisabledSection/FeatureDisabledMessage'; import { ErrorPage } from 'ui/ErrorPage/ErrorPage'; +import { Link } from 'ui/Link/Link'; import { Loading } from 'ui/Loading/Loading'; import { Message } from 'ui/Message/Message'; @@ -42,6 +44,24 @@ export const GitHubVersions: React.FunctionComponent = ({ { batchSize: 5 } ); + const { + data: comparisonData, + isLoading: isComparisonLoading, + isError: isComparisonError, + } = useResource( + React.useCallback(() => { + if (selectedCommit) { + return GitHubResource.compareDataDocVersions( + docId, + selectedCommit.sha + ); + } + }, [docId, selectedCommit]), + { + fetchOnMount: !!selectedCommit, + } + ); + const handleRestore = useCallback( async (commitSha: string, commitMessage: string) => { alert('Restore feature not implemented yet'); @@ -61,8 +81,8 @@ export const GitHubVersions: React.FunctionComponent = ({ setIsCompareOpen(true); } } else { - // Handle closing without a version (e.g. clicking close button) setIsCompareOpen(false); + setSelectedCommit(null); setIsFullScreen(false); } }, @@ -136,15 +156,17 @@ export const GitHubVersions: React.FunctionComponent = ({ {selectedCommit && (
- + + + = ({ tooltipPos="left" />
- + {isComparisonLoading ? ( + + ) : isComparisonError || !comparisonData ? ( + + ) : ( + + )}
)} diff --git a/querybook/webapp/resource/github.ts b/querybook/webapp/resource/github.ts index 9170c1932..321ef9c76 100644 --- a/querybook/webapp/resource/github.ts +++ b/querybook/webapp/resource/github.ts @@ -46,4 +46,11 @@ export const GitHubResource = { limit, offset, }), + compareDataDocVersions: (docId: number, commitSha: string) => + ds.fetch<{ + current_content: string; + commit_content: string; + }>(`/github/datadocs/${docId}/compare/`, { + commit_sha: commitSha, + }), };