Skip to content

Commit

Permalink
feat: github compare diff backend
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangvi7 committed Nov 13, 2024
1 parent 707a801 commit 2d0c644
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 63 deletions.
33 changes: 33 additions & 0 deletions querybook/server/datasources/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from env import QuerybookSettings
from lib.github.github import github_manager
from typing import Dict, List, Optional
from lib.github.serializers import serialize_datadoc_to_markdown
from logic import github as logic
from logic import datadoc as datadoc_logic
from const.datasources import RESOURCE_NOT_FOUND_STATUS_CODE
Expand Down Expand Up @@ -113,3 +114,35 @@ def get_datadoc_versions(
page = offset // limit + 1
versions = github_client.get_datadoc_versions(page=page)
return versions


@register("/github/datadocs/<int:datadoc_id>/compare/", methods=["GET"])
@with_github_client
def compare_datadoc_versions(
    github_client: GitHubClient, datadoc_id: int, commit_sha: str
) -> Dict:
    """
    Return the live DataDoc and its version at ``commit_sha`` as Markdown.

    Both sides are serialized with ``exclude_metadata=True`` so the two
    strings can be compared on content alone.

    Returns a dict with two keys:
        - ``current_content``: Markdown of the DataDoc as it is stored now.
        - ``commit_content``: Markdown of the DataDoc fetched at ``commit_sha``.

    Fails through ``api_assert`` with RESOURCE_NOT_FOUND_STATUS_CODE when the
    DataDoc cannot be loaded by id.
    """

    def to_markdown(doc) -> str:
        # Single place that fixes the serialization options for both sides.
        return serialize_datadoc_to_markdown(doc, exclude_metadata=True)

    # Access checks run before any DataDoc data is loaded.
    assert_can_read(datadoc_id)
    verify_data_doc_permission(datadoc_id)

    live_doc = datadoc_logic.get_data_doc_by_id(datadoc_id)
    api_assert(
        live_doc is not None,
        "Current DataDoc not found",
        status_code=RESOURCE_NOT_FOUND_STATUS_CODE,
    )

    # Dict values are evaluated top to bottom: the live DataDoc is serialized
    # first, then the committed version is fetched and re-serialized.
    return {
        "current_content": to_markdown(live_doc),
        "commit_content": to_markdown(
            github_client.get_datadoc_at_commit(commit_sha)
        ),
    }
126 changes: 80 additions & 46 deletions querybook/server/lib/github/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,74 +17,108 @@ def parse_datetime_as_utc(date_str: Optional[str]) -> datetime:
return datetime.now(timezone.utc).replace(tzinfo=timezone.utc)


def serialize_datadoc_to_markdown(
    datadoc: DataDoc, exclude_metadata: bool = False
) -> str:
    """
    Serialize a DataDoc instance to a Markdown string.

    The output is, in order: an optional YAML front matter block with the
    DataDoc's metadata, an H1 line with the DataDoc title, and the serialized
    cells.

    Args:
        datadoc: The DataDoc to serialize.
        exclude_metadata: When True, the YAML front matter is omitted and the
            flag is forwarded to ``serialize_datacells``.

    Returns:
        The Markdown representation of the DataDoc.

    Raises:
        ValueError: If the DataDoc metadata cannot be dumped to YAML.
    """
    markdown_parts = []

    if not exclude_metadata:
        datadoc_metadata = {
            "id": datadoc.id,
            "environment_id": datadoc.environment_id,
            "public": datadoc.public,
            "archived": datadoc.archived,
            "owner_uid": datadoc.owner_uid,
            "created_at": (
                datadoc.created_at.isoformat() if datadoc.created_at else None
            ),
            "updated_at": (
                datadoc.updated_at.isoformat() if datadoc.updated_at else None
            ),
            "meta": datadoc.meta,
            "title": datadoc.title,
        }
        # Only the YAML dump can raise YAMLError, so it is the only call
        # kept inside the try block.
        try:
            metadata_yaml = yaml.dump(datadoc_metadata, default_flow_style=False)
        except yaml.YAMLError as e:
            # Chain the original error so the YAML traceback is preserved.
            raise ValueError(
                f"Error serializing DataDoc metadata to YAML: {e}"
            ) from e
        markdown_parts.append(f"---\n{metadata_yaml}---\n\n")

    markdown_parts.append(f"# {datadoc.title}\n\n")
    markdown_parts.append(
        serialize_datacells(cells=datadoc.cells, exclude_metadata=exclude_metadata)
    )

    return "".join(markdown_parts)

def serialize_datacells(cells: List[DataCell], exclude_metadata: bool = False) -> str:
    """
    Serialize a list of DataCell instances to a Markdown string.

    Each cell is rendered by ``serialize_cell_content``; the rendered cells
    are joined with a blank line between them.

    Args:
        cells: The cells to serialize, in output order.
        exclude_metadata: When True, the per-cell metadata comment is omitted
            and the flag is forwarded to ``serialize_cell_content``.

    Returns:
        The Markdown for all cells; an empty string when ``cells`` is empty.

    Raises:
        ValueError: If a cell's metadata cannot be dumped to YAML.
    """
    cell_strings = []
    for cell in cells:
        cell_content = serialize_cell_content(
            cell=cell, exclude_metadata=exclude_metadata
        )

        if exclude_metadata:
            cell_strings.append(cell_content)
            continue

        # Since GitHub's Markdown renderer does not recognize multiple --- blocks as separate YAML sections,
        # we serialize cell metadata in HTML comment to hide it from rendered view
        cell_metadata = {
            "id": cell.id,
            "cell_type": cell.cell_type.name.lower(),
            "created_at": cell.created_at.isoformat() if cell.created_at else None,
            "updated_at": cell.updated_at.isoformat() if cell.updated_at else None,
            "meta": cell.meta,
        }
        try:
            cell_metadata_yaml = yaml.dump(cell_metadata, default_flow_style=False)
        except yaml.YAMLError as e:
            # Chain the original error so the YAML traceback is preserved.
            raise ValueError(f"Error serializing cell metadata to YAML: {e}") from e

        cell_metadata_comment = f"<!--\n{cell_metadata_yaml.strip()}\n-->\n"
        cell_strings.append(cell_metadata_comment + cell_content)

    return "\n\n".join(cell_strings)


def serialize_cell_content(cell: DataCell, exclude_metadata: bool = False) -> str:
    """
    Serialize a single DataCell instance to a Markdown string based on its type.

    Every cell is rendered as an H2 header followed by its body:
        - query: ``## Query: <title>`` and the stripped query text, wrapped
          in a ```sql fence unless ``exclude_metadata`` is True.
        - text: ``## Text`` and the stripped cell text.
        - chart: ``## Chart`` and a fixed placeholder sentence.

    Args:
        cell: The cell to serialize.
        exclude_metadata: When True, query cells are emitted without the
            surrounding code fence. Text and chart cells are unaffected.

    Returns:
        The Markdown for the cell.

    Raises:
        ValueError: If the cell type is not query, text or chart.
    """
    cell_meta = cell.meta or {}

    if cell.cell_type == DataCellType.query:
        query_title = cell_meta.get("title", "Query")
        header = f"## Query: {query_title}\n\n"
        if exclude_metadata:  # Exclude code fences
            content = f"{cell.context.strip()}\n"
        else:
            content = f"```sql\n{cell.context.strip()}\n```\n"
        return header + content

    elif cell.cell_type == DataCellType.text:
        header = "## Text\n\n"
        content = f"{cell.context.strip()}\n"
        return header + content

    elif cell.cell_type == DataCellType.chart:
        header = "## Chart\n\n"
        content = "*Chart generated from the metadata.*\n"
        return header + content

    else:
        raise ValueError(f"Unknown cell type: {cell.cell_type}")

Expand Down
2 changes: 1 addition & 1 deletion querybook/webapp/components/DataDocGitHub/GitHub.scss
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
flex-direction: column;
height: 100%;
background-color: var(--bg);
padding: 16px;
padding: 8px;
box-sizing: border-box;
overflow-y: auto;
}
Expand Down
65 changes: 49 additions & 16 deletions querybook/webapp/components/DataDocGitHub/GitHubVersions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ import React, { useCallback, useState } from 'react';

import { QueryComparison } from 'components/TranspileQueryModal/QueryComparison';
import { usePaginatedResource } from 'hooks/usePaginatedResource';
import { useResource } from 'hooks/useResource';
import { GitHubResource, ICommit } from 'resource/github';
import { AsyncButton } from 'ui/AsyncButton/AsyncButton';
import { IconButton } from 'ui/Button/IconButton';
import { FeatureDisabledMessage } from 'ui/DisabledSection/FeatureDisabledMessage';
import { ErrorPage } from 'ui/ErrorPage/ErrorPage';
import { Link } from 'ui/Link/Link';
import { Loading } from 'ui/Loading/Loading';
import { Message } from 'ui/Message/Message';

Expand Down Expand Up @@ -42,6 +44,24 @@ export const GitHubVersions: React.FunctionComponent<IProps> = ({
{ batchSize: 5 }
);

const {
data: comparisonData,
isLoading: isComparisonLoading,
isError: isComparisonError,
} = useResource(
React.useCallback(() => {
if (selectedCommit) {
return GitHubResource.compareDataDocVersions(
docId,
selectedCommit.sha
);
}
}, [docId, selectedCommit]),
{
fetchOnMount: !!selectedCommit,
}
);

const handleRestore = useCallback(
async (commitSha: string, commitMessage: string) => {
alert('Restore feature not implemented yet');
Expand All @@ -61,8 +81,8 @@ export const GitHubVersions: React.FunctionComponent<IProps> = ({
setIsCompareOpen(true);
}
} else {
// Handle closing without a version (e.g. clicking close button)
setIsCompareOpen(false);
setSelectedCommit(null);
setIsFullScreen(false);
}
},
Expand Down Expand Up @@ -136,15 +156,17 @@ export const GitHubVersions: React.FunctionComponent<IProps> = ({
{selectedCommit && (
<div className="GitHubVersionsComparePanel">
<div className="panel-header">
<IconButton
icon={'Info'}
size={16}
tooltip={
'Compare the current DataDoc with the selected commit. For a more detailed view of changes, please view it on GitHub.'
}
tooltipPos="left"
className="tooltip"
/>
<Link to={selectedCommit.html_url} newTab>
<IconButton
icon={'Info'}
size={16}
tooltip={
'Compare the current DataDoc with the selected commit. For a more detailed view of changes, please view it on GitHub.'
}
tooltipPos="left"
className="tooltip"
/>
</Link>
<IconButton
icon={isFullScreen ? 'Minimize2' : 'Maximize2'}
onClick={toggleFullScreen}
Expand All @@ -164,12 +186,23 @@ export const GitHubVersions: React.FunctionComponent<IProps> = ({
tooltipPos="left"
/>
</div>
<QueryComparison
fromQuery={'Selected Commit DataDoc Text'}
toQuery={'Current DataDoc Text'}
fromQueryTitle={`Commit: ${selectedCommit.commit.message}`}
toQueryTitle="Current DataDoc"
/>
{isComparisonLoading ? (
<Loading />
) : isComparisonError || !comparisonData ? (
<Message
message="Failed to load comparison. Please try again."
type="error"
icon="AlertTriangle"
iconSize={16}
/>
) : (
<QueryComparison
    // "from" is the selected commit and "to" is the live DataDoc, so the
    // content shown on each side matches the title above it.
    fromQuery={comparisonData?.commit_content}
    toQuery={comparisonData?.current_content}
    fromQueryTitle={`Commit: ${selectedCommit.commit.message}`}
    toQueryTitle="Current DataDoc"
/>
)}
</div>
)}
</div>
Expand Down
7 changes: 7 additions & 0 deletions querybook/webapp/resource/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,11 @@ export const GitHubResource = {
limit,
offset,
}),
compareDataDocVersions: (docId: number, commitSha: string) =>
ds.fetch<{
current_content: string;
commit_content: string;
}>(`/github/datadocs/${docId}/compare/`, {
commit_sha: commitSha,
}),
};

0 comments on commit 2d0c644

Please sign in to comment.