Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
feat: Adds guideline extraction & validation routes (#35)
Browse files Browse the repository at this point in the history
* refactor: Refactor LLM class

* refactor: Reflected changes

* feat: Adds routes for guideline parsing

* fix: Fixes github client methods

* test: Adds unittests

* style: Fixes typing
  • Loading branch information
frgfm authored Dec 4, 2023
1 parent 0499b63 commit a84fbb6
Show file tree
Hide file tree
Showing 7 changed files with 405 additions and 31 deletions.
6 changes: 4 additions & 2 deletions src/app/api/api_v1/endpoints/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ async def check_code_against_repo_guidelines(
# Fetch guidelines
guideline_list = [elt for elt in await guidelines.fetch_all(("repo_id", repo_id))]
# Run analysis
return openai_client.analyze_multi(payload.code, guideline_list, mode=ExecutionMode.MULTI, user_id=str(user.id))
return openai_client.check_code_against_guidelines(
payload.code, guideline_list, mode=ExecutionMode.MULTI, user_id=str(user.id)
)


@router.post("/check/{guideline_id}", status_code=status.HTTP_200_OK)
Expand All @@ -47,4 +49,4 @@ async def check_code_against_guideline(
user.id, event="compute-check", properties={"repo_id": guideline.repo_id, "guideline_id": guideline_id}
)
# Run analysis
return openai_client.analyze_mono(payload.code, guideline, user_id=str(user.id))
return openai_client.check_code(payload.code, guideline, user_id=str(user.id))
33 changes: 32 additions & 1 deletion src/app/api/api_v1/endpoints/guidelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,19 @@
from app.crud import GuidelineCRUD, RepositoryCRUD
from app.models import Guideline, Repository, UserScope
from app.schemas.base import OptionalGHToken
from app.schemas.guidelines import ContentUpdate, GuidelineCreate, GuidelineCreation, GuidelineEdit, OrderUpdate
from app.schemas.guidelines import (
ContentUpdate,
ExampleRequest,
GuidelineContent,
GuidelineCreate,
GuidelineCreation,
GuidelineEdit,
GuidelineExample,
OrderUpdate,
TextContent,
)
from app.services.github import gh_client
from app.services.openai import openai_client
from app.services.telemetry import telemetry_client

router = APIRouter()
Expand Down Expand Up @@ -100,3 +111,23 @@ async def delete_guideline(
repo = cast(Repository, await repos.get(guideline.repo_id, strict=True))
gh_client.check_user_permission(user, repo.full_name, repo.owner_id, payload.github_token, repo.installed_by)
await guidelines.delete(guideline_id)


@router.post("/parse", status_code=status.HTTP_200_OK)
async def parse_guidelines_from_text(
    payload: TextContent,
    user=Security(get_current_user, scopes=[UserScope.ADMIN, UserScope.USER]),
) -> List[GuidelineContent]:
    """Extract guidelines from a raw text using the LLM client.

    Args:
        payload: request body holding the text to analyze
        user: authenticated requester (admin or user scope)

    Returns:
        the guidelines produced by the OpenAI client for this text
    """
    requester_id = user.id
    telemetry_client.capture(requester_id, event="guideline-parse")
    # Delegate the extraction to the LLM
    parsed_guidelines = openai_client.parse_guidelines_from_text(payload.content, user_id=str(requester_id))
    return parsed_guidelines


@router.post("/examples", status_code=status.HTTP_200_OK)
async def generate_examples_for_text(
    payload: ExampleRequest,
    user=Security(get_current_user, scopes=[UserScope.ADMIN, UserScope.USER]),
) -> GuidelineExample:
    """Generate examples for an instruction using the LLM client.

    Args:
        payload: request body holding the instruction text and the target language
        user: authenticated requester (admin or user scope)

    Returns:
        the example produced by the OpenAI client for this instruction
    """
    requester_id = user.id
    telemetry_client.capture(requester_id, event="guideline-examples")
    # Delegate the generation to the LLM
    example = openai_client.generate_examples_for_instruction(
        payload.content,
        payload.language,
        user_id=str(requester_id),
    )
    return example
36 changes: 35 additions & 1 deletion src/app/api/api_v1/endpoints/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# All rights reserved.
# Copying and/or distributing is strictly prohibited without the express permission of its copyright owner.

import logging
from base64 import b64decode
from datetime import datetime
from typing import List, cast

Expand All @@ -12,12 +14,14 @@
from app.crud import GuidelineCRUD, RepositoryCRUD
from app.models import Guideline, Repository, User, UserScope
from app.schemas.base import OptionalGHToken
from app.schemas.guidelines import OrderUpdate
from app.schemas.guidelines import OrderUpdate, ParsedGuideline
from app.schemas.repos import GuidelineOrder, RepoCreate, RepoCreation, RepoUpdate
from app.services.github import gh_client
from app.services.openai import openai_client
from app.services.slack import slack_client
from app.services.telemetry import telemetry_client

logger = logging.getLogger("uvicorn.error")
router = APIRouter()


Expand Down Expand Up @@ -151,6 +155,36 @@ async def fetch_guidelines_from_repo(
return [elt for elt in await guidelines.fetch_all(("repo_id", repo_id))]


@router.post("/{repo_id}/parse", status_code=status.HTTP_200_OK)
async def parse_guidelines_from_github(
    payload: OptionalGHToken,
    repo_id: int = Path(..., gt=0),
    repos: RepositoryCRUD = Depends(get_repo_crud),
    user=Security(get_current_user, scopes=[UserScope.ADMIN, UserScope.USER]),
) -> List[ParsedGuideline]:
    """Parse guidelines out of the CONTRIBUTING.md file of a stored repository.

    Args:
        payload: request body with an optional GitHub token used to read the repository
        repo_id: ID of the repository (strictly positive)
        repos: CRUD helper used to fetch the repository entry
        user: authenticated requester (admin or user scope)

    Returns:
        the parsed guidelines, each tagged with the repo ID and the path of the parsed file

    Raises:
        HTTPException: 404 when CONTRIBUTING.md is missing, is not a regular file, or has no inline content
    """
    telemetry_client.capture(user.id, event="repo-parse-guidelines", properties={"repo_id": repo_id})
    # Sanity check
    repo = cast(Repository, await repos.get(repo_id, strict=True))
    # STATIC CONTENT
    # Only CONTRIBUTING.md is parsed for now
    # TODO: fall back to the README (gh_client.get_readme) when CONTRIBUTING.md doesn't exist
    # TODO: mine review comments (diff_hunk, body, path) with gh_client.list_review_comments
    # Ideas: filter on pulls with highest amount of comments recently, add the review output rejection/etc
    contributing = gh_client.get_file(repo.full_name, "CONTRIBUTING.md", payload.github_token)
    # The contents endpoint can answer with a list (directory) or with an empty "content" (file not inlined):
    # handle both like a missing file rather than crashing or sending an empty prompt to the LLM
    raw_content = contributing.get("content") if isinstance(contributing, dict) else None
    # If not enough information, raise error
    if not raw_content:
        raise HTTPException(status.HTTP_404_NOT_FOUND, detail="No useful information is accessible in the repository")
    # Analyze with LLM
    contributing_guidelines = openai_client.parse_guidelines_from_text(
        b64decode(raw_content).decode(), user_id=str(user.id)
    )
    origin_path = contributing["path"]
    return [
        ParsedGuideline(**guideline.dict(), repo_id=repo_id, origin_path=origin_path)
        for guideline in contributing_guidelines
    ]


@router.post("/{repo_id}/waitlist", status_code=status.HTTP_200_OK)
async def add_repo_to_waitlist(
repo_id: int = Path(..., gt=0),
Expand Down
18 changes: 18 additions & 0 deletions src/app/schemas/guidelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,29 @@
__all__ = ["GuidelineCreate", "GuidelineEdit", "ContentUpdate", "OrderUpdate"]


class TextContent(BaseModel):
    """Request body carrying a raw text."""

    # Required, at least 10 characters
    content: str = Field(default=..., min_length=10)


class ExampleRequest(TextContent):
    """Text content along with the language the examples are requested for."""

    # Optional, defaults to "python" (1 to 20 characters)
    language: str = Field(default="python", min_length=1, max_length=20)


class GuidelineExample(BaseModel):
    """Pair of positive & negative examples, each required with at least 3 characters."""

    positive: str = Field(default=..., min_length=3)
    negative: str = Field(default=..., min_length=3)


class GuidelineContent(BaseModel):
    """Title & details of a guideline, both required."""

    # 6 to 100 characters
    title: str = Field(default=..., min_length=6, max_length=100)
    # 6 to 1000 characters
    details: str = Field(default=..., min_length=6, max_length=1000)


class ParsedGuideline(GuidelineContent):
    """Guideline content tagged with a repository ID and the path it originates from."""

    # Required, strictly positive
    repo_id: int = Field(default=..., gt=0)
    # Required, no extra constraint
    origin_path: str


class GuidelineLocation(BaseModel):
repo_id: int = Field(..., gt=0)
order: int = Field(0, ge=0, nullable=False)
Expand Down
68 changes: 57 additions & 11 deletions src/app/services/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
# All rights reserved.
# Copying and/or distributing is strictly prohibited without the express permission of its copyright owner.

from typing import Any, Dict, Union
import logging
from typing import Any, Dict, List, Union

import requests
from fastapi import HTTPException, status
Expand All @@ -13,6 +14,8 @@
from app.models import User, UserScope
from app.schemas.services import GHToken, GHTokenRequest

logger = logging.getLogger("uvicorn.error")

__all__ = ["gh_client"]


Expand All @@ -23,35 +26,46 @@ class GitHubClient:
def __init__(self, token: Union[str, None] = None) -> None:
    """Store the default request headers built from the optional token.

    Args:
        token: GitHub token used to build the default headers of the client
    """
    default_headers = self._get_headers(token)
    self.headers = default_headers

@staticmethod
def _get_headers(token: Union[str, None] = None) -> Dict[str, str]:
    """Build the HTTP headers of a GitHub API request.

    Args:
        token: optional GitHub token; any value that is not a string is ignored

    Returns:
        the JSON content-type header, preceded by a bearer authorization header when a token is given
    """
    headers = {"Content-Type": "application/json"}
    if isinstance(token, str):
        # Authorization goes first to keep the key order of the token-based headers
        headers = {"Authorization": f"Bearer {token}", **headers}
    return headers

def _get(
    self,
    route: str,
    token: Union[str, None] = None,
    timeout: int = 5,
    status_code_tolerance: Union[int, None] = None,
    **kwargs: Any,
) -> requests.Response:
    """Send a GET request to the GitHub API and return the raw response.

    Args:
        route: path appended to the API endpoint
        token: token overriding the default headers of the client for this request
        timeout: timeout passed to the HTTP request
        status_code_tolerance: extra status code (on top of 200) that must not raise
        **kwargs: sent as query string parameters

    Returns:
        the HTTP response, left for the caller to decode

    Raises:
        HTTPException: when the status code is neither 200 nor the tolerated one
    """
    response = requests.get(
        f"{self.ENDPOINT}/{route}",
        headers=self._get_headers(token) if isinstance(token, str) else self.headers,
        timeout=timeout,
        params=kwargs,
    )
    # A status code is never None, so a missing tolerance only lets 200 through
    if response.status_code in (status.HTTP_200_OK, status_code_tolerance):
        return response
    # Error bodies are not guaranteed to be JSON, nor to hold a "message" key
    try:
        error_payload = response.json()
    except ValueError:
        error_payload = None
    detail = None
    if isinstance(error_payload, dict):
        # "error" takes precedence over "message"; both lookups are safe when the key is absent
        detail = error_payload.get("error", error_payload.get("message"))
    if detail is None:
        detail = response.text
    raise HTTPException(status_code=response.status_code, detail=detail)

def get_repo(self, repo_id: int, **kwargs: Any) -> Dict[str, Any]:
    """Fetch a repository from its GitHub ID.

    Args:
        repo_id: ID used in the `repositories/{repo_id}` route
        **kwargs: forwarded to `_get`

    Returns:
        the decoded JSON payload of the response
    """
    # `_get` hands back the raw response: decode it to honor the annotated return type
    return self._get(f"repositories/{repo_id}", **kwargs).json()

def get_user(self, user_id: int, **kwargs: Any) -> Dict[str, Any]:
    """Fetch a user from its GitHub ID.

    Args:
        user_id: ID used in the `user/{user_id}` route
        **kwargs: forwarded to `_get`

    Returns:
        the decoded JSON payload of the response
    """
    # `_get` hands back the raw response: decode it to honor the annotated return type
    return self._get(f"user/{user_id}", **kwargs).json()

def get_my_user(self, token: str) -> Dict[str, Any]:
    """Fetch the user information through the `user` route with the given token.

    Args:
        token: GitHub token passed to the request

    Returns:
        the decoded JSON payload of the response
    """
    # `_get` hands back the raw response: decode it to honor the annotated return type
    return self._get("user", token).json()

def get_permission(self, repo_name: str, user_name: str, token: str) -> str:
    """Retrieve the role of a user on a repository.

    Args:
        repo_name: repository name used in the `repos/{repo_name}/...` route
        user_name: login of the collaborator
        token: GitHub token passed to the request

    Returns:
        the `role_name` entry of the decoded response
    """
    # `_get` hands back the raw response: decode it before reading the role
    permission = self._get(f"repos/{repo_name}/collaborators/{user_name}/permission", token).json()
    return permission["role_name"]

def check_user_permission(
self,
Expand Down Expand Up @@ -94,5 +108,37 @@ def get_token_from_code(self, code: str, redirect_uri: HttpUrl, timeout: int = 5
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=response.json()["error_description"])
return GHToken(**response.json())

def get_readme(self, repo_name: str, token: Union[str, None] = None) -> Union[Dict[str, Any], None]:
    """Fetch the README entry of a repository.

    Args:
        repo_name: repository name used in the `repos/{repo_name}/readme` route
        token: optional GitHub token passed to the request

    Returns:
        the decoded JSON payload, or None when GitHub answers with a 404
    """
    # https://docs.github.com/en/rest/repos/contents#get-a-repository-readme
    not_found = status.HTTP_404_NOT_FOUND
    # A 404 is tolerated so that a missing README does not raise
    response = self._get(f"repos/{repo_name}/readme", token, status_code_tolerance=not_found)
    if response.status_code == not_found:
        return None
    return response.json()

def get_file(self, repo_name: str, file_path: str, token: Union[str, None] = None) -> Union[Dict[str, Any], None]:
    """Fetch the content entry located at a given path of a repository.

    Args:
        repo_name: repository name used in the `repos/{repo_name}/contents/...` route
        file_path: path appended to the contents route
        token: optional GitHub token passed to the request

    Returns:
        the decoded JSON payload, or None when GitHub answers with a 404
    """
    # https://docs.github.com/en/rest/repos/contents#get-repository-content
    not_found = status.HTTP_404_NOT_FOUND
    route = f"repos/{repo_name}/contents/{file_path}"
    # A 404 is tolerated so that a missing path does not raise
    response = self._get(route, token, status_code_tolerance=not_found)
    if response.status_code == not_found:
        return None
    return response.json()

def list_pulls(self, repo_name: str, token: Union[str, None] = None, per_page: int = 30) -> List[Dict[str, Any]]:
    """List closed pull requests targeting the default branch, sorted by descending popularity.

    Args:
        repo_name: repository name used in the `repos/{repo_name}/pulls` route
        token: optional GitHub token passed to both requests
        per_page: value of the `per_page` query parameter

    Returns:
        the decoded JSON payload of the pulls request
    """
    # https://docs.github.com/en/rest/pulls/pulls#list-pull-requests
    # The default branch is looked up first, since it is used as the `base` filter
    repo_info = self._get(f"repos/{repo_name}", token).json()
    query = {
        "state": "closed",
        "sort": "popularity",
        "direction": "desc",
        "base": repo_info["default_branch"],
        "per_page": per_page,
    }
    pulls_response = self._get(f"repos/{repo_name}/pulls", token, **query)
    return pulls_response.json()

def list_review_comments(self, repo_name: str, token: Union[str, None] = None) -> List[Dict[str, Any]]:
    """List the review comments of a repository written by accounts of type "User".

    Only a single request is made, with `per_page=100`, sorted by `created_at` in descending order.

    Args:
        repo_name: repository name used in the `repos/{repo_name}/pulls/comments` route
        token: optional GitHub token passed to the request

    Returns:
        the comments whose `user.type` equals "User"
    """
    # https://docs.github.com/en/rest/pulls/comments#list-review-comments-in-a-repository
    comments = self._get(
        f"repos/{repo_name}/pulls/comments", token, sort="created_at", direction="desc", per_page=100
    ).json()
    # Keep comments from accounts of type "User" --> fields of interest downstream: diff_hunk, body, path
    # The lookup is guarded so that an entry without a user object (presumably possible for removed
    # accounts, to be confirmed) is dropped instead of failing the whole listing
    # TODO: also drop the comments written by the pull request author
    return [comment for comment in comments if (comment.get("user") or {}).get("type") == "User"]


# Module-level client instance, built with the GitHub token from the app settings
gh_client = GitHubClient(settings.GH_TOKEN)
Loading

0 comments on commit a84fbb6

Please sign in to comment.