Fix deserialization of URL #177

Merged · 1 commit · Dec 2, 2024
src/fetch/src/mcp_server_fetch/server.py (12 changes: 6 additions, 6 deletions)

```diff
@@ -44,7 +44,7 @@ def extract_content_from_html(html: str) -> str:
     return content
 
 
-def get_robots_txt_url(url: AnyUrl | str) -> str:
+def get_robots_txt_url(url: str) -> str:
     """Get the robots.txt URL for a given website URL.
 
     Args:
@@ -54,15 +54,15 @@ def get_robots_txt_url(url: AnyUrl | str) -> str:
         URL of the robots.txt file
     """
     # Parse the URL into components
-    parsed = urlparse(str(url))
+    parsed = urlparse(url)
 
     # Reconstruct the base URL with just scheme, netloc, and /robots.txt path
     robots_url = urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))
 
     return robots_url
 
 
-async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
     """
     Check if the URL can be fetched by the user agent according to the robots.txt file.
     Raises a McpError if not.
@@ -106,7 +106,7 @@ async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -> None:
 
 
 async def fetch_url(
-    url: AnyUrl | str, user_agent: str, force_raw: bool = False
+    url: str, user_agent: str, force_raw: bool = False
 ) -> Tuple[str, str]:
     """
     Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
@@ -116,7 +116,7 @@ async def fetch_url(
     async with AsyncClient() as client:
         try:
             response = await client.get(
-                str(url),
+                url,
                 follow_redirects=True,
                 headers={"User-Agent": user_agent},
                 timeout=30,
@@ -221,7 +221,7 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
     except ValueError as e:
         raise McpError(INVALID_PARAMS, str(e))
 
-    url = args.url
+    url = str(args.url)
     if not url:
         raise McpError(INVALID_PARAMS, "URL is required")
```
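
Why the change works: the fetch tool's arguments are validated by a pydantic model whose `url` field is an `AnyUrl`, and under pydantic v2 `AnyUrl` is no longer a `str` subclass, so handing the deserialized value straight to `urlparse` or `httpx` can fail. The patch converts to `str` exactly once in `call_tool` and lets every helper take a plain `str`. Below is a minimal sketch of that pattern, assuming pydantic v2 semantics; the `Fetch` model here is a simplified stand-in for the server's own, not its exact definition:

```python
# Minimal sketch of the boundary-conversion pattern this PR adopts.
# Assumes pydantic v2, where AnyUrl is not a str subclass; the Fetch
# model below is a simplified stand-in for the one in server.py.
from urllib.parse import urlparse, urlunparse

from pydantic import AnyUrl, BaseModel


class Fetch(BaseModel):
    url: AnyUrl  # validated as a URL, but not usable as a plain str


def get_robots_txt_url(url: str) -> str:
    # Helpers take plain str, as in the patched server.
    parsed = urlparse(url)
    return urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))


args = Fetch(url="https://example.com/some/page")

# urlparse(args.url) would fail under pydantic v2, since the validated
# Url object is neither str nor bytes -- the crash being fixed here.
url = str(args.url)  # convert exactly once, at the tool boundary
print(get_robots_txt_url(url))  # https://example.com/robots.txt
```

Converting at the boundary keeps validation where it belongs (the pydantic model still rejects malformed URLs) while sparing the helpers from `AnyUrl | str` unions and scattered defensive `str()` calls.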