Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature - Manage payloads #2989

Merged
merged 6 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 112 additions & 11 deletions app/api/v2/handlers/payload_api.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import asyncio
import itertools
import os
import pathlib
from io import IOBase

import aiohttp_apispec
from aiohttp import web
import marshmallow as ma

from app.api.v2.handlers.base_api import BaseApi
from app.api.v2.schemas.base_schemas import BaseGetAllQuerySchema


class PayloadSchema(ma.Schema):
    """Schema with a single ``payloads`` field: a list of strings."""

    # NOTE(review): the import from app.api.v2.schemas.payload_schemas that
    # follows this class also binds the name PayloadSchema, so that import
    # supersedes this local definition.
    payloads = ma.fields.List(ma.fields.String())
from app.api.v2.schemas.payload_schemas import PayloadQuerySchema, PayloadSchema, PayloadCreateRequestSchema, \
PayloadDeleteRequestSchema


class PayloadApi(BaseApi):
Expand All @@ -22,22 +21,124 @@ def __init__(self, services):
def add_routes(self, app: web.Application):
    """
    Register the payload endpoints on the application's router.

    Routes registered:
      * GET    /payloads         -> get_payloads
      * POST   /payloads         -> post_payloads
      * DELETE /payloads/{name}  -> delete_payloads

    :param app: The aiohttp application whose router receives the routes.
    """
    router = app.router
    router.add_get('/payloads', self.get_payloads)
    router.add_post("/payloads", self.post_payloads)
    router.add_delete("/payloads/{name}", self.delete_payloads)

@aiohttp_apispec.docs(tags=['payloads'],
                      summary='Retrieve payloads',
                      description='Retrieves all stored payloads.')
@aiohttp_apispec.querystring_schema(PayloadQuerySchema)
@aiohttp_apispec.response_schema(PayloadSchema(),
                                 description='Returns a list of all payloads in PayloadSchema format.')
async def get_payloads(self, request: web.Request):
    """
    List the payload files found in the payload directories.

    The optional flags are read from ``request['querystring']``:

      * ``sort``: return the names sorted.
      * ``exclude_plugins``: only look in ``data/payloads`` and skip the
        ``plugins/<name>/payloads`` directories of enabled plugins.
      * ``add_path``: prefix each name with its directory, relative to the
        current working directory.

    :param request: The incoming request.
    :return: A JSON response whose body is a list of payload names.
    """
    query = request['querystring']
    sort: bool = query.get('sort')
    exclude_plugins: bool = query.get('exclude_plugins')
    add_path: bool = query.get('add_path')

    cwd = pathlib.Path.cwd()
    payload_dirs = [cwd / 'data' / 'payloads']

    # Plugin directories are added only once, and only when not excluded.
    if not exclude_plugins:
        plugins = await self.data_svc.locate('plugins')
        payload_dirs.extend(cwd / 'plugins' / plugin.name / 'payloads'
                            for plugin in plugins if plugin.enabled)

    # A set is used so that identical entries coming from several
    # directories are reported once. Hidden files ('.'-prefixed) are skipped
    # by the glob pattern.
    payloads = {
        str(p.parent.relative_to(cwd) / self.file_svc.remove_xored_extension(p.name))
        if add_path
        else self.file_svc.remove_xored_extension(p.name)
        for p in itertools.chain.from_iterable(p_dir.glob('[!.]*') for p_dir in payload_dirs)
        if p.is_file()
    }

    payloads = list(payloads)
    if sort:
        payloads.sort()

    # NOTE(review): the body is a bare JSON list, not an object with a
    # 'payloads' key as PayloadSchema declares. Kept as-is for clients.
    return web.json_response(payloads)

@aiohttp_apispec.docs(
    tags=['payloads'],
    summary='Create a payload',
    description='Uploads a payload.')
@aiohttp_apispec.form_schema(PayloadCreateRequestSchema)
@aiohttp_apispec.response_schema(
    PayloadSchema(),
    description="The created payload in a list in PayloadSchema format (with name changed in case of a duplicate).")
async def post_payloads(self, request: web.Request):
    """
    Store an uploaded payload file and report the name it was saved under.

    :param request: The incoming request; the uploaded file is read from
        ``request["form"]["file"]``.
    :return: A JSON response of the form ``{"payloads": [<stored name>]}``.
    """
    # As aiohttp_apispec.form_schema already calls request.multipart(),
    # accessing the file using the prefilled request["form"] dictionary.
    file_field: web.FileField = request["form"]["file"]

    file_name, file_path = await self.__generate_file_name_and_path(file_field)

    # The file_field.file is of type IOBase: It uses blocking methods.
    # Putting blocking code into a dedicated method and thread...
    # get_running_loop() is the supported way to obtain the loop from inside
    # a coroutine.
    loop: asyncio.AbstractEventLoop = asyncio.get_running_loop()
    await loop.run_in_executor(None, self.__save_file, file_path, file_field.file)

    body: dict[str, list[str]] = {"payloads": [file_name]}
    return web.json_response(body)

@aiohttp_apispec.docs(
    tags=['payloads'],
    summary='Delete a payload',
    description='Deletes a given payload.',
    responses={
        204: {"description": "Payload has been properly deleted."},
        404: {"description": "Payload not found."},
    })
@aiohttp_apispec.match_info_schema(PayloadDeleteRequestSchema)
async def delete_payloads(self, request: web.Request):
    """
    Delete the payload named by the ``name`` URL segment from data/payloads/.

    :param request: The incoming request; the payload name is read from
        ``request.match_info``.
    :return: ``web.HTTPNoContent`` when the file was removed,
        ``web.HTTPNotFound`` when it does not exist or the name is not a
        plain file name.
    """
    file_name: str = request.match_info.get("name")

    # The name comes from the URL and is joined to the payload directory, so
    # it must be a single path component. Anything else ('..', or a value
    # that still contains a separator, e.g. from a percent-encoded slash)
    # could address a file outside data/payloads/ and is reported as not found.
    if (not file_name
            or file_name in ('.', '..')
            or os.path.basename(file_name) != file_name):
        return web.HTTPNotFound()

    file_path: str = os.path.join('data/payloads/', file_name)
    try:
        os.remove(file_path)
    except FileNotFoundError:
        return web.HTTPNotFound()
    return web.HTTPNoContent()

@classmethod
async def __generate_file_name_and_path(cls, file_field: web.FileField) -> 'tuple[str, str]':
    """
    Finds whether an uploaded file already exists in the payload directory.
    In that case, generates a new file name with an incremental suffix
    (``<stem>_<n><extension>``) to avoid overwriting the existing one.
    Otherwise, the original file name is used.

    Only the final path component of the uploaded file name is kept, so the
    resulting path always stays inside the payload directory.

    :param file_field: The uploaded payload object.
    :return: A tuple containing the generated file name and path for future storage.
    """
    payload_dir: str = 'data/payloads/'

    # The file name is supplied by the client: strip any directory part
    # before joining it to the payload directory.
    original_name: str = os.path.basename(file_field.filename or '')
    original = pathlib.Path(original_name)
    stem, extension = original.stem, original.suffix

    file_name: str = original_name
    file_path: str = os.path.join(payload_dir, file_name)
    suffix: int = 1

    # Generating a file suffix in the case it already exists.
    while os.path.exists(file_path):
        file_name = f"{stem}_{suffix}{extension}"
        file_path = os.path.join(payload_dir, file_name)
        suffix += 1
    return file_name, file_path

@staticmethod
def __save_file(target_file_path: str, io_base_src: IOBase):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies, didn't notice this before, but is this file saving method necessary over just using the file saving method from the File service (https://github.com/mitre/caldera/blob/master/app/service/file_svc.py#L65)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@elegantmoose, relevant remark.

Actually, it would be good to use it but I cannot directly.
When defining the post_payloads() method (which indirectly calls __save_file()), I used the @aiohttp_apispec.form_schema(PayloadCreateRequestSchema) annotation in order to have a proper swagger form and documentation.

However, this annotation (hidden function) calls request.multipart() before I even enter the post_payloads() method, to check the form according to the given schema.
Then the file can be read from another special request dictionary object (see aiohttp_apispec documentation).
I cannot read the multipart form twice using standard aiohttp methods because the second time, I get no data.

Therefore, I cannot use file_svc.save_multipart_file_upload() or file_svc.save_file() directly because they assume nothing has called the aiohttp multipart methods yet.

However, I can try to do one of the following changes:

  1. Adapt the current file_svc.save_multipart_file_upload() so that according to the given parameters, the file will be read the aiohttp standard way or using a special aiohttp_apispec object produced by the @aiohttp_apispec.form_schema annotation,
  2. Add another dedicated method to handle the aiohttp_apispec file upload case.

In both cases, I would try to share as much common code as possible.

Note that the file_svc.save_multipart_file_upload() method reads the uploaded file entirely and passes it as the payload parameter to file_svc.save_file(), which can lead to performance issues in the case of big files, but that's another story.

So tell me what you prefer (including another proposal).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like 2.

But only if it is not a big lift. Can you take a quick look but cap your effort? I don't want to hold this PR up.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@elegantmoose, I had a look at the file_svc.py code.
It handles general file upload for other services or the (old v1?) API (I'm new to Caldera and I don't have the whole history).

Besides, this payload upload code looks like the first API v2 file upload implementation (what is more, using aiohttp_apispec for swagger integration), and there are some global differences:

  • file_svc.py (v1) handles file encoding (HTTP header) and encryption for storage, whereas my v2 code doesn't,
  • file_svc.py (v1) appears to handle some particular cases (file storage path, file extension),
  • The v2 code follows aiohttp_apispec to handle the uploaded file, so the way the file is read is different from file_svc.py (however, the implementation I made should support big files).

Finally, I'm running out of time for this work.

Therefore, I propose the following:

  1. I only change the multipart form field name from payload to file to make my code as generic as possible,
  2. If you agree, you merge this PR, and one knows that there is a base code for general API v2 file upload using aiohttp_apispec,
  3. A future and distinct task / user story will handle a migration from v1 file upload to v2 in file_svc.py, with all features (file encoding, file encryption, aiohttp_apispec constraints for swagger).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@elegantmoose, I pushed the field name change (see point 1. above).

I saw you merged from master in the feature/manage-payloads branch: I can rebase it (+ git push --force) to avoid a complex history if you want before validating this PR.

I'll create another PR for the GUI part (magma repository).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay yeah, too much additional lift for this PR. Thanks for looking into it though.

Yea, feel free to rebase it.

I'm happy with this PR, I would just like @clenk to do a quick look over.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@elegantmoose, branch rebased and force pushed.

"""
Save an uploaded file content into a targeted file path.
Note this method calls blocking methods and must be run into a dedicated thread.

:param target_file_path: The destination path to write to.
:param io_base_src: The stream with file content to read from.
"""
# Copy the upload to disk in fixed-size chunks so the whole file is never
# held in memory at once; read() returning an empty bytes object marks the
# end of the stream.
with open(target_file_path, 'wb') as buffered_io_base_dest:
    while chunk := io_base_src.read(8192):
        buffered_io_base_dest.write(chunk)
19 changes: 19 additions & 0 deletions app/api/v2/schemas/payload_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from marshmallow import fields, schema


class PayloadQuerySchema(schema.Schema):
    """Query-string options: three optional boolean flags."""

    # NOTE(review): in marshmallow 3 `default` is presumably the dump-side
    # default; if a value is expected when the flag is absent from the query
    # string, the load-side argument (`missing` / `load_default`, depending
    # on the pinned marshmallow version) is the one to use. Confirm.
    sort = fields.Boolean(required=False, default=False)
    exclude_plugins = fields.Boolean(required=False, default=False)
    add_path = fields.Boolean(required=False, default=False)


class PayloadSchema(schema.Schema):
    """Schema with a single ``payloads`` field: a list of strings."""

    payloads = fields.List(fields.String())


class PayloadCreateRequestSchema(schema.Schema):
    """Schema with one required raw field, ``file``."""

    # type="file" is passed through as an extra field argument; presumably it
    # is what makes the generated API documentation show a file picker.
    file = fields.Raw(type="file", required=True)


class PayloadDeleteRequestSchema(schema.Schema):
    """Schema with one required string field, ``name``."""

    name = fields.String(required=True)
Loading