Skip to content

Commit

Permalink
adjust rule
Browse files Browse the repository at this point in the history
  • Loading branch information
CodyCBakerPhD committed Aug 14, 2024
1 parent 3112d3f commit 0496ac1
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 9 deletions.
46 changes: 46 additions & 0 deletions src/dandi_s3_log_parser/_globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,52 @@
"WEBSITE.GET.OBJECT",
"REST.GET.BUCKETVERSIONS",
"REST.GET.BUCKET",
"BATCH.DELETE.OBJECT",
"REST.COPY.OBJECT_GET",
"REST.COPY.PART",
"REST.DELETE.OBJECT",
"REST.DELETE.OBJECT_TAGGING",
"REST.DELETE.UPLOAD",
"REST.GET.ACCELERATE",
"REST.GET.ACL",
"REST.GET.ANALYTICS",
"REST.GET.BUCKET",
"REST.GET.BUCKETPOLICY",
"REST.GET.BUCKETVERSIONS",
"REST.GET.CORS",
"REST.GET.ENCRYPTION",
"REST.GET.INTELLIGENT_TIERING",
"REST.GET.INVENTORY",
"REST.GET.LIFECYCLE",
"REST.GET.LOCATION",
"REST.GET.LOGGING_STATUS",
"REST.GET.METRICS",
"REST.GET.NOTIFICATION",
"REST.GET.OBJECT",
"REST.GET.OBJECT_LOCK_CONFIGURATION",
"REST.GET.OBJECT_TAGGING",
"REST.GET.OWNERSHIP_CONTROLS",
"REST.GET.POLICY_STATUS",
"REST.GET.PUBLIC_ACCESS_BLOCK",
"REST.GET.REPLICATION",
"REST.GET.REQUEST_PAYMENT",
"REST.GET.TAGGING",
"REST.GET.UPLOAD",
"REST.GET.VERSIONING",
"REST.GET.WEBSITE",
"REST.HEAD.BUCKET",
"REST.HEAD.OBJECT",
"REST.OPTIONS.PREFLIGHT",
"REST.POST.BUCKET",
"REST.POST.MULTI_OBJECT_DELETE",
"REST.POST.OBJECT",
"REST.POST.UPLOAD",
"REST.POST.UPLOADS",
"REST.PUT.ACL",
"REST.PUT.BUCKETPOLICY",
"REST.PUT.OBJECT",
"REST.PUT.OWNERSHIP_CONTROLS",
"REST.PUT.PART",
)

_IS_OPERATION_TYPE_KNOWN = collections.defaultdict(bool)
Expand Down
19 changes: 10 additions & 9 deletions src/dandi_s3_log_parser/_log_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def find_all_known_operation_types(
base_raw_s3_log_folder_path: DirectoryPath,
excluded_log_files: list[FilePath] | None,
max_files: int | None = 100,
max_files: int | None = None,
) -> set:
base_raw_s3_log_folder_path = pathlib.Path(base_raw_s3_log_folder_path)
excluded_log_files = excluded_log_files or {}
Expand All @@ -27,13 +27,14 @@ def find_all_known_operation_types(
position=0,
leave=True,
):
operation_types_per_file = {
field[7]
for buffered_text_reader in BufferedTextReader(file_path=raw_s3_log_file_path)
for raw_log_line in buffered_text_reader
if len((field := raw_log_line[:500].split(" "))) > 7
}

unique_operation_types.update(operation_types_per_file)
# Collect the operation type (space-separated field at index 7) from every raw log line.
# Only a short prefix of each line is split; the prefix is widened on demand so that long
# lines are never split in full.
for buffered_text_reader in BufferedTextReader(file_path=raw_s3_log_file_path):
    # Initial prefix length; it is carried over between the lines of this chunk,
    # so it only ever grows while the chunk is being scanned.
    slice_bound = 200
    for raw_log_line in buffered_text_reader:
        line_length = len(raw_log_line)
        fields = raw_log_line[:slice_bound].split(" ")
        # Field 7 is only known to be complete once a ninth field follows it, or once
        # the prefix already covers the whole line. Stopping at the end of the line
        # also guarantees termination for lines that contain too few fields.
        while len(fields) < 9 and slice_bound < line_length:
            slice_bound += 100
            fields = raw_log_line[:slice_bound].split(" ")
        if len(fields) < 8:
            # Malformed or truncated line: there is no field 7 to record.
            continue
        unique_operation_types.add(fields[7])

return unique_operation_types

0 comments on commit 0496ac1

Please sign in to comment.