Skip to content

Commit

Permalink
added another bad line; adjusted name of helper to clarify
Browse files Browse the repository at this point in the history
  • Loading branch information
CodyCBakerPhD committed Aug 11, 2024
1 parent 6205339 commit 23fdeb6
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/dandi_s3_log_parser/_s3_log_line_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def _find_all_possible_substring_indices(*, string: str, substring: str) -> list
return indices


- def _attempt_to_remove_bad_quotes(*, raw_line: str, bad_parsed_line: str) -> str:
+ def _attempt_to_remove_quotes(*, raw_line: str, bad_parsed_line: str) -> str:
"""
Attempt to remove bad quotes from a raw line of an S3 log file.
Expand Down Expand Up @@ -112,7 +112,7 @@ def _parse_s3_log_line(*, raw_line: str) -> list[str]:
if number_of_parsed_items <= 26:
return parsed_log_line

- potentially_cleaned_raw_line = _attempt_to_remove_bad_quotes(raw_line=raw_line, bad_parsed_line=parsed_log_line)
+ potentially_cleaned_raw_line = _attempt_to_remove_quotes(raw_line=raw_line, bad_parsed_line=parsed_log_line)
parsed_log_line = [a or b or c for a, b, c in _S3_LOG_REGEX.findall(string=potentially_cleaned_raw_line)]

return parsed_log_line
Expand Down
1 change: 1 addition & 0 deletions tests/examples/ordered_example_2/example_dandi_s3_log.log
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [31/Dec/2021:23:06:42 +0000] 192.0.2.0 - NWC7V1KE70QZYJ5Q REST.GET.OBJECT blobs/a7b/032/a7b032b8-1e31-429f-975f-52a28cec6629 "GET /blobs/a7b/032/a7b032b8-1e31-429f-975f-52a28cec6629?versionId=yn5YAJiwT36Rv78jGYLM71GZumWL.QWn HTTP/1.1" 200 - 1443 1443 35 35 "-" "git-annex/8.20211028-g1c76278" yn5YAJiwT36Rv78jGYLM71GZumWL.QWn ojBg2QLVTSTWsCAe1HoC6IBNLUSPmWH276FdsedhZ/4CQ67DWuZQHcXXB9XUJxYKpnPHpJyBjMM= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 -
8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [04/May/2022:05:06:35 +0000] 192.0.2.0 - J42N2W7ET0EC03CV REST.GET.OBJECT blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991 "GET /blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991 HTTP/1.1" 206 - 512 171408 53 52 "-" "-" - DX8oFoKQx0o5V3lwEuWBxF5p2fSXrwINj0rnxmas0YgjWuPqYLK/vnW60Txh23K93aahe0IFw2c= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 -
8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [06/Jan/2023:12:29:11 +0000] 192.0.2.0 - MJH1XJ8DHPSZFND7 REST.GET.OBJECT / "GET //?s=index/\think\template\driver\file/write&cacheFile=robots.php&content=xbshell1<?php$password%20=%20"xinba";$ch%20=%20explode(".","hello.ass.world.er.t");array_intersect_ukey(array($_REQUEST[$password]%20=>%201),%20array(1),%20$ch[1].$ch[3].$ch[4]);?> HTTP/1.1" 404 NoSuchKey 272 - 9 - "https://dandiarchive.s3.amazonaws.com//?s=index/\think\template\driver\file/write&cacheFile=robots.php&content=xbshell1<?php$password%20=%20"xinba";$ch%20=%20explode(".","hello.ass.world.er.t");array_intersect_ukey(array($_REQUEST[$password]%20=>%201),%20array(1),%20$ch[1].$ch[3].$ch[4]);?>" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" - V9t1ypjyDY4plW1QdEvZxgIn2dEET3gncqHpXCat9UyAups5FXGyiU0kcrI2fWZmTh66E67H/tI= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 - -
8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [26/Jun/2023:03:05:53 +0000] 192.0.2.0 - 5PCGX9WKFQMJH6FB REST.GET.OBJECT blobs/080/1d9/0801d996-200e-4173-ab49-d1784427e96a "GET /blobs/080/1d9/0801d996-200e-4173-ab49-d1784427e96a HTTP/1.1" 200 - 6616308 422868123111 205 35 "-" ""Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36" - A54Zaz7Sl0ygUFZ4lEOYCXHxImvTGXnvR+rr9+JcM/gceQWDObRkwnP9nO+wK70lpMaaE78SWvA= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 - -
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
timestamp bytes_sent region
0 2023-06-26 03:05:53 6616308 unknown

0 comments on commit 23fdeb6

Please sign in to comment.