From deaaa7a49d824168720f85498c8ac3099e25d172 Mon Sep 17 00:00:00 2001 From: Andre Kuhlenschmidt Date: Thu, 26 Sep 2024 18:56:54 -0700 Subject: [PATCH 1/3] chore: fix uses of deprecated function utcnow (semgrep/semgrep-proprietary#2339) Error in [logs](https://github.com/semgrep/semgrep-proprietary/actions/runs/11061470331/job/30734143194#step:4:129) Test plan: CI synced from Pro 6ec5e0f7832e85e8c2f6a6a178de494fee102895 --- cli/src/semgrep/app/scans.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cli/src/semgrep/app/scans.py b/cli/src/semgrep/app/scans.py index 06865a6d68ac..a45352f77890 100644 --- a/cli/src/semgrep/app/scans.py +++ b/cli/src/semgrep/app/scans.py @@ -484,15 +484,16 @@ def report_findings( # minutes to wait for completion. Eventually, this wait may # be configurable as we see larger scans and increased backend # load. - try_until = datetime.utcnow() + timedelta(minutes=30) - slow_down_after = datetime.utcnow() + timedelta(minutes=2) + now = datetime.now().replace(tzinfo=None) + try_until = now + timedelta(minutes=30) + slow_down_after = now + timedelta(minutes=2) while True: # old: was also logging {json.dumps(complete.to_json(), indent=4)} # alt: save it in ~/.semgrep/logs/complete.json? logger.debug(f"Sending /complete") - if datetime.utcnow() > try_until: + if datetime.now().replace(tzinfo=None) > try_until: # let the backend know we won't be trying again complete.final_attempt = True @@ -522,4 +523,4 @@ def report_findings( ) progress_bar.advance(complete_task) - sleep(5 if datetime.utcnow() < slow_down_after else 30) + sleep(5 if datetime.now().replace(tzinfo=None) < slow_down_after else 30) From 0023351417e3112f78bd9d5602591c40fa4d7cd9 Mon Sep 17 00:00:00 2001 From: Emma Jin Date: Fri, 27 Sep 2024 13:28:57 -0400 Subject: [PATCH 2/3] chore: log the trace id in debug mode (semgrep/semgrep-proprietary#2337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For non-SMS scans, it can be quite hard to find the trace. This PR logs the trace id in debug log. This only solves the problem when `--debug` is passed as well as `--trace`, so for the future we should output it as part of the results json or the error on a crash. However, it's better than the previous state. Test plan: In an arbitrary folder, run ``` (python-virtualenv) ➜ misc semgrep --config p/default . --trace --pro --debug ┌──── ○○○ ────┐ │ Semgrep CLI │ └─────────────┘ semgrep version 1.90.0 ... [00.06][DEBUG](default): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [00.06][INFO]: Executed as: /Users/emma/workspace/semgrep-proprietary/OSS/cli/src/semgrep/bin/semgrep-core-proprietary -json -rules /var/folders/4h/r6m5kls56r98069rz7w7f2400000gp/T/tmp379zh00m.json -j 1 -targets /var/folders/4h/r6m5kls56r98069rz7w7f2400000gp/T/tmp6sce110s -timeout 5 -timeout_threshold 3 -max_memory 0 -fast -trace -deep_inter_file -timeout_for_interfile_analysis 0 . -debug [00.06][INFO]: Version: 1.90.0 [00.06][INFO]: Tracing is enabled for this scan. The trace id is <53914f7e3a7612f09483a98c320a8f98>. ``` Also run it without `--pro`. synced from Pro 7a7dba2f40afef0123d56915efca502917536f7d --- libs/tracing/unix/Tracing.ml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libs/tracing/unix/Tracing.ml b/libs/tracing/unix/Tracing.ml index 27d9073fc39a..bb3873b2db64 100644 --- a/libs/tracing/unix/Tracing.ml +++ b/libs/tracing/unix/Tracing.ml @@ -201,6 +201,18 @@ let trace_data_only ?(level = Info) ~__FUNCTION__ ~__FILE__ ~__LINE__ name with_span ~level ~__FUNCTION__ ~__FILE__ ~__LINE__ name (fun sp -> f () |> add_yojson_to_span sp) +let log_trace_message () = + match Otel.Scope.get_ambient_scope () with + | None -> + (* nosemgrep: no-logs-in-library *) + Logs.info (fun m -> + m "Tracing is enabled for this scan. There was no trace id recorded.") + | Some scope -> + let id = Otel.Trace_id.to_hex scope.trace_id in + (* nosemgrep: no-logs-in-library *) + Logs.info (fun m -> + m "Tracing is enabled for this scan. The trace id is <%s>." id) + (*****************************************************************************) (* Entry points for setting up tracing *) (*****************************************************************************) @@ -241,7 +253,9 @@ let with_tracing fname trace_endpoint data f = Opentelemetry_client_ocurl.with_setup ~config () @@ fun () -> with_top_level_span ?parent_span_id ?parent_trace_id ~__FILE__ ~__LINE__ ~data fname - @@ fun sp -> f sp + @@ fun sp -> + log_trace_message (); + f sp (* Alt: using cohttp_lwt (we probably want to do this when we switch to Eio w/ *) (* their compatibility layer) From 5c080ddbc8afa3ef55cb84b9701212a284261a8b Mon Sep 17 00:00:00 2001 From: Andre Kuhlenschmidt Date: Mon, 30 Sep 2024 13:54:54 -0700 Subject: [PATCH 3/3] fix: deprecation errors with Uuidm for ocaml 5.2 (semgrep/semgrep-proprietary#2344) Fixing [logs](https://github.com/semgrep/semgrep-proprietary/actions/runs/11078022721/job/30784508699). Looks like this doesn't work for our 4.x ocaml environment. Good luck guys! synced from Pro 2ea8d06dfac60d2c0fd582450f6ba62bad0beb54 --- Makefile | 13 ++++++++++++- dune-project | 3 ++- libs/git_wrapper/Git_wrapper.ml | 3 ++- semgrep.opam | 6 +++--- src/osemgrep/cli_ci/Ci_subcommand.ml | 2 +- src/osemgrep/configuring/Semgrep_settings.ml | 3 ++- src/osemgrep/core/Metrics_.ml | 3 ++- src/osemgrep/language_server/Test_LS_e2e.ml | 4 +++- src/osemgrep/language_server/Unit_LS.ml | 2 +- src/osemgrep/language_server/server/Lsp_.ml | 8 ++++++-- src/osemgrep/networking/Semgrep_login.ml | 3 ++- src/osemgrep/reporting/Gitlab_output.ml | 2 +- 12 files changed, 37 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index d120162afbdc..1f0e47b82114 100644 --- a/Makefile +++ b/Makefile @@ -237,7 +237,18 @@ core-test-e2e: # path, then recent versions of opam crash with a 'git ls-files fatal error' # about some 'libs/ocaml-tree-sitter-core/../../.git/...' not being a git # repo. -REQUIRED_DEPS = ./ ./libs/ocaml-tree-sitter-core/tree-sitter.opam ./dev/required.opam +# +# EXTRA_OPAM_DEPS allows us to add more opam files when building semgrep +# as part of a larger project (e.g. semgrep-proprietary). Using a single +# 'opam install' command to install all the dependencies allows us to detect +# version constraints incompatibilities. +# +REQUIRED_DEPS = \ + ./ \ + ./libs/ocaml-tree-sitter-core/tree-sitter.opam \ + ./dev/required.opam \ + $(EXTRA_OPAM_DEPS) + OPTIONAL_DEPS = $(REQUIRED_DEPS) ./dev/optional.opam # This target is portable; it only assumes you have 'gcc', 'opam' and diff --git a/dune-project b/dune-project index 3e03f5c0ff3b..b64d46efb9c4 100644 --- a/dune-project +++ b/dune-project @@ -77,6 +77,7 @@ the other programming languages supported by atdgen." (lib_parsing (>= 1.5.5)) (profiling (>= 1.5.5)) (atdgen (>= 2.8.0)) + (uuidm (>= 0.9.9)) ) ) @@ -515,7 +516,7 @@ For more information see https://semgrep.dev (conf-libcurl (= 1)) ; force older version of conf-libcurl to make windows work ; web stuff uri - uuidm + (uuidm (>= 0.9.9)) ; cohttp >= 6.0.0 requires opam 2.1.0 which used to not available in Windows ; TODO: now that opam 2.2.0 is out and support windows, upgrade to 6.0.0 (cohttp (= 5.3.0)) diff --git a/libs/git_wrapper/Git_wrapper.ml b/libs/git_wrapper/Git_wrapper.ml index 151864b20436..97e0f9400407 100644 --- a/libs/git_wrapper/Git_wrapper.ml +++ b/libs/git_wrapper/Git_wrapper.ml @@ -501,7 +501,8 @@ let run_with_worktree (caps : < Cap.chdir ; Cap.tmp >) ~commit ?branch f = | None -> raise (Error "") in let rand_dir () = - let uuid = Uuidm.v `V4 in + let rand = Stdlib.Random.State.make_self_init () in + let uuid = Uuidm.v4_gen rand () in let dir_name = "semgrep_git_worktree_" ^ Uuidm.to_string uuid in let dir = CapTmp.get_temp_dir_name caps#tmp / dir_name in UUnix.mkdir !!dir 0o777; diff --git a/semgrep.opam b/semgrep.opam index c0497637d6d8..9d107e9a8a04 100644 --- a/semgrep.opam +++ b/semgrep.opam @@ -1,6 +1,6 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -version: "1.79.0" +version: "1.90.0" synopsis: "Like grep but for code: fast and syntax-aware semantic code pattern for many languages" description: """ @@ -12,7 +12,7 @@ For more information see https://semgrep.dev """ maintainer: ["Yoann Padioleau "] authors: ["Yoann Padioleau "] -license: "LGPL-2.1" +license: "LGPL-2.1-only" homepage: "https://semgrep.dev" bug-reports: "https://github.com/semgrep/semgrep/issues" depends: [ @@ -65,7 +65,7 @@ depends: [ "ambient-context-lwt" "conf-libcurl" {= "1"} "uri" - "uuidm" + "uuidm" {>= "0.9.9"} "cohttp" {= "5.3.0"} "cohttp-lwt-unix" "cohttp-lwt-jsoo" diff --git a/src/osemgrep/cli_ci/Ci_subcommand.ml b/src/osemgrep/cli_ci/Ci_subcommand.ml index b01b2b2e005d..e457c3092307 100644 --- a/src/osemgrep/cli_ci/Ci_subcommand.ml +++ b/src/osemgrep/cli_ci/Ci_subcommand.ml @@ -201,7 +201,7 @@ let scan_config_and_rules_from_deployment ~dry_run let scan_metadata : OutJ.scan_metadata = { cli_version = Version.version; - unique_id = Uuidm.v `V4; + unique_id = Uuidm.v4_gen (Stdlib.Random.State.make_self_init ()) (); (* TODO: should look at conf.secrets, conf.sca, conf.code, etc. *) requested_products = []; dry_run = false; diff --git a/src/osemgrep/configuring/Semgrep_settings.ml b/src/osemgrep/configuring/Semgrep_settings.ml index 348cfd02627b..90928fbfb6ec 100644 --- a/src/osemgrep/configuring/Semgrep_settings.ml +++ b/src/osemgrep/configuring/Semgrep_settings.ml @@ -20,10 +20,11 @@ type t = { } let default = + let rand = Stdlib.Random.State.make_self_init () in { has_shown_metrics_notification = None; api_token = None; - anonymous_user_id = Uuidm.v `V4; + anonymous_user_id = Uuidm.v4_gen rand (); } (*****************************************************************************) diff --git a/src/osemgrep/core/Metrics_.ml b/src/osemgrep/core/Metrics_.ml index d3bff434a6e2..3f731977b453 100644 --- a/src/osemgrep/core/Metrics_.ml +++ b/src/osemgrep/core/Metrics_.ml @@ -144,8 +144,9 @@ type t = { let now () : Timedesc.Timestamp.t = Timedesc.Timestamp.now () let default_payload = + let rand = Stdlib.Random.State.make_self_init () in { - Semgrep_metrics_t.event_id = Uuidm.v `V4; + Semgrep_metrics_t.event_id = Uuidm.v4_gen rand (); anonymous_user_id = ""; started_at = now (); sent_at = now (); diff --git a/src/osemgrep/language_server/Test_LS_e2e.ml b/src/osemgrep/language_server/Test_LS_e2e.ml index b1f1926a9502..bb62542621f1 100644 --- a/src/osemgrep/language_server/Test_LS_e2e.ml +++ b/src/osemgrep/language_server/Test_LS_e2e.ml @@ -194,7 +194,9 @@ let send_map (type a) (info : server_info) packet (f : Packet.t -> a) : (*****************************************************************************) let send_request info request = - let id = Uuidm.v `V4 |> Uuidm.to_string in + let id = + Uuidm.v4_gen (Stdlib.Random.State.make_self_init ()) () |> Uuidm.to_string + in let packet = Packet.Request (CR.to_jsonrpc_request request (`String id)) in send_map info packet diff --git a/src/osemgrep/language_server/Unit_LS.ml b/src/osemgrep/language_server/Unit_LS.ml index 6140852ed981..9adf9083cf3f 100644 --- a/src/osemgrep/language_server/Unit_LS.ml +++ b/src/osemgrep/language_server/Unit_LS.ml @@ -103,7 +103,7 @@ let mock_run_results (files : string list) : Core_runner.result = let mock_workspace ?(git = false) () : Fpath.t = let rand_dir () = - let uuid = Uuidm.v `V4 in + let uuid = Uuidm.v4_gen (Stdlib.Random.State.make_self_init ()) () in let dir_name = "test_workspace_" ^ Uuidm.to_string uuid in let dir = Filename.concat (Filename.get_temp_dir_name ()) dir_name in Unix.mkdir dir 0o777; diff --git a/src/osemgrep/language_server/server/Lsp_.ml b/src/osemgrep/language_server/server/Lsp_.ml index 15d505933268..344f10a40b61 100644 --- a/src/osemgrep/language_server/server/Lsp_.ml +++ b/src/osemgrep/language_server/server/Lsp_.ml @@ -82,7 +82,9 @@ let respond (type r) (id : Id.t) (request : r CR.t) (response : r) = (** Send a request to the client *) let request request = - let id = Uuidm.v `V4 |> Uuidm.to_string in + let id = + Uuidm.v4_gen (Stdlib.Random.State.make_self_init ()) () |> Uuidm.to_string + in let request = SR.to_jsonrpc_request request (`String id) in Logs.debug (fun m -> m "Sending request %s" @@ -117,7 +119,9 @@ let notify_show_message ~kind s = (** Show a little progress circle while doing thing. Returns a token needed to end progress*) let create_progress title message = - let id = Uuidm.v `V4 |> Uuidm.to_string in + let id = + Uuidm.v4_gen (Stdlib.Random.State.make_self_init ()) () |> Uuidm.to_string + in Logs.debug (fun m -> m "Creating progress token %s, (%s: %s)" id title message); let token = ProgressToken.t_of_yojson (`String id) in diff --git a/src/osemgrep/networking/Semgrep_login.ml b/src/osemgrep/networking/Semgrep_login.ml index 3675983d348f..4d98a4c8f673 100644 --- a/src/osemgrep/networking/Semgrep_login.ml +++ b/src/osemgrep/networking/Semgrep_login.ml @@ -27,7 +27,8 @@ type login_session = shared_secret * Uri.t let support_url = "https://semgrep.dev/docs/support/" let make_login_url () = - let session_id = Uuidm.v `V4 in + let rand = Stdlib.Random.State.make_self_init () in + let session_id = Uuidm.v4_gen rand () in ( session_id, Uri.( add_query_params' diff --git a/src/osemgrep/reporting/Gitlab_output.ml b/src/osemgrep/reporting/Gitlab_output.ml index 189085988013..5f65cdaa8ff4 100644 --- a/src/osemgrep/reporting/Gitlab_output.ml +++ b/src/osemgrep/reporting/Gitlab_output.ml @@ -94,7 +94,7 @@ let format_cli_match (cli_match : OutT.cli_match) = let id = (* TODO the ?index argument needs to be provided (for ci_unique_key duplicates) *) Semgrep_hashing_functions.ci_unique_key cli_match - |> Uuidm.of_bytes |> Option.get |> Uuidm.to_string + |> Uuidm.of_binary_string |> Option.get |> Uuidm.to_string in let r = [