From 9fd839fad8fb3059ffebad2e35ff5f97460d6aaf Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 8 Mar 2024 15:31:34 +0100 Subject: [PATCH 1/2] Add rest endpoint to get all job data Fixes #203 --- api/swagger.json | 76 +++++++++++++++++++++++++++++++++++- api/swagger.yaml | 53 ++++++++++++++++++++++++- internal/api/docs.go | 76 +++++++++++++++++++++++++++++++++++- internal/api/rest.go | 93 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 292 insertions(+), 6 deletions(-) diff --git a/api/swagger.json b/api/swagger.json index 0142aa78..ba296eb6 100644 --- a/api/swagger.json +++ b/api/swagger.json @@ -694,6 +694,80 @@ } }, "/jobs/{id}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.", + "produces": [ + "application/json" + ], + "tags": [ + "Job query" + ], + "summary": "Get job meta and optional all metric data", + "parameters": [ + { + "type": "integer", + "description": "Database ID of Job", + "name": "id", + "in": "path", + "required": true + }, + { + "type": "boolean", + "description": "Include all available metrics", + "name": "all-metrics", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Job resource", + "schema": { + "$ref": "#/definitions/api.GetJobApiResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "404": { + "description": "Resource not found", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "422": { + "description": "Unprocessable Entity: finding job failed: sql: no rows in result set", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + }, "post": { "security": [ { @@ -710,7 +784,7 @@ "tags": [ "Job query" ], - "summary": "Get complete job meta and metric data", + "summary": "Get job meta and configurable metric data", "parameters": [ { "type": "integer", diff --git a/api/swagger.yaml b/api/swagger.yaml index add432a2..fbb4bdf4 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -630,6 +630,57 @@ paths: tags: - Job query /jobs/{id}: + get: + description: |- + Job to get is specified by database ID + Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'. + parameters: + - description: Database ID of Job + in: path + name: id + required: true + type: integer + - description: Include all available metrics + in: query + name: all-metrics + type: boolean + produces: + - application/json + responses: + "200": + description: Job resource + schema: + $ref: '#/definitions/api.GetJobApiResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/api.ErrorResponse' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/api.ErrorResponse' + "403": + description: Forbidden + schema: + $ref: '#/definitions/api.ErrorResponse' + "404": + description: Resource not found + schema: + $ref: '#/definitions/api.ErrorResponse' + "422": + description: 'Unprocessable Entity: finding job failed: sql: no rows in + result set' + schema: + $ref: '#/definitions/api.ErrorResponse' + "500": + description: Internal Server Error + schema: + $ref: '#/definitions/api.ErrorResponse' + security: + - ApiKeyAuth: [] + summary: Get job meta and optional all metric data + tags: + - Job query post: consumes: - application/json @@ -684,7 +735,7 @@ paths: $ref: '#/definitions/api.ErrorResponse' security: - ApiKeyAuth: [] - summary: Get complete job meta and metric data + summary: Get job meta and configurable metric data tags: - Job query /jobs/delete_job/: diff --git a/internal/api/docs.go b/internal/api/docs.go index c0a34e71..1cd5df13 100644 --- a/internal/api/docs.go +++ b/internal/api/docs.go @@ -700,6 +700,80 @@ const docTemplate = `{ } }, "/jobs/{id}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.", + "produces": [ + "application/json" + ], + "tags": [ + "Job query" + ], + "summary": "Get job meta and optional all metric data", + "parameters": [ + { + "type": "integer", + "description": "Database ID of Job", + "name": "id", + "in": "path", + "required": true + }, + { + "type": "boolean", + "description": "Include all available metrics", + "name": "all-metrics", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Job resource", + "schema": { + "$ref": "#/definitions/api.GetJobApiResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "404": { + "description": "Resource not found", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "422": { + "description": "Unprocessable Entity: finding job failed: sql: no rows in result set", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + }, "post": { "security": [ { @@ -716,7 +790,7 @@ const docTemplate = `{ "tags": [ "Job query" ], - "summary": "Get complete job meta and metric data", + "summary": "Get job meta and configurable metric data", "parameters": [ { "type": "integer", diff --git a/internal/api/rest.go b/internal/api/rest.go index 564bd1cb..0d42437c 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -70,6 +70,7 @@ func (api *RestApi) MountRoutes(r *mux.Router) { r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet) r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost) + r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet) r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch) r.HandleFunc("/jobs/edit_meta/{id}", api.editMeta).Methods(http.MethodPost, http.MethodPatch) r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet) @@ -162,6 +163,11 @@ type GetJobApiResponse struct { Data []*JobMetricWithName } +type GetCompleteJobApiResponse struct { + Meta *schema.Job + Data schema.JobData +} + type JobMetricWithName struct { Name string `json:"name"` Scope schema.MetricScope `json:"scope"` @@ -376,14 +382,95 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { } // getJobById godoc -// @summary Get complete job meta and metric data +// @summary Get job meta and optional all metric data +// @tags Job query +// @description Job to get is specified by database ID +// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'. +// @produce json +// @param id path int true "Database ID of Job" +// @param all-metrics query bool false "Include all available metrics" +// @success 200 {object} api.GetJobApiResponse "Job resource" +// @failure 400 {object} api.ErrorResponse "Bad Request" +// @failure 401 {object} api.ErrorResponse "Unauthorized" +// @failure 403 {object} api.ErrorResponse "Forbidden" +// @failure 404 {object} api.ErrorResponse "Resource not found" +// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set" +// @failure 500 {object} api.ErrorResponse "Internal Server Error" +// @security ApiKeyAuth +// @router /jobs/{id} [get] +func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) { + if user := repository.GetUserFromContext(r.Context()); user != nil && + !user.HasRole(schema.RoleApi) { + + handleError(fmt.Errorf("missing role: %v", + schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw) + return + } + + // Fetch job from db + id, ok := mux.Vars(r)["id"] + var job *schema.Job + var err error + if ok { + id, e := strconv.ParseInt(id, 10, 64) + if e != nil { + handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw) + return + } + + job, err = api.JobRepository.FindById(id) + } else { + handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw) + return + } + if err != nil { + handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw) + return + } + + var scopes []schema.MetricScope + + if job.NumNodes == 1 { + scopes = []schema.MetricScope{"core"} + } else { + scopes = []schema.MetricScope{"node"} + } + + var data schema.JobData + + if r.URL.Query().Has("all-metrics") { + data, err = metricdata.LoadData(job, nil, scopes, r.Context()) + if err != nil { + log.Warn("Error while loading job data") + return + } + } + + log.Debugf("/api/job/%s: get job %d", id, job.JobID) + rw.Header().Add("Content-Type", "application/json") + bw := bufio.NewWriter(rw) + defer bw.Flush() + + payload := GetCompleteJobApiResponse{ + Meta: job, + Data: data, + } + + if err := json.NewEncoder(bw).Encode(payload); err != nil { + handleError(err, http.StatusInternalServerError, rw) + return + } +} + +// getJobById godoc +// @summary Get job meta and configurable metric data // @tags Job query // @description Job to get is specified by database ID // @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'. // @accept json // @produce json -// @param id path int true "Database ID of Job" -// @param request body api.GetJobApiRequest true "Array of metric names" +// @param id path int true "Database ID of Job" +// @param request body api.GetJobApiRequest true "Array of metric names" // @success 200 {object} api.GetJobApiResponse "Job resource" // @failure 400 {object} api.ErrorResponse "Bad Request" // @failure 401 {object} api.ErrorResponse "Unauthorized" From 99d55f05f8ab74ca6bc0f65136158749f9c4f626 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 8 Mar 2024 16:35:30 +0100 Subject: [PATCH 2/2] feat: Add cluster config endpoint to rest api --- api/swagger.json | 263 +++++++++++++++++++++++++++++++++++++++++++ api/swagger.yaml | 172 ++++++++++++++++++++++++++++ internal/api/docs.go | 263 +++++++++++++++++++++++++++++++++++++++++++ internal/api/rest.go | 62 +++++++++- 4 files changed, 756 insertions(+), 4 deletions(-) diff --git a/api/swagger.json b/api/swagger.json index ba296eb6..7f5eaf7c 100644 --- a/api/swagger.json +++ b/api/swagger.json @@ -17,6 +17,63 @@ "host": "localhost:8080", "basePath": "/api", "paths": { + "/clusters/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Get a list of all cluster configs. Specific cluster can be requested using query parameter.", + "produces": [ + "application/json" + ], + "tags": [ + "Cluster query" + ], + "summary": "Lists all cluster configs", + "parameters": [ + { + "type": "string", + "description": "Job Cluster", + "name": "cluster", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Array of clusters", + "schema": { + "$ref": "#/definitions/api.GetClustersApiResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + } + }, "/jobs/": { "get": { "security": [ @@ -1284,6 +1341,18 @@ } } }, + "api.GetClustersApiResponse": { + "type": "object", + "properties": { + "clusters": { + "description": "Array of clusters", + "type": "array", + "items": { + "$ref": "#/definitions/schema.Cluster" + } + } + } + }, "api.GetJobApiResponse": { "type": "object", "properties": { @@ -1379,6 +1448,40 @@ } } }, + "schema.Accelerator": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "type": { + "type": "string" + } + } + }, + "schema.Cluster": { + "type": "object", + "properties": { + "metricConfig": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.MetricConfig" + } + }, + "name": { + "type": "string" + }, + "subClusters": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.SubCluster" + } + } + } + }, "schema.Job": { "description": "Information of a HPC job.", "type": "object", @@ -1777,6 +1880,44 @@ } } }, + "schema.MetricConfig": { + "type": "object", + "properties": { + "aggregation": { + "type": "string" + }, + "alert": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "name": { + "type": "string" + }, + "normal": { + "type": "number" + }, + "peak": { + "type": "number" + }, + "scope": { + "$ref": "#/definitions/schema.MetricScope" + }, + "subClusters": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.SubClusterConfig" + } + }, + "timestep": { + "type": "integer" + }, + "unit": { + "$ref": "#/definitions/schema.Unit" + } + } + }, "schema.MetricScope": { "type": "string", "enum": [ @@ -1812,6 +1953,17 @@ } } }, + "schema.MetricValue": { + "type": "object", + "properties": { + "unit": { + "$ref": "#/definitions/schema.Unit" + }, + "value": { + "type": "number" + } + } + }, "schema.Resource": { "description": "A resource used by a job", "type": "object", @@ -1892,6 +2044,64 @@ } } }, + "schema.SubCluster": { + "type": "object", + "properties": { + "coresPerSocket": { + "type": "integer" + }, + "flopRateScalar": { + "$ref": "#/definitions/schema.MetricValue" + }, + "flopRateSimd": { + "$ref": "#/definitions/schema.MetricValue" + }, + "memoryBandwidth": { + "$ref": "#/definitions/schema.MetricValue" + }, + "name": { + "type": "string" + }, + "nodes": { + "type": "string" + }, + "processorType": { + "type": "string" + }, + "socketsPerNode": { + "type": "integer" + }, + "threadsPerCore": { + "type": "integer" + }, + "topology": { + "$ref": "#/definitions/schema.Topology" + } + } + }, + "schema.SubClusterConfig": { + "type": "object", + "properties": { + "alert": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "name": { + "type": "string" + }, + "normal": { + "type": "number" + }, + "peak": { + "type": "number" + }, + "remove": { + "type": "boolean" + } + } + }, "schema.Tag": { "description": "Defines a tag using name and type.", "type": "object", @@ -1912,6 +2122,59 @@ } } }, + "schema.Topology": { + "type": "object", + "properties": { + "accelerators": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Accelerator" + } + }, + "core": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "die": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "memoryDomain": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "node": { + "type": "array", + "items": { + "type": "integer" + } + }, + "socket": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, "schema.Unit": { "type": "object", "properties": { diff --git a/api/swagger.yaml b/api/swagger.yaml index fbb4bdf4..f47ac3fa 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -68,6 +68,14 @@ definitions: description: Statustext of Errorcode type: string type: object + api.GetClustersApiResponse: + properties: + clusters: + description: Array of clusters + items: + $ref: '#/definitions/schema.Cluster' + type: array + type: object api.GetJobApiResponse: properties: data: @@ -133,6 +141,28 @@ definitions: - jobState - stopTime type: object + schema.Accelerator: + properties: + id: + type: string + model: + type: string + type: + type: string + type: object + schema.Cluster: + properties: + metricConfig: + items: + $ref: '#/definitions/schema.MetricConfig' + type: array + name: + type: string + subClusters: + items: + $ref: '#/definitions/schema.SubCluster' + type: array + type: object schema.Job: description: Information of a HPC job. properties: @@ -448,6 +478,31 @@ definitions: unit: $ref: '#/definitions/schema.Unit' type: object + schema.MetricConfig: + properties: + aggregation: + type: string + alert: + type: number + caution: + type: number + name: + type: string + normal: + type: number + peak: + type: number + scope: + $ref: '#/definitions/schema.MetricScope' + subClusters: + items: + $ref: '#/definitions/schema.SubClusterConfig' + type: array + timestep: + type: integer + unit: + $ref: '#/definitions/schema.Unit' + type: object schema.MetricScope: enum: - invalid_scope @@ -475,6 +530,13 @@ definitions: min: type: number type: object + schema.MetricValue: + properties: + unit: + $ref: '#/definitions/schema.Unit' + value: + type: number + type: object schema.Resource: description: A resource used by a job properties: @@ -529,6 +591,44 @@ definitions: type: array type: object type: object + schema.SubCluster: + properties: + coresPerSocket: + type: integer + flopRateScalar: + $ref: '#/definitions/schema.MetricValue' + flopRateSimd: + $ref: '#/definitions/schema.MetricValue' + memoryBandwidth: + $ref: '#/definitions/schema.MetricValue' + name: + type: string + nodes: + type: string + processorType: + type: string + socketsPerNode: + type: integer + threadsPerCore: + type: integer + topology: + $ref: '#/definitions/schema.Topology' + type: object + schema.SubClusterConfig: + properties: + alert: + type: number + caution: + type: number + name: + type: string + normal: + type: number + peak: + type: number + remove: + type: boolean + type: object schema.Tag: description: Defines a tag using name and type. properties: @@ -544,6 +644,41 @@ definitions: example: Debug type: string type: object + schema.Topology: + properties: + accelerators: + items: + $ref: '#/definitions/schema.Accelerator' + type: array + core: + items: + items: + type: integer + type: array + type: array + die: + items: + items: + type: integer + type: array + type: array + memoryDomain: + items: + items: + type: integer + type: array + type: array + node: + items: + type: integer + type: array + socket: + items: + items: + type: integer + type: array + type: array + type: object schema.Unit: properties: base: @@ -564,6 +699,43 @@ info: title: ClusterCockpit REST API version: 1.0.0 paths: + /clusters/: + get: + description: Get a list of all cluster configs. Specific cluster can be requested + using query parameter. + parameters: + - description: Job Cluster + in: query + name: cluster + type: string + produces: + - application/json + responses: + "200": + description: Array of clusters + schema: + $ref: '#/definitions/api.GetClustersApiResponse' + "400": + description: Bad Request + schema: + $ref: '#/definitions/api.ErrorResponse' + "401": + description: Unauthorized + schema: + $ref: '#/definitions/api.ErrorResponse' + "403": + description: Forbidden + schema: + $ref: '#/definitions/api.ErrorResponse' + "500": + description: Internal Server Error + schema: + $ref: '#/definitions/api.ErrorResponse' + security: + - ApiKeyAuth: [] + summary: Lists all cluster configs + tags: + - Cluster query /jobs/: get: description: |- diff --git a/internal/api/docs.go b/internal/api/docs.go index 1cd5df13..e5ec50b7 100644 --- a/internal/api/docs.go +++ b/internal/api/docs.go @@ -23,6 +23,63 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/clusters/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Get a list of all cluster configs. Specific cluster can be requested using query parameter.", + "produces": [ + "application/json" + ], + "tags": [ + "Cluster query" + ], + "summary": "Lists all cluster configs", + "parameters": [ + { + "type": "string", + "description": "Job Cluster", + "name": "cluster", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Array of clusters", + "schema": { + "$ref": "#/definitions/api.GetClustersApiResponse" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "401": { + "description": "Unauthorized", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "403": { + "description": "Forbidden", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "$ref": "#/definitions/api.ErrorResponse" + } + } + } + } + }, "/jobs/": { "get": { "security": [ @@ -1290,6 +1347,18 @@ const docTemplate = `{ } } }, + "api.GetClustersApiResponse": { + "type": "object", + "properties": { + "clusters": { + "description": "Array of clusters", + "type": "array", + "items": { + "$ref": "#/definitions/schema.Cluster" + } + } + } + }, "api.GetJobApiResponse": { "type": "object", "properties": { @@ -1385,6 +1454,40 @@ const docTemplate = `{ } } }, + "schema.Accelerator": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "type": { + "type": "string" + } + } + }, + "schema.Cluster": { + "type": "object", + "properties": { + "metricConfig": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.MetricConfig" + } + }, + "name": { + "type": "string" + }, + "subClusters": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.SubCluster" + } + } + } + }, "schema.Job": { "description": "Information of a HPC job.", "type": "object", @@ -1783,6 +1886,44 @@ const docTemplate = `{ } } }, + "schema.MetricConfig": { + "type": "object", + "properties": { + "aggregation": { + "type": "string" + }, + "alert": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "name": { + "type": "string" + }, + "normal": { + "type": "number" + }, + "peak": { + "type": "number" + }, + "scope": { + "$ref": "#/definitions/schema.MetricScope" + }, + "subClusters": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.SubClusterConfig" + } + }, + "timestep": { + "type": "integer" + }, + "unit": { + "$ref": "#/definitions/schema.Unit" + } + } + }, "schema.MetricScope": { "type": "string", "enum": [ @@ -1818,6 +1959,17 @@ const docTemplate = `{ } } }, + "schema.MetricValue": { + "type": "object", + "properties": { + "unit": { + "$ref": "#/definitions/schema.Unit" + }, + "value": { + "type": "number" + } + } + }, "schema.Resource": { "description": "A resource used by a job", "type": "object", @@ -1898,6 +2050,64 @@ const docTemplate = `{ } } }, + "schema.SubCluster": { + "type": "object", + "properties": { + "coresPerSocket": { + "type": "integer" + }, + "flopRateScalar": { + "$ref": "#/definitions/schema.MetricValue" + }, + "flopRateSimd": { + "$ref": "#/definitions/schema.MetricValue" + }, + "memoryBandwidth": { + "$ref": "#/definitions/schema.MetricValue" + }, + "name": { + "type": "string" + }, + "nodes": { + "type": "string" + }, + "processorType": { + "type": "string" + }, + "socketsPerNode": { + "type": "integer" + }, + "threadsPerCore": { + "type": "integer" + }, + "topology": { + "$ref": "#/definitions/schema.Topology" + } + } + }, + "schema.SubClusterConfig": { + "type": "object", + "properties": { + "alert": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "name": { + "type": "string" + }, + "normal": { + "type": "number" + }, + "peak": { + "type": "number" + }, + "remove": { + "type": "boolean" + } + } + }, "schema.Tag": { "description": "Defines a tag using name and type.", "type": "object", @@ -1918,6 +2128,59 @@ const docTemplate = `{ } } }, + "schema.Topology": { + "type": "object", + "properties": { + "accelerators": { + "type": "array", + "items": { + "$ref": "#/definitions/schema.Accelerator" + } + }, + "core": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "die": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "memoryDomain": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "node": { + "type": "array", + "items": { + "type": "integer" + } + }, + "socket": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, "schema.Unit": { "type": "object", "properties": { diff --git a/internal/api/rest.go b/internal/api/rest.go index 0d42437c..807e7aeb 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -78,6 +78,8 @@ func (api *RestApi) MountRoutes(r *mux.Router) { r.HandleFunc("/jobs/delete_job/{id}", api.deleteJobById).Methods(http.MethodDelete) r.HandleFunc("/jobs/delete_job_before/{ts}", api.deleteJobBefore).Methods(http.MethodDelete) + r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet) + if api.MachineStateDir != "" { r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet) r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost) @@ -134,6 +136,11 @@ type GetJobsApiResponse struct { Page int `json:"page"` // Page id returned } +// GetClustersApiResponse model +type GetClustersApiResponse struct { + Clusters []*schema.Cluster `json:"clusters"` // Array of clusters +} + // ErrorResponse model type ErrorResponse struct { // Statustext of Errorcode @@ -236,6 +243,55 @@ func securedCheck(r *http.Request) error { return nil } +// getClusters godoc +// @summary Lists all cluster configs +// @tags Cluster query +// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter. +// @produce json +// @param cluster query string false "Job Cluster" +// @success 200 {object} api.GetClustersApiResponse "Array of clusters" +// @failure 400 {object} api.ErrorResponse "Bad Request" +// @failure 401 {object} api.ErrorResponse "Unauthorized" +// @failure 403 {object} api.ErrorResponse "Forbidden" +// @failure 500 {object} api.ErrorResponse "Internal Server Error" +// @security ApiKeyAuth +// @router /clusters/ [get] +func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) { + if user := repository.GetUserFromContext(r.Context()); user != nil && + !user.HasRole(schema.RoleApi) { + + handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw) + return + } + + rw.Header().Add("Content-Type", "application/json") + bw := bufio.NewWriter(rw) + defer bw.Flush() + + var clusters []*schema.Cluster + + if r.URL.Query().Has("cluster") { + name := r.URL.Query().Get("cluster") + cluster := archive.GetCluster(name) + if cluster == nil { + handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw) + return + } + clusters = append(clusters, cluster) + } else { + clusters = archive.Clusters + } + + payload := GetClustersApiResponse{ + Clusters: clusters, + } + + if err := json.NewEncoder(bw).Encode(payload); err != nil { + handleError(err, http.StatusInternalServerError, rw) + return + } +} + // getJobs godoc // @summary Lists all jobs // @tags Job query @@ -354,10 +410,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) { if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful { res.Statistics, err = archive.GetStatistics(job) if err != nil { - if err != nil { - handleError(err, http.StatusInternalServerError, rw) - return - } + handleError(err, http.StatusInternalServerError, rw) + return } }