-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7dba7a0
commit 481603d
Showing
16 changed files
with
4,761 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#pragma once | ||
|
||
#include "ggml.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; | ||
typedef struct ggml_backend_buffer * ggml_backend_buffer_t; | ||
typedef struct ggml_backend * ggml_backend_t; | ||
|
||
// Tensor allocator | ||
// State for the tensor allocator: returned by value from ggml_tallocr_new() and
// passed by pointer to ggml_tallocr_alloc().
// NOTE(review): the per-field notes below are inferred from names only - confirm against the implementation.
struct ggml_tallocr { | ||
// backend buffer handle; ggml_tallocr_new() takes one as its only argument
ggml_backend_buffer_t buffer; | ||
// presumably the base address of `buffer` - TODO confirm
void * base; | ||
// presumably the required alignment, in bytes, of each allocation - TODO confirm
size_t alignment; | ||
// presumably the current allocation position relative to `base` - TODO confirm
size_t offset; | ||
}; | ||
|
||
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); | ||
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); | ||
|
||
// Graph allocator | ||
/* | ||
Example usage: | ||
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type()); | ||
// optional: create a worst-case graph and reserve the buffers to avoid reallocations | ||
ggml_gallocr_reserve(galloc, build_graph(max_batch)); | ||
// allocate the graph | ||
struct ggml_cgraph * graph = build_graph(batch); | ||
ggml_gallocr_alloc_graph(galloc, graph); | ||
printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0)); | ||
// evaluate the graph | ||
ggml_backend_graph_compute(backend, graph); | ||
*/ | ||
|
||
// special tensor flags for use with the graph allocator: | ||
// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses | ||
// ggml_set_output(): output tensors are never freed and never overwritten | ||
|
||
typedef struct ggml_gallocr * ggml_gallocr_t; | ||
|
||
GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft); | ||
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs); | ||
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); | ||
|
||
// pre-allocate buffers from a measure graph - does not allocate or modify the graph | ||
// call with a worst-case graph to avoid buffer reallocations | ||
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed | ||
// returns false if the buffer allocation failed | ||
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); | ||
GGML_API bool ggml_gallocr_reserve_n( | ||
ggml_gallocr_t galloc, | ||
struct ggml_cgraph * graph, | ||
const int * node_buffer_ids, | ||
const int * leaf_buffer_ids); | ||
|
||
// automatic reallocation if the topology changes when using a single buffer | ||
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) | ||
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph); | ||
|
||
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id); | ||
|
||
// Utils | ||
// Create a buffer and allocate all the tensors in a ggml_context | ||
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft); | ||
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#pragma once | ||
|
||
#include "ggml.h" | ||
#include "ggml-backend.h" | ||
|
||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// buffer_type API | ||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void); | ||
|
||
GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend); | ||
|
||
// backend API | ||
GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void); | ||
|
||
GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads); | ||
|
||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#pragma once | ||
|
||
#include "ggml.h" | ||
#include "ggml-backend.h" | ||
|
||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// backend API | ||
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void); | ||
|
||
GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend); | ||
|
||
// number of threads used for conversion to float | ||
// for openblas and blis, this will also set the number of threads used for blas operations | ||
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads); | ||
|
||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void); | ||
|
||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/* | ||
* Copyright (c) 2023-2024 The ggml authors | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to | ||
* deal in the Software without restriction, including without limitation the | ||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
* sell copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
* IN THE SOFTWARE. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "ggml-backend.h" | ||
#include "ggml.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
/** | ||
* @brief Maximum number of CANN devices supported. | ||
*/ | ||
#define GGML_CANN_MAX_DEVICES 16 | ||
|
||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void); | ||
|
||
/** | ||
* @brief Initializes the CANN backend for a specified device. | ||
* | ||
* This function initializes the CANN backend for the given device. | ||
* It verifies the device index, allocates a context, and creates a backend | ||
* instance. | ||
* | ||
* @param device The index of the device to initialize. | ||
* @return A pointer to the initialized backend instance, or nullptr on failure. | ||
*/ | ||
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device); | ||
|
||
/** | ||
* @brief Checks if a given backend is a CANN backend. | ||
* | ||
* This function verifies if the provided backend is a CANN backend by comparing | ||
* its GUID with the CANN backend's GUID. | ||
* | ||
* @param backend The backend instance to check. | ||
* @return True if the backend is a CANN backend, false otherwise. | ||
*/ | ||
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend); | ||
|
||
/** | ||
* @brief Retrieves the CANN buffer type for a specified device. | ||
* | ||
* This function initializes and returns the buffer type interface associated | ||
* with the given device. It ensures thread-safe access using a mutex. | ||
* | ||
* @param device The device index for which to retrieve the buffer type. | ||
* @return A pointer to the buffer type interface for the specified device, or | ||
* nullptr if the device index is out of range. | ||
*/ | ||
GGML_BACKEND_API ggml_backend_buffer_type_t | ||
ggml_backend_cann_buffer_type(int32_t device); | ||
|
||
/** | ||
* @brief Retrieves the number of CANN devices available. | ||
* | ||
* This function returns the number of CANN devices available based on | ||
* information obtained from `ggml_cann_info()`. | ||
* | ||
* @return The number of CANN devices available. | ||
*/ | ||
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void); | ||
|
||
/** | ||
* @brief Retrieves the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU. | ||
* | ||
* @return A pointer to the host buffer type interface. | ||
*/ | ||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void); | ||
|
||
/** | ||
* @brief Retrieves the description of a specific CANN device. | ||
* | ||
* This function sets the specified device, retrieves the SoC name, | ||
* and writes it into the provided description buffer. | ||
* | ||
* @param device The device index to retrieve the description for. | ||
* @param description Pointer to a buffer where the description will be written. | ||
* @param description_size Size of the description buffer. | ||
*/ | ||
GGML_BACKEND_API void ggml_backend_cann_get_device_description( | ||
int32_t device, char* description, size_t description_size); | ||
|
||
/** | ||
* @brief Retrieves the memory information of a specific CANN device. | ||
* | ||
* This function sets the specified device, retrieves the free and total | ||
* memory information of the specified type (ACL_HBM_MEM), and stores them | ||
* in the provided pointers. | ||
* | ||
* @param device The device index to retrieve memory information for. | ||
* @param free Pointer to a variable where the free memory size will be stored. | ||
* @param total Pointer to a variable where the total memory size will be | ||
* stored. | ||
*/ | ||
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device, | ||
size_t* free, | ||
size_t* total); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#pragma once | ||
|
||
#ifndef __cplusplus | ||
#error "This header is for C++ only" | ||
#endif | ||
|
||
#include "ggml.h" | ||
#include "ggml-alloc.h" | ||
#include "ggml-backend.h" | ||
#include <memory> | ||
|
||
// Smart pointers for ggml types | ||
|
||
// ggml | ||
|
||
struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } }; | ||
struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } }; | ||
|
||
typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr; | ||
typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr; | ||
|
||
// ggml-alloc | ||
|
||
struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } }; | ||
|
||
typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr; | ||
|
||
// ggml-backend | ||
|
||
struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } }; | ||
struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } }; | ||
struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } }; | ||
struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } }; | ||
|
||
typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr; | ||
typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr; | ||
typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr; | ||
typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr; |
Oops, something went wrong.