Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
MaybeShewill-CV committed Nov 26, 2024
1 parent 7dba7a0 commit 481603d
Show file tree
Hide file tree
Showing 16 changed files with 4,761 additions and 0 deletions.
76 changes: 76 additions & 0 deletions 3rd_party/include/ggml/ggml-alloc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#pragma once

#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t;

// Tensor allocator
struct ggml_tallocr {
ggml_backend_buffer_t buffer;
void * base;
size_t alignment;
size_t offset;
};

GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);

// Graph allocator
/*
Example usage:
ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
// optional: create a worst-case graph and reserve the buffers to avoid reallocations
ggml_gallocr_reserve(galloc, build_graph(max_batch));
// allocate the graph
struct ggml_cgraph * graph = build_graph(batch);
ggml_gallocr_alloc_graph(galloc, graph);
printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
// evaluate the graph
ggml_backend_graph_compute(backend, graph);
*/

// special tensor flags for use with the graph allocator:
// ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
// ggml_set_output(): output tensors are never freed and never overwritten

typedef struct ggml_gallocr * ggml_gallocr_t;

GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);

// pre-allocate buffers from a measure graph - does not allocate or modify the graph
// call with a worst-case graph to avoid buffer reallocations
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
// returns false if the buffer allocation failed
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
GGML_API bool ggml_gallocr_reserve_n(
ggml_gallocr_t galloc,
struct ggml_cgraph * graph,
const int * node_buffer_ids,
const int * leaf_buffer_ids);

// automatic reallocation if the topology changes when using a single buffer
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);

GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);

// Utils
// Create a buffer and allocate all the tensors in a ggml_context
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);

#ifdef __cplusplus
}
#endif
25 changes: 25 additions & 0 deletions 3rd_party/include/ggml/ggml-amx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"


#ifdef __cplusplus
extern "C" {
#endif

// buffer_type API
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);

GGML_BACKEND_API bool ggml_backend_is_amx(ggml_backend_t backend);

// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_amx_init(void);

GGML_BACKEND_API void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads);

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_amx_reg(void);

#ifdef __cplusplus
}
#endif
336 changes: 336 additions & 0 deletions 3rd_party/include/ggml/ggml-backend.h

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions 3rd_party/include/ggml/ggml-blas.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"


#ifdef __cplusplus
extern "C" {
#endif

// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);

GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);

// number of threads used for conversion to float
// for openblas and blis, this will also set the number of threads used for blas operations
GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);


#ifdef __cplusplus
}
#endif
123 changes: 123 additions & 0 deletions 3rd_party/include/ggml/ggml-cann.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#pragma once

#include "ggml-backend.h"
#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
* @brief Maximum number of CANN devices supported.
*/
#define GGML_CANN_MAX_DEVICES 16

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cann_reg(void);

/**
* @brief Initializes the CANN backend for a specified device.
*
* This function initializes the CANN backend for the given device.
* It verifies the device index, allocates a context, and creates a backend
* instance.
*
* @param device The index of the device to initialize.
* @return A pointer to the initialized backend instance, or nullptr on failure.
*/
GGML_BACKEND_API ggml_backend_t ggml_backend_cann_init(int32_t device);

/**
* @brief Checks if a given backend is a CANN backend.
*
* This function verifies if the provided backend is a CANN backend by comparing
* its GUID with the CANN backend's GUID.
*
* @param backend The backend instance to check.
* @return True if the backend is a CANN backend, false otherwise.
*/
GGML_BACKEND_API bool ggml_backend_is_cann(ggml_backend_t backend);

/**
* @brief Retrieves the CANN buffer type for a specified device.
*
* This function initializes and returns the buffer type interface associated
* with the given device. It ensures thread-safe access using a mutex.
*
* @param device The device index for which to retrieve the buffer type.
* @return A pointer to the buffer type interface for the specified device, or
* nullptr if the device index is out of range.
*/
GGML_BACKEND_API ggml_backend_buffer_type_t
ggml_backend_cann_buffer_type(int32_t device);

/**
* @brief Retrieves the number of CANN devices available.
*
* This function returns the number of CANN devices available based on
* information obtained from `ggml_cann_info()`.
*
* @return The number of CANN devices available.
*/
GGML_BACKEND_API int32_t ggml_backend_cann_get_device_count(void);

/**
* @brief pinned host buffer for use with the CPU backend for faster copies between CPU and NPU.
*
* @return A pointer to the host buffer type interface.
*/
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);

/**
* @brief Retrieves the description of a specific CANN device.
*
* This function sets the specified device, retrieves the SoC name,
* and writes it into the provided description buffer.
*
* @param device The device index to retrieve the description for.
* @param description Pointer to a buffer where the description will be written.
* @param description_size Size of the description buffer.
*/
GGML_BACKEND_API void ggml_backend_cann_get_device_description(
int32_t device, char* description, size_t description_size);

/**
* @brief Retrieves the memory information of a specific CANN device.
*
* This function sets the specified device, retrieves the free and total
* memory information of the specified type (ACL_HBM_MEM), and stores them
* in the provided pointers.
*
* @param device The device index to retrieve memory information for.
* @param free Pointer to a variable where the free memory size will be stored.
* @param total Pointer to a variable where the total memory size will be
* stored.
*/
GGML_BACKEND_API void ggml_backend_cann_get_device_memory(int32_t device,
size_t* free,
size_t* total);

#ifdef __cplusplus
}
#endif
38 changes: 38 additions & 0 deletions 3rd_party/include/ggml/ggml-cpp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#ifndef __cplusplus
#error "This header is for C++ only"
#endif

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include <memory>

// Smart pointers for ggml types

// ggml

struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };

typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;

// ggml-alloc

struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };

typedef std::unique_ptr<ggml_gallocr_t, ggml_gallocr_deleter> ggml_gallocr_ptr;

// ggml-backend

struct ggml_backend_deleter { void operator()(ggml_backend_t backend) { ggml_backend_free(backend); } };
struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
struct ggml_backend_event_deleter { void operator()(ggml_backend_event_t event) { ggml_backend_event_free(event); } };
struct ggml_backend_sched_deleter { void operator()(ggml_backend_sched_t sched) { ggml_backend_sched_free(sched); } };

typedef std::unique_ptr<ggml_backend, ggml_backend_deleter> ggml_backend_ptr;
typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
typedef std::unique_ptr<ggml_backend_event, ggml_backend_event_deleter> ggml_backend_event_ptr;
typedef std::unique_ptr<ggml_backend_sched, ggml_backend_sched_deleter> ggml_backend_sched_ptr;
Loading

0 comments on commit 481603d

Please sign in to comment.