basic matrix multiply example for C++ api? #17

Open
troyliu0105 opened this issue Sep 12, 2021 · 9 comments

Comments

@troyliu0105

I couldn't find any C++ examples (including a basic matrix multiply) here. Any related code snippet would be very helpful... 😢

@dumpinfo

template<int M, bool NeedEncodeX>
void _run_bolt_matmul(const RowMatrix<float>& X, const RowMatrix<float>& Q,
                      const ColMatrix<float>& centroids, ColMatrix<uint8_t>& codes,
                      const RowVector<float>& offsets, float scaleby,
                      ColMatrix<uint8_t>& lut_out, RowMatrix<uint16_t>& out)
{
    auto ncols = X.cols();
    auto nqueries = Q.rows();
    auto nblocks = X.rows() / 32;
    REQUIRE(ncols == Q.cols());
    REQUIRE((nblocks * 32) == X.rows());

    // optionally encode the rows of X as 4-bit codes, one per codebook
    if (NeedEncodeX) {
        bolt_encode<M>(X.data(), X.rows(), (int)X.cols(), centroids.data(),
                       codes.data());
    }

    // build a lookup table per query, then scan the codes with it
    for (int i = 0; i < nqueries; i++) {
        const float* q = Q.row(i).data();
        bolt_lut<M>(q, (int)ncols, centroids.data(), offsets.data(),
                    scaleby, lut_out.data());
        bolt_scan<M, true>(codes.data(), lut_out.data(), out.row(i).data(),
                           nblocks);
    }
}
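For orientation, here is a minimal, hypothetical Catch test case showing how the helper above gets wired up. The test name, tags, sizes, and the bolt<2> choice are made up; it assumes the Eigen typedefs (RowMatrix, ColMatrix, RowVector) and the bolt_* kernels from the repo's headers are in scope. With random centroids the outputs are meaningless -- this only demonstrates the call sequence.

// Hypothetical usage sketch, not code from the repo.
TEST_CASE("bolt matmul wiring example", "[amm][bolt]") {
    constexpr int M = 2;                              // bytes per code -> 2*M codebooks
    const int nrows = 1024, ncols = 64, nqueries = 8; // nrows must be a multiple of 32

    RowMatrix<float> X(nrows, ncols);        X.setRandom();
    RowMatrix<float> Q(nqueries, ncols);     Q.setRandom();
    ColMatrix<float> centroids(16, ncols);   centroids.setRandom();  // would be learned
    ColMatrix<uint8_t> codes(nrows, 2 * M);  codes.setZero();
    RowVector<float> offsets(ncols);         offsets.setZero();
    ColMatrix<uint8_t> luts(16, 2 * M);
    RowMatrix<uint16_t> out(nqueries, nrows);

    // encode X, build one LUT per query, and scan; out(i, j) then approximates
    // a scaled / offset version of the inner product of Q.row(i) with X.row(j)
    _run_bolt_matmul<M, true>(X, Q, centroids, codes, offsets,
                              /*scaleby=*/1.f, luts, out);
    REQUIRE(out.rows() == nqueries);
}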
//##########################################################################
template<int M>
void _profile_bolt_matmul(int nrows, int ncols, int nqueries) {
    static constexpr int ncodebooks = 2 * M;
    static constexpr int ncentroids = 16;  // bolt uses 4-bit codes

    // create random data
    RowMatrix<float> X(nrows, ncols);
    X.setRandom();

    // create random queries
    RowMatrix<float> Q(nqueries, ncols);
    Q.setRandom();

    // create random centroids
    ColMatrix<float> centroids(ncentroids, ncols);
    centroids.setRandom();

    // create random codes in [0, 15]
    ColMatrix<uint8_t> codes(nrows, ncodebooks);
    codes.setRandom();
    codes = codes.array() / 16;

    // create random / arbitrary offsets and scale factor for luts
    RowVector<float> offsets(ncols);
    offsets.setRandom();
    float scaleby = 3; // arbitrary number

    // storage for luts, product
    ColMatrix<uint8_t> luts(ncentroids, ncodebooks);
    RowMatrix<uint16_t> out(nqueries, nrows);

    // time it
    std::string msg = string_with_format("bolt<%d> encode=%d matmul %d",
                                         M, false, nqueries);
    REPEATED_PROFILE_DIST_COMPUTATION(kNreps, msg, kNtrials,
        out.data(), nrows * nqueries,
        (_run_bolt_matmul<M, false>(X, Q, centroids, codes, offsets, scaleby,
                                    luts, out)));

    std::string msg2 = string_with_format("bolt<%d> encode=%d matmul %d",
                                          M, true, nqueries);
    REPEATED_PROFILE_DIST_COMPUTATION(kNreps, msg2, kNtrials,
        out.data(), nrows * nqueries,
        (_run_bolt_matmul<M, true>(X, Q, centroids, codes, offsets, scaleby,
                                   luts, out)));
}
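And a sketch of a call site: the repo's profiling code is driven from Catch test cases, so an invocation could look roughly like this. The test name, tags, and problem sizes below are illustrative, and kNreps / kNtrials plus the REPEATED_PROFILE_DIST_COMPUTATION macro are assumed to come from the repo's test utilities.

// Hypothetical call site for the profiler above.
TEST_CASE("profile bolt matmul example", "[amm][bolt][profile]") {
    static constexpr int nrows = 1 << 14;   // multiple of 32, as required above
    static constexpr int ncols = 128;
    static constexpr int nqueries = 16;
    _profile_bolt_matmul<2>(nrows, ncols, nqueries);   //  4 codebooks
    _profile_bolt_matmul<8>(nrows, ncols, nqueries);   // 16 codebooks
}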
//################################################################

template<class InputT>
struct mithral_amm_task {
    using traits = mithral_input_type_traits<InputT>;
    using scale_t = typename traits::encoding_scales_type;
    using offset_t = typename traits::encoding_offsets_type;
    using output_t = typename traits::output_type;
    static constexpr int scan_block_nrows = 32;
    static constexpr int ncentroids = 16;
    static constexpr int nsplits_per_codebook = 4;
    static constexpr int max_splitvals = 1 << 4;

    mithral_amm_task(int N, int D, int M, int ncodebooks,
                     float lut_work_const):
        N_padded(N % scan_block_nrows == 0 ? N :
            N + (scan_block_nrows - (N % scan_block_nrows))),
        centroids(ncentroids * ncodebooks, D),
        nsplits(ncodebooks * nsplits_per_codebook),
        splitdims(nsplits),
        splitvals(max_splitvals, nsplits),
        encode_scales(nsplits),
        encode_offsets(nsplits),
        nnz_per_centroid(lut_work_const > 0 ?
            lut_work_const * D / ncodebooks : D),
        idxs(ncodebooks, nnz_per_centroid),
        amm(N_padded, D, M, ncodebooks, centroids.data(),
            splitdims.data(), splitvals.data(),
            encode_scales.data(), encode_offsets.data(),
            idxs.data(), nnz_per_centroid),
        X(N_padded, D),
        Q(D, M)
    {
        centroids.setRandom();
        splitdims.setRandom();
        for (int i = 0; i < splitdims.size(); i++) {
            splitdims(i) = splitdims(i) % D;
        }
        splitvals.setRandom();
        encode_scales.setRandom();
        encode_offsets.setRandom();

        // randomly initialize idxs, ensuring all are unique and < D
        idxs.setRandom();
        int all_idxs[D];
        for (int i = 0; i < D; i++) {
            all_idxs[i] = i;
        }
        std::random_device rd;
        std::mt19937 g(rd());  // why can't shuffle just create its own...
        for (int c = 0; c < ncodebooks; c++) {  // random sequential idxs
            std::shuffle(all_idxs, all_idxs + D, g);
            std::sort(all_idxs, all_idxs + nnz_per_centroid);
            for (int j = 0; j < nnz_per_centroid; j++) {
                idxs(c, j) = all_idxs[j];
            }
        }

        X.setRandom();
        Q.setRandom();
    }

    void encode() { amm.encode(X.data()); }
    void lut() { amm.lut(Q.data()); }
    void scan() { amm.scan(); }

    void run_matmul(bool create_lut=true) {
        encode();
        if (create_lut) {
            lut();
        }
        scan();
    }

    const ColMatrix<output_t>& output() const { return amm.out_mat; }

    // stuff we pass into the amm object (would be learned during training)
    int N_padded;
    ColMatrix<float> centroids;
    int nsplits;
    RowVector<uint32_t> splitdims;
    ColMatrix<int8_t> splitvals;
    RowVector<scale_t> encode_scales;
    RowVector<offset_t> encode_offsets;
    int nnz_per_centroid;
    RowMatrix<int> idxs;

    // amm object
    mithral_amm<InputT> amm;

    // random data
    ColMatrix<InputT> X;
    ColMatrix<float> Q;
};
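For completeness, a minimal, hypothetical end-to-end use of mithral_amm_task might look like the following (the test name, tags, sizes, and float input type are arbitrary). With the randomly initialized splits and centroids this only exercises the encode -> lut -> scan pipeline; meaningful output needs parameters learned by the Python code, as discussed further down in this thread.

// Hypothetical usage sketch, not code from the repo.
TEST_CASE("mithral amm task example", "[amm][mithral]") {
    const int N = 4096, D = 128, M = 16;   // approximates the product (N x D) * (D x M)
    const int ncodebooks = 16;
    const float lut_work_const = 2.0f;     // <= 0 would mean "use all D dims" (see ctor)

    mithral_amm_task<float> task(N, D, M, ncodebooks, lut_work_const);
    task.run_matmul(/*create_lut=*/true);  // encode(), lut(), scan()

    const auto& out = task.output();       // ColMatrix<output_t> held by the amm object
    REQUIRE(out.size() > 0);
}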
//#################################################################

@troyliu0105
Author

@dumpinfo Thx for your reply. I'm going to test it. 😆

@a3413209

Hello, how do I use this test case? I have compiled and run the source with bazel run :main. What should I do afterwards? Could you tell me more about it?

@dblalock
Owner

To run any of the MADDNESS code, use cpp/test/main.cpp. I used Catch tests for everything because they give you a nice CLI and make debugging easy.

You can run it with no arguments, but you probably want to pass some tags to select which tests to run, since the full suite takes a while. Something like [amm] ~[old] runs just the MADDNESS tests and excludes the legacy / debugging tests.
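(For instance, with the bazel setup mentioned above, that would be something along the lines of: bazel run :main -- "[amm] ~[old]". The exact target and binary name depend on your build files, so treat this as a sketch rather than a verified command.)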

@fjrdev

fjrdev commented Apr 20, 2022

Hi @dumpinfo, @troyliu0105, it seems like you have worked with mithral in its C++ implementation. Could you find the files where the parameter optimization takes place? Running run_matmul() in mithral_amm_task() obviously does not work, since the parameters necessary for the encoding and LUT creation are set randomly. Or does one have to provide the learning phase?

@dblalock
Owner

@fjrdev: there is no learning available in the C++ API, only in the Python implementation. So you'd have to generate the split thresholds, split values, and prototypes in Python and pass them to C++, which this repo doesn't support doing yet (and I likely won't add it in the foreseeable future, because I wrote this code 2.5 years ago and am super busy with work + family these days).

@dumpinfo

The learning is implemented in "python/clusterize.py".

@dumpinfo

(image attachment)

@mpetri

mpetri commented Jun 21, 2022

@dumpinfo have you managed to port the Python learning code to C++, or at least been able to extract the learnt parameters into the C++ codebase?
