Many updates and bug fixes #223

Merged: 20 commits, Aug 2, 2023
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -33,9 +33,9 @@ option(LIBRAPID_GET_BLAS "Download pre-built OpenBLAS binaries and use them" OFF
option(LIBRAPID_GET_MULTIPREC "Download generic multiprecision libraries, as opposed to trying to find one on the system" OFF)

option(LIBRAPID_USE_BLAS "Attempt to use a BLAS library" ON)
option(LIBRAPID_USE_CUDA "Attempt to use CUDA" ON)
option(LIBRAPID_USE_OMP "Attempt to use OpenMP to allow multithreading" ON)
option(LIBRAPID_USE_OPENCL "Search for OpenCL and use it if possible" ON)
option(LIBRAPID_USE_CUDA "Attempt to use CUDA" ON)
option(LIBRAPID_USE_MULTIPREC "Include MPIR and MPFR in the LibRapid build" OFF)
option(LIBRAPID_FAST_MATH "Use potentially less accurate operations to increase performance" OFF)
option(LIBRAPID_NATIVE_ARCH "Use the native architecture of the system" OFF)
40 changes: 29 additions & 11 deletions docs/source/cmakeIntegration.md
@@ -88,35 +88,53 @@ Download a precompiled OpenBLAS build for your platform, and link it with LibRapid
Always prefer to use your system's BLAS installation if possible.
:::

### ``LIBRAPID_USE_CUDA``
### ``LIBRAPID_USE_OMP``

```
DEFAULT: ON
```

Search for CUDA and link LibRapid with it. This is required for GPU support.
If OpenMP is found on the system, link LibRapid with it. This is required for multi-threading support and can
significantly improve performance.

:::{warning}
If this flag is enabled and OpenMP is not installed on the system, the build will continue without OpenMP support.
:::

### ``LIBRAPID_USE_OPENCL``

```
DEFAULT: ON
```

Search for OpenCL and link LibRapid with it. This is required for OpenCL support.

:::{warning}
If this flag is enabled and CUDA is not installed on the system, the build will continue without CUDA support.
If this flag is enabled and OpenCL is not installed on the system, the build will continue without OpenCL support.
:::

:::{danger}
LibRapid's CUDA support appears to only work on Windows, for some reason. I have no way of testing it on Linux or
macOS, so I can't guarantee that it will work. If you have experience in this area, please feel free to contact me and
we can work together to get it working.
If you are using OpenCL as a backend in your code, you must call ``librapid::configureOpenCL()`` before using any
OpenCL arrays. This function will initialise the OpenCL context and queue, compile the OpenCL kernels and configure the
OpenCL device for optimal performance. See the documentation for this function for more information.
:::

### ``LIBRAPID_USE_OMP``
### ``LIBRAPID_USE_CUDA``

```
DEFAULT: ON
```

If OpenMP is found on the system, link LibRapid with it. This is required for multi-threading support and can
significantly improve performance.
Search for CUDA and link LibRapid with it. This is required for GPU support.

:::{warning}
If this flag is enabled and OpenMP is not installed on the system, the build will continue without OpenMP support.
:::{warning}
If this flag is enabled and CUDA is not installed on the system, the build will continue without CUDA support.
:::

:::{danger}
LibRapid's CUDA support appears to only work on Windows, for some reason. I have no way of testing it on Linux or
macOS, so I can't guarantee that it will work. If you have experience in this area, please feel free to contact me and
we can work together to get it working.
:::

### ``LIBRAPID_USE_MULTIPREC``
3 changes: 3 additions & 0 deletions examples/CMakeLists.txt
@@ -9,5 +9,8 @@ function(make_example name)
endfunction()

make_example(array-1)
make_example(array-2)
make_example(vector-1)
make_example(complex-1)
make_example(opencl)
make_example(cuda)
2 changes: 1 addition & 1 deletion examples/example-array-1.cpp
@@ -2,7 +2,7 @@

namespace lrc = librapid;

int main() {
auto main() -> int {
fmt::print("LibRapid Example -- Array 1\n");

// Create a vector with 10 elements
35 changes: 35 additions & 0 deletions examples/example-array-2.cpp
@@ -0,0 +1,35 @@
#include <librapid>
namespace lrc = librapid;
using namespace lrc::literals;

auto main() -> int {
/*
* 1. Create two arrays. One 2x3 and one 3x1
* 2. Perform a matrix multiplication
* 3. Print the results
*/

std::vector<std::vector<float>> firstData = {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}};
std::vector<std::vector<float>> secondData = {{1.0f}, {2.0f}, {3.0f}};

auto firstMatrix = lrc::Array<float>::fromData(firstData);
auto secondMatrix = lrc::Array<float>::fromData(secondData);
auto firstResult = lrc::dot(firstMatrix, secondMatrix);
fmt::print("Left:\n{}\n", firstMatrix);
fmt::print("Right:\n{}\n", secondMatrix);
fmt::print("Result:\n{}\n", firstResult);

/*
* 4. Create a new array of shape 4x3, filled with random numbers in the range [-1, 1)
* 5. Perform a matrix multiplication with the transpose of `firstMatrix`
* 6. Print the results
*/

auto thirdMatrix = lrc::random<float>(lrc::Shape({4, 3}), -1, 1);
auto secondResult = lrc::dot(thirdMatrix, lrc::transpose(firstMatrix));
fmt::print("Left:\n{:.2f}\n", thirdMatrix); // Format values to 2 decimal places
fmt::print("Right:\n{}\n", lrc::transpose(firstMatrix));
fmt::print("Result:\n{:.2f}\n", secondResult);

return 0;
}
2 changes: 1 addition & 1 deletion examples/example-complex-1.cpp
@@ -2,7 +2,7 @@

namespace lrc = librapid;

int main() {
auto main() -> int {
fmt::print("LibRapid Example -- Complex 1\n");

fmt::print("sqrt(-1) = {}\n", lrc::sqrt(lrc::Complex(-1)));
40 changes: 40 additions & 0 deletions examples/example-cuda.cpp
@@ -0,0 +1,40 @@
#include <librapid>

namespace lrc = librapid;

auto main() -> int {
#if defined(LIBRAPID_HAS_CUDA)
auto cudaArray = lrc::Array<float, lrc::backend::CUDA>::fromData({{1, 2, 3}, {4, 5, 6}});
fmt::print("CUDA Array:\n{}\n", cudaArray);

// Operations on CUDA arrays work exactly the same as on CPU arrays
auto sum = cudaArray + cudaArray;
auto prod = sum * sum * 10;
fmt::print("(x + x) * (x + x) * 10:\n{}\n", prod);

// All accessing methods work as well (though some are faster than others)
// Note that you MUST use `auto` or `auto &` (NOT `const auto &`). This is because of how
// the data is represented internally and how our iterators work. For more information,
// check out the documentation:
// https://librapid.readthedocs.io/en/latest/topics/arrayIterators.html#implicit-iteration
fmt::print("Accessing elements: ");
for (auto val : prod) {
for (auto v : val) { fmt::print("{} ", v); }
}
fmt::print("\n");

// Linear algebra operations also work
fmt::print("Transposed CUDA Array:\n{}\n", lrc::transpose(prod));

auto vector = lrc::Array<float, lrc::backend::CUDA>::fromData({{1, 2, 3}});
fmt::print("Array: \n{}\n", cudaArray);
fmt::print("Vector: \n{}\n", vector);
fmt::print("Matrix dot Vector^T:\n{}\n", lrc::dot(cudaArray, lrc::transpose(vector)));
#else
fmt::print("CUDA not enabled in this build of librapid\n");
fmt::print("Check the documentation for more information on enabling CUDA\n");
fmt::print("https://librapid.readthedocs.io/en/latest/cmakeIntegration.html#librapid-use-cuda\n");
#endif // LIBRAPID_HAS_CUDA

return 0;
}
45 changes: 45 additions & 0 deletions examples/example-opencl.cpp
@@ -0,0 +1,45 @@
#include <librapid>

namespace lrc = librapid;

auto main() -> int {
#if defined(LIBRAPID_HAS_OPENCL)
// Must be called to enable OpenCL. Passing `true` logs the devices
// available and the one selected. Set to false for a cleaner output.
// (You can pass (true, true) to select the device manually)
lrc::configureOpenCL(true);

auto openclArray = lrc::Array<float, lrc::backend::OpenCL>::fromData({{1, 2, 3}, {4, 5, 6}});
fmt::print("OpenCL Array:\n{}\n", openclArray);

// Operations on OpenCL arrays work exactly the same as on CPU arrays
auto sum = openclArray + openclArray;
auto prod = sum * sum * 10;
fmt::print("(x + x) * (x + x) * 10:\n{}\n", prod);

// All accessing methods work as well (though some are faster than others)
// Note that you MUST use `auto` or `auto &` (NOT `const auto &`). This is because of how
// the data is represented internally and how our iterators work. For more information,
// check out the documentation:
// https://librapid.readthedocs.io/en/latest/topics/arrayIterators.html#implicit-iteration
fmt::print("Accessing elements: ");
for (auto val : prod) {
for (auto v : val) { fmt::print("{} ", v); }
}
fmt::print("\n");

// Linear algebra operations also work
fmt::print("Transposed OpenCL Array:\n{}\n", lrc::transpose(prod));

auto vector = lrc::Array<float, lrc::backend::OpenCL>::fromData({{1, 2, 3}});
fmt::print("Array: \n{}\n", openclArray);
fmt::print("Vector: \n{}\n", vector);
fmt::print("Matrix dot Vector^T:\n{}\n", lrc::dot(openclArray, lrc::transpose(vector)));
#else
fmt::print("OpenCL not enabled in this build of librapid\n");
fmt::print("Check the documentation for more information on enabling OpenCL\n");
fmt::print("https://librapid.readthedocs.io/en/latest/cmakeIntegration.html#librapid-use-opencl\n");
#endif

return 0;
}
34 changes: 22 additions & 12 deletions examples/example-vector-1.cpp
@@ -2,12 +2,9 @@

namespace lrc = librapid;

int main() {
auto main() -> int {
fmt::print("LibRapid Example -- Vector 1\n");

// I'm rewriting the entire vector library, so this code currently does not work.
// I'll have a new example up soon <3
/*
// Create a 3 dimensional vector
lrc::Vec3d myVector(2, 3, 4);
lrc::Vec3d myOtherVector(10, 5, 8);
@@ -17,19 +14,19 @@ int main() {
// Simple operations
fmt::print("Vec * Scalar: {}\n", myVector * 2);
fmt::print("Vec * Vec: {}\n", myVector * myOtherVector);
fmt::print("Vector dot product: {}\n", myVector.dot(myOtherVector));
fmt::print("Vector cross product: {}\n", myVector.cross(myOtherVector));
fmt::print("Vector dot product: {}\n", lrc::dot(myVector, myOtherVector));
fmt::print("Vector cross product: {}\n", lrc::cross(myVector, myOtherVector));

fmt::print("\nTrigonometry with Vectors:\n");
auto cross = myVector.cross(myOtherVector);
double theta1 = (myVector.dot(myOtherVector)) / (myVector.mag() * myOtherVector.mag());
double theta2 = (myVector.dot(cross)) / (myVector.mag() * cross.mag());
auto cross = lrc::cross(myVector, myOtherVector);
double theta1 = (lrc::dot(myVector, myOtherVector)) / (lrc::mag(myVector) * lrc::mag(myOtherVector));
double theta2 = (lrc::dot(myVector, cross)) / (lrc::mag(myVector) * lrc::mag(cross));
fmt::print("A cross B = {}\n", cross);
fmt::print("Angle between A and B = {}\n", theta1);
fmt::print("Angle between A and B = {:.3f}\n", theta1);
fmt::print("Angle between A and (A cross B) = {}pi\n", lrc::acos(theta2) / lrc::PI);

// Functions operate on each element of a vector
fmt::print("sin(Vec(pi/4, pi/3, pi/2, pi)) = {}\n",
fmt::print("sin(Vec(pi/4, pi/3, pi/2, pi)) = {:.3f}\n",
lrc::sin(lrc::Vec4d(lrc::PI / 4, lrc::PI / 3, lrc::PI / 2, lrc::PI)));

fmt::print("\n");
@@ -41,7 +38,20 @@ int main() {
lrc::Vec3d start2(0, 0, 0);
lrc::Vec3d end2(100, 100, 100);
fmt::print("Mapping: {}\n", lrc::map(value, start1, end1, start2, end2));
*/

// Polar coordinates
auto polarVec = lrc::Vector<float, 2>::fromPolar(1, lrc::PI / 4);
fmt::print("Polar vector: {:.3f}\n", polarVec);

// Filled vectors
auto zero = lrc::Vec3d::zero();
auto one = lrc::Vec3d::one();
auto full = lrc::Vec3d::full(49);
auto random = lrc::Vec3d::random(-5, 5);
fmt::print("Zero vector: {}\n", zero);
fmt::print("One vector: {}\n", one);
fmt::print("Full vector: {}\n", full);
fmt::print("Random vector: {:.3f}\n", random);

return 0;
}
32 changes: 26 additions & 6 deletions librapid/include/librapid/array/arrayFromData.hpp
@@ -63,23 +63,43 @@ namespace librapid {
return res;
}

//#define HIGHER_DIMENSIONAL_FROM_DATA(TYPE) \
// template<typename Scalar, typename Backend> \
// auto array::ArrayContainer<Scalar, Backend>::fromData(const TYPE &data) -> ArrayContainer { \
// LIBRAPID_ASSERT(data.size() > 0, "Cannot create a zero-sized array"); \
// auto *tmp = new ArrayContainer[data.size()]; \
// int64_t index = 0; \
// for (const auto &item : data) tmp[index++] = fromData(item); \
// auto zeroShape = tmp[0].shape(); \
// for (int64_t i = 0; i < data.size(); ++i) \
// LIBRAPID_ASSERT(tmp[i].shape().operator==(zeroShape), \
// "Arrays must have consistent shapes"); \
// auto newShape = ShapeType::zeros(zeroShape.ndim() + 1); \
// newShape[0] = data.size(); \
// for (size_t i = 0; i < zeroShape.ndim(); ++i) { newShape[i + 1] = zeroShape[i]; } \
// auto res = Array<Scalar, Backend>(newShape); \
// index = 0; \
// for (int64_t i = 0; i < data.size(); ++i) res[i] = std::move(tmp[i]); \
// delete[] tmp; \
// return res; \
// }

#define HIGHER_DIMENSIONAL_FROM_DATA(TYPE) \
template<typename Scalar, typename Backend> \
auto array::ArrayContainer<Scalar, Backend>::fromData(const TYPE &data) -> ArrayContainer { \
LIBRAPID_ASSERT(data.size() > 0, "Cannot create a zero-sized array"); \
auto *tmp = new ArrayContainer[data.size()]; \
std::vector<ArrayContainer> tmp(data.size()); \
int64_t index = 0; \
for (const auto &item : data) tmp[index++] = ArrayContainer(item); \
for (const auto &item : data) tmp[index++] = fromData(item); \
auto zeroShape = tmp[0].shape(); \
for (int64_t i = 0; i < data.size(); ++i) \
LIBRAPID_ASSERT(tmp[i].shape() == zeroShape, "Arrays must have consistent shapes"); \
LIBRAPID_ASSERT(tmp[i].shape().operator==(zeroShape), \
"Arrays must have consistent shapes"); \
auto newShape = ShapeType::zeros(zeroShape.ndim() + 1); \
newShape[0] = data.size(); \
for (size_t i = 0; i < zeroShape.ndim(); ++i) { newShape[i + 1] = zeroShape[i]; } \
auto res = Array<Scalar, Backend>(newShape); \
index = 0; \
for (int64_t i = 0; i < data.size(); ++i) res[i] = std::move(tmp[i]); \
delete[] tmp; \
for (int64_t i = 0; i < data.size(); ++i) res[i] = tmp[i]; \
return res; \
}

6 changes: 6 additions & 0 deletions librapid/include/librapid/array/arrayTypeDef.hpp
@@ -74,6 +74,12 @@ namespace librapid {
typename Beta = typename StorageTypeB::Scalar>
class ArrayMultiply;
}

template<typename T>
using IsArrayType = std::integral_constant<
bool, (typetraits::TypeInfo<T>::type == detail::LibRapidType::ArrayContainer) ||
(typetraits::TypeInfo<T>::type == detail::LibRapidType::ArrayView) ||
(typetraits::TypeInfo<T>::type == detail::LibRapidType::ArrayFunction)>;
} // namespace librapid

#endif // LIBRAPID_ARRAY_TYPE_DEF_HPP
3 changes: 2 additions & 1 deletion librapid/include/librapid/array/arrayView.hpp
@@ -160,7 +160,8 @@ namespace librapid {
template<typename T>
template<typename RefType>
ArrayView<T> &ArrayView<T>::operator=(const ArrayRef<RefType> &other) {
LIBRAPID_ASSERT(m_shape.operator==(other.shape()), "Cannot assign to a non-scalar ArrayView.");
LIBRAPID_ASSERT(m_shape.operator==(other.shape()),
"Cannot assign to a non-scalar ArrayView.");

ShapeType coord = ShapeType::zeros(m_shape.ndim());
int64_t d = 0, p = 0;
17 changes: 16 additions & 1 deletion librapid/include/librapid/array/linalg/arrayMultiply.hpp
@@ -651,7 +651,9 @@ namespace librapid {
/// StorageTypeB The storage type of the right input array. \param a The left input array.
/// \param b The right input array.
/// \return The dot product of the two input arrays.
template<typename First, typename Second>
template<
typename First, typename Second,
typename std::enable_if_t<IsArrayType<First>::value && IsArrayType<Second>::value, int> = 0>
auto dot(First &&a, Second &&b) {
using ScalarA = typename typetraits::TypeInfo<std::decay_t<First>>::Scalar;
using ScalarB = typename typetraits::TypeInfo<std::decay_t<Second>>::Scalar;
@@ -669,6 +671,19 @@
std::forward<ArrayB>(arrB),
0); // .eval();
}

namespace typetraits {
template<typename ShapeTypeA, typename StorageTypeA, typename ShapeTypeB,
typename StorageTypeB, typename Alpha, typename Beta>
struct TypeInfo<linalg::ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB,
Alpha, Beta>> {
static constexpr detail::LibRapidType type = detail::LibRapidType::ArrayFunction;
using Type = linalg::ArrayMultiply<ShapeTypeA, StorageTypeA, ShapeTypeB, StorageTypeB,
Alpha, Beta>;
using Scalar = typename Type::Scalar;
using Backend = typename Type::Backend;
};
}
} // namespace librapid

LIBRAPID_SIMPLE_IO_IMPL(