[WIP] Index tree rewrite #57

Open: wants to merge 32 commits into master.

Commits (32)
e297f62
Begin working on IndexTree transformations
AK2000 Nov 1, 2023
25e5089
Fixing some of the problems introduced on merge
AK2000 Nov 1, 2023
250cc5f
Resolved including of device mapping attribute
AK2000 Nov 1, 2023
8ac4072
Fixed type inclusion, parsing and printing
AK2000 Nov 1, 2023
b6c8893
V1 - Lower TA to new IndexTree ops, but removed everything else
AK2000 Nov 6, 2023
e3e5a10
Fixes to TA to change how file is included
AK2000 Nov 6, 2023
1e88644
Creating new block for index tree
AK2000 Nov 8, 2023
c6289de
Implement domain inference pass, fix to index ordering
AK2000 Nov 10, 2023
bd20a01
[WIP] Fragile version of index tree to SCF lowering
AK2000 Nov 30, 2023
df08dd2
Fix carrying tensors inside loop, refactor domain concretization
AK2000 Dec 12, 2023
d25c6ca
Adding TA to index tree patterns for elementwise operations
AK2000 Dec 13, 2023
0edc1b6
[WIP] Trying to implement intersection op lowering
AK2000 Dec 19, 2023
fda15d0
[WIP] Got domain intersection working, but only with dense output
AK2000 Jan 3, 2024
d00bb75
[WIP] Minor fix to ordering of reduce args
AK2000 Jan 3, 2024
17d9f87
[WIP] Beginning support for sparse output tensors with new index tree
AK2000 Jan 10, 2024
45f39a6
[WIP] Inlined itree op, got hacky version of removing set op working
AK2000 Jan 11, 2024
0136bae
[WIP] Included lowering to LLVM, lowering print op does not work
AK2000 Jan 12, 2024
266a411
[WIP] Almost got print op lowering working
AK2000 Jan 15, 2024
47a4aef
[WIP] Fixed bufferization
AK2000 Jan 18, 2024
23bbbf6
[WIP] Generate symbolic pass for sparse tensor declarations
AK2000 Jan 24, 2024
018323e
[WIP] Lots of changes for first try at symbolic domain pass and works…
AK2000 Feb 13, 2024
c8386a2
[WIP] Broke everything trying to redo tensor conversion infrastructure
AK2000 Feb 21, 2024
adfbdf0
Changing a lot to create new sparse tensor types, and appropriate lowe…
AK2000 May 15, 2024
fc39562
Fixing some problems with tests, add pure ops
AK2000 Jun 5, 2024
19dbedc
Fixed inconsistencies in test suite
AK2000 Jun 5, 2024
5db6c87
Fixing more of the test cases
AK2000 Jun 13, 2024
4f7fc17
Fixed dense transpose and print elapsed time
AK2000 Jun 17, 2024
872efc7
Fixing errors in typing
AK2000 Jun 19, 2024
cb998c0
Fixing errors in typing and set op
AK2000 Jun 19, 2024
86176ef
Adding back ttgt pass
AK2000 Jun 20, 2024
b57e40e
Fixed delete before use errors
AK2000 Oct 14, 2024
d52d26f
Another bug found with asan
AK2000 Oct 14, 2024
8 changes: 8 additions & 0 deletions CMakeLists.txt
@@ -107,6 +107,8 @@ add_custom_target(comet-headers)
set_target_properties(comet-headers PROPERTIES FOLDER "Misc")
add_custom_target(comet-doc)

set(CMAKE_INCLUDE_CURRENT_DIR ON)

# Add MLIR, LLVM and BLIS headers to the include path
include_directories(${LLVM_INCLUDE_DIRS})
include_directories(${MLIR_INCLUDE_DIRS})
@@ -182,3 +184,9 @@ if (STANDALONE_INSTALL)
message(STATUS "Setting an $ORIGIN-based RPATH on all executables")
set_rpath_all_targets(${CMAKE_CURRENT_SOURCE_DIR})
endif()

option(DEBUG_MODE "Create an installation with debug information" off)
if (DEBUG_MODE)
message(STATUS "Building comet in debug mode")
add_compile_options(-DCOMET_DEBUG_MODE)
endif()
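
For reference, the new DEBUG_MODE option passes a project-wide -DCOMET_DEBUG_MODE definition to the compiler. Below is a minimal, hypothetical sketch of how such a definition could gate a streaming debug macro like comet_debug(); the actual contents of comet/Utils/debug.h are not part of this diff, so the macro bodies shown here are assumptions only.

#include <iostream>

// Hypothetical gating on COMET_DEBUG_MODE; the real comet/Utils/debug.h may differ.
#ifdef COMET_DEBUG_MODE
#define comet_debug() std::cerr << "[comet-debug] " << __FILE__ << ":" << __LINE__ << " "
#else
#define comet_debug() if (false) std::cerr /* statement compiles away when debug mode is off */
#endif

int main() {
  comet_debug() << "printed only when built with -DCOMET_DEBUG_MODE\n";
  return 0;
}
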
2 changes: 1 addition & 1 deletion frontends/comet_dsl/CMakeLists.txt
@@ -23,7 +23,7 @@ set(LIBS
COMETUtils
COMETTensorAlgebraDialect
COMETIndexTreeDialect
COMETIndexTreeToSCF
# COMETIndexTreeToSCF
)

target_link_libraries(comet-opt
119 changes: 65 additions & 54 deletions frontends/comet_dsl/comet.cpp
@@ -41,6 +41,9 @@
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/Func/Transforms/Passes.h"
#include "mlir/Dialect/Tensor/Transforms/Passes.h"
#include "mlir/Dialect/SCF/Transforms/Passes.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"


#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
@@ -245,7 +248,7 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
pm.addPass(mlir::comet::createFuncOpLoweringPass());

mlir::OpPassManager &optPM = pm.nest<mlir::func::FuncOp>();
optPM.addPass(mlir::comet::createRemoveLabeledTensorOpsPass());
// optPM.addPass(mlir::comet::createRemoveLabeledTensorOpsPass());

/// Check to see if we are dumping to TA dialect.
if (emitTA)
@@ -287,17 +290,15 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
/// Generate the index tree IR
optPM.addPass(mlir::comet::createLowerTensorAlgebraToIndexTreePass());

if (OptKernelFusion)
{
/// Apply partial fusion on index tree dialect for some compound expressions.
optPM.addPass(mlir::comet::createIndexTreeKernelFusionPass());
}
// Create new pass manager to optimize the index tree dialect
// mlir::OpPassManager &itOptPM = optPM.nest<IndexTreeOp>();
optPM.addPass(mlir::comet::createIndexTreeDomainInferencePass());

if (OptWorkspace)
{
/// Optimized workspace transformations, reduce iteration space for nonzero elements
optPM.addPass(mlir::comet::createIndexTreeWorkspaceTransformationsPass());
}
// if (OptKernelFusion)
// {
// /// Apply partial fusion on index tree dialect for some compound expressions.
// optPM.addPass(mlir::comet::createIndexTreeKernelFusionPass());
// }

/// Dump index tree dialect.
if (emitIT)
@@ -319,8 +320,9 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
/// sparse input tensor declaration lowering, also generate sparse_output_tensor declaration if needed
/// input and output sparse tensor declaration lowering are distinct and need different information
optPM.addPass(mlir::comet::createSparseTensorDeclLoweringPass());
// optPM.addPass(mlir::comet::createSparseOutputTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createDenseTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createSparseTempOutputTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createSparseOutputTensorDeclLoweringPass());
optPM.addPass(mlir::comet::createTensorFillLoweringPass());

/// =============================================================================
@@ -332,75 +334,83 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
optPM.addPass(mlir::comet::createLoweringTTGTPass(IsSelectBestPermTTGT, selectedPermNum, IsPrintFlops));
}

/// =============================================================================
/// Operation based optimizations
/// =============================================================================
if (OptMatmulTiling)
{
optPM.addPass(mlir::comet::createLinAlgMatmulTilingPass());
}
// /// =============================================================================
// /// Operation based optimizations
// /// =============================================================================
// if (OptMatmulTiling)
// {
// optPM.addPass(mlir::comet::createLinAlgMatmulTilingPass());
// }

if (OptCallToMatMulMicroKernel)
{
optPM.addPass(mlir::comet::createLinAlgMatmulMicroKernelPass());
}
// if (OptCallToMatMulMicroKernel)
// {
// optPM.addPass(mlir::comet::createLinAlgMatmulMicroKernelPass());
// }

/// =============================================================================
/// Lowering all the operations to loops
/// =============================================================================
if (IsLoweringtoSCF || emitLoops || emitLLVM)
{
/// Workspace transformations will create new dense tensor declarations, so we need to call createDenseTensorDeclLoweringPass
optPM.addPass(mlir::comet::createDenseTensorDeclLoweringPass()); /// lowers dense input/output tensor declaration
optPM.addPass(mlir::comet::createSparseTempOutputTensorDeclLoweringPass()); /// Temporary sparse output tensor declarations introduced by compound expressions
/// should be lowered before sparse output tensor declarations
optPM.addPass(mlir::comet::createSparseOutputTensorDeclLoweringPass()); /// lowering for sparse output tensor declarations
//(sparse_output_tensor_decl and temp_sparse_output_tensor_decl)
/// The partial Fusion pass might add new tensor.fill operations
optPM.addPass(mlir::comet::createTensorFillLoweringPass());
optPM.addPass(mlir::comet::createPCToLoopsLoweringPass());

{
/// =============================================================================
/// Lowering of other operations such as transpose, sum, etc. to SCF dialect
/// =============================================================================
/// If it is a transpose of dense tensor, the rewrites rules replaces ta.transpose with linalg.copy.
/// If it is a transpose of sparse tensor, it lowers the code to make a runtime call to specific sorting algorithm
optPM.addPass(mlir::comet::createLowerTensorAlgebraToSCFPass());

/// Finally lowering index tree to SCF dialect
optPM.addPass(mlir::comet::createLowerIndexTreeToSCFPass());
optPM.addPass(mlir::createTensorBufferizePass());
pm.addPass(mlir::func::createFuncBufferizePass()); /// Needed for func
/// Concretize the domains of all the index variables
optPM.addPass(mlir::comet::createIndexTreeDomainConcretizationPass());

if (OptDenseTransposeOp) /// Optimize Dense Transpose operation
{
/// If it is a dense transpose ops, the rewrites rules replaces ta.transpose with linalg.transpose, then
/// Create a pass to optimize LinAlg Copy Op - follow in HPTT paper
/// HPTT: A High-Performance Tensor Transposition C++ Library
/// https://arxiv.org/abs/1704.04374
optPM.addPass(mlir::comet::createOptDenseTransposePass());
if (OptWorkspace) {
/// Optimized workspace transformations, reduce iteration space for nonzero elements
optPM.addPass(mlir::comet::createIndexTreeWorkspaceTransformationsPass());
}

optPM.addPass(mlir::comet::createIndexTreeSymbolicComputePass());

/// Finally lowering index tree to SCF dialect
optPM.addPass(mlir::comet::createLowerIndexTreeToSCFPass());
optPM.addPass(mlir::comet::createConvertSymbolicDomainsPass());
optPM.addPass(mlir::comet::createSparseTensorConversionPass());
optPM.addPass(mlir::comet::createIndexTreeInliningPass());
optPM.addPass(mlir::createCanonicalizerPass());

// if (OptDenseTransposeOp) /// Optimize Dense Transpose operation
// {
// /// If it is a dense transpose ops, the rewrites rules replaces ta.transpose with linalg.transpose, then
// /// Create a pass to optimize LinAlg Copy Op - follow in HPTT paper
// /// HPTT: A High-Performance Tensor Transposition C++ Library
// /// https://arxiv.org/abs/1704.04374
// optPM.addPass(mlir::comet::createOptDenseTransposePass());
// }

/// Dump the loops (SCF dialect).
if (emitLoops)
{
if (mlir::failed(pm.run(*module)))
return 4;
return 0;
}
/// =============================================================================
}
/// =============================================================================

/// =============================================================================
/// Late lowering passes
/// =============================================================================
// /// =============================================================================
// /// Late lowering passes
// /// =============================================================================
// pm.addPass(mlir::bufferization::createEmptyTensorToAllocTensorPass());
mlir::bufferization::OneShotBufferizationOptions opts;
opts.allowUnknownOps = true;
pm.addPass(mlir::bufferization::createOneShotBufferizePass(opts));

optPM.addPass(mlir::comet::createSTCRemoveDeadOpsPass());
optPM.addPass(mlir::comet::createLateLoweringPass());
optPM.addPass(mlir::createCanonicalizerPass());
optPM.addPass(mlir::createCSEPass());
mlir::OpPassManager &late_lowering_pm = pm.nest<mlir::func::FuncOp>();
late_lowering_pm.addPass(mlir::comet::createSTCRemoveDeadOpsPass());
late_lowering_pm.addPass(mlir::comet::createLateLoweringPass());

pm.addPass(mlir::createCanonicalizerPass());
pm.addPass(mlir::createCSEPass());

/// =============================================================================
// /// =============================================================================

if (isLoweringToLLVM || emitLLVM)
{
Expand Down Expand Up @@ -481,6 +491,7 @@ int main(int argc, char **argv)
context.loadDialect<mlir::linalg::LinalgDialect>();
context.loadDialect<mlir::scf::SCFDialect>();
context.loadDialect<mlir::bufferization::BufferizationDialect>();
context.loadDialect<mlir::index::IndexDialect>();

mlir::OwningOpRef<mlir::ModuleOp> module;

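
As context for the pipeline changes above, the following standalone sketch shows only the upstream MLIR pass-manager structure the new code relies on: a nested function-level pass manager, one-shot bufferization with unknown ops allowed, and a final canonicalize/CSE cleanup. COMET's own createIndexTree*/createLower* passes introduced by this PR are omitted, so this is illustrative rather than the PR's actual pipeline.

#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

// Runs a minimal module pipeline mirroring the structure used in loadAndProcessMLIR.
static mlir::LogicalResult runCleanupPipeline(mlir::MLIRContext &context, mlir::ModuleOp module) {
  mlir::PassManager pm(&context);

  // Function-level passes go into a nested pass manager, like optPM / late_lowering_pm above.
  mlir::OpPassManager &funcPM = pm.nest<mlir::func::FuncOp>();
  funcPM.addPass(mlir::createCanonicalizerPass());

  // Module-level one-shot bufferization, with unknown ops allowed as in the diff.
  mlir::bufferization::OneShotBufferizationOptions opts;
  opts.allowUnknownOps = true;
  pm.addPass(mlir::bufferization::createOneShotBufferizePass(opts));

  // Late cleanup, as at the end of the lowering pipeline.
  pm.addPass(mlir::createCanonicalizerPass());
  pm.addPass(mlir::createCSEPass());

  return pm.run(module);
}
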
2 changes: 1 addition & 1 deletion frontends/comet_dsl/include/Lexer.h
@@ -35,7 +35,7 @@
#include <string>

// *********** For debug purpose *********//
//#define COMET_DEBUG_MODE
// #define COMET_DEBUG_MODE
#include "comet/Utils/debug.h"
#undef COMET_DEBUG_MODE
// *********** For debug purpose *********//
64 changes: 55 additions & 9 deletions frontends/comet_dsl/mlir/MLIRGen.cpp
@@ -71,7 +71,7 @@ using llvm::Twine;
using StringSet = std::set<std::string>;

// *********** For debug purpose *********//
//#define COMET_DEBUG_MODE
// #define COMET_DEBUG_MODE
#include "comet/Utils/debug.h"
#undef COMET_DEBUG_MODE
// *********** For debug purpose *********//
@@ -591,23 +591,41 @@ namespace
comet_debug() << "\n";

auto lhs_tensor = lhs.getDefiningOp()->getOpResult(0).getType();
assert(lhs_tensor.isa<mlir::TensorType>());

comet_pdump(lhs.getDefiningOp());

auto lhs_labeledtensor = lhs.getDefiningOp()->getOpResult(0);

comet_vdump(lhs_labeledtensor); // ta.labeled_tensor
auto lhs_el_type = lhs_tensor.cast<mlir::TensorType>().getElementType();
mlir::Type lhs_el_type;
if(auto tensor_type = llvm::dyn_cast<mlir::TensorType>(lhs_tensor)){
lhs_el_type = tensor_type.getElementType();
}
else if(auto tensor_type = llvm::dyn_cast<SparseTensorType>(lhs_tensor)){
lhs_el_type = tensor_type.getElementType();
}
else {
assert(false && "Expected a tensor input");
}

auto rhs_tensor = rhs.getDefiningOp()->getOpResult(0).getType();

comet_pdump(rhs.getDefiningOp());
assert(rhs_tensor.isa<mlir::TensorType>());

auto rhs_labeledtensor = rhs.getDefiningOp()->getOpResult(0);

comet_vdump(rhs_labeledtensor);
auto rhs_el_type = rhs_tensor.cast<mlir::TensorType>().getElementType();
mlir::Type rhs_el_type;
if(auto tensor_type = llvm::dyn_cast<mlir::TensorType>(rhs_tensor)){
rhs_el_type = tensor_type.getElementType();
}
else if(auto tensor_type = llvm::dyn_cast<SparseTensorType>(rhs_tensor)){
rhs_el_type = tensor_type.getElementType();
}
else {
assert(false && "Expected a tensor input");
}

auto result_type = getBinOpResultType(lhs_el_type, rhs_el_type);
comet_debug() << __LINE__ << " ";
comet_vdump(result_type);
@@ -817,8 +835,6 @@ namespace
}

std::vector<int64_t> result_dims = getDimSizes(ret_lbls_value);
auto ret_tensor_type = mlir::RankedTensorType::get(result_dims, result_type);

auto affineMapArrayAttr = builder.getAffineMapArrayAttr(affine_maps);

SmallVector<mlir::StringRef, 8> formats;
@@ -1000,18 +1016,29 @@ namespace
}
comet_debug() << __LINE__ << " formats.size(): " << formats.size() << "\n";
assert(formats.size() == 2 && " less than 2 input tensors\n");
mlir::Type ret_tensor_type;
if (formats[0].compare("CSR") == 0 && formats[1].compare("CSR") == 0)
{
formats.push_back("CSR");
std::vector format_array = getFormats("CSR", result_dims.size(), builder.getContext());
ret_tensor_type = SparseTensorType::get(builder.getContext(), result_type, result_dims, format_array);
}
else if (formats[0].compare("Dense") == 0 && formats[1].compare("Dense") == 0)
{
formats.push_back("Dense");
ret_tensor_type = mlir::RankedTensorType::get(result_dims, result_type);
}
else if (out_format.length() > 0) // non-empty format string provided.
{
comet_debug() << " Output Format: " << out_format << "\n";
formats.push_back(out_format);
if(out_format.compare("Dense") == 0)
{
ret_tensor_type = mlir::RankedTensorType::get(result_dims, result_type);
} else {
std::vector format_array = getFormats(out_format, result_dims.size(), builder.getContext());
ret_tensor_type = SparseTensorType::get(builder.getContext(), result_type, result_dims, format_array);
}
}
else
{
@@ -1604,9 +1631,24 @@ namespace
if (isDense(formats_str, ", ") == false)
{
/// BoolAttr is false because there is an explicit sparse tensor declaration.
/// SparseTensorDeclOp is not for temporaries in compound expressions
/// SparseTensorDeclOp is not for temporaries in compound expression
std::vector<int32_t> format = mlir::tensorAlgebra::getFormats(tensor_format, dims_sizes.size(), builder.getContext());
mlir::Type element_type;
switch (vartype.elt_ty)
{
case VarType::TY_FLOAT:
element_type = builder.getF32Type();
break;
case VarType::TY_DOUBLE:
element_type = builder.getF64Type();
break;
case VarType::TY_INT:
element_type = builder.getIntegerType(64);
break;
}
auto sp_tensor_type = SparseTensorType::get(builder.getContext(), element_type, dims_sizes, format);
value = builder.create<SparseTensorDeclOp>(loc(tensordecl.loc()),
tensor_type, labels, tensor_format, false);
sp_tensor_type, labels, tensor_format, false);
comet_debug() << "MLIRGen SparseTensorDeclaration creation\n";
comet_vdump(value);
}
@@ -1864,6 +1906,10 @@ namespace
mlir::StringRef format_strref = dyn_cast<SparseTensorDeclOp>(rhs_tensor.getDefiningOp()).getFormat();
mlir::StringAttr formatAttr = builder.getStringAttr(format_strref);

std::vector<int32_t> format = mlir::tensorAlgebra::getFormats(format_strref, result_dims.size(), builder.getContext());
mlir::Type element_type = builder.getF64Type();
return_type = SparseTensorType::get(builder.getContext(), element_type, result_dims, format);

/// no lhs_LabeledTensor has been created. The output tensor of transpose doesn't have an explicit declaration,
/// BoolAttr is true to specify SparseTensorDeclOp is for temporaries
lhs_tensor = builder.create<SparseTensorDeclOp>(loc(transpose.loc()), return_type, lhs_labels_val, formatAttr, builder.getBoolAttr(true));
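
The element-type dispatch added above in MLIRGen.cpp repeats the same dyn_cast chain for the LHS and RHS operands. As a hedged sketch only, the same dispatch could be written once with llvm::TypeSwitch; SparseTensorType is COMET's new type from this PR, so its case is left as a comment rather than assumed.

#include "llvm/ADT/TypeSwitch.h"
#include "mlir/IR/BuiltinTypes.h"

// Sketch: returns the element type for (ranked or unranked) tensor types and a
// null mlir::Type otherwise. A .Case<SparseTensorType>(...) branch would cover
// COMET's new sparse tensor type.
static mlir::Type getElementTypeOrNull(mlir::Type type) {
  return llvm::TypeSwitch<mlir::Type, mlir::Type>(type)
      .Case<mlir::TensorType>([](mlir::TensorType t) { return t.getElementType(); })
      .Default([](mlir::Type) { return mlir::Type(); });
}
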
7 changes: 2 additions & 5 deletions include/comet/Conversion/IndexTreeToSCF/IndexTreeToSCF.h
@@ -34,16 +34,13 @@ namespace mlir
namespace comet
{
#define GEN_PASS_DECL_CONVERTINDEXTREETOSCF
#define GEN_PASS_DECL_CONVERTSYMBOLICDOMAINS
#include "comet/Conversion/Passes.h.inc"

/// Collect a set of patterns to convert IndexTree operations to SCF
/// operations within the SCF dialect.
void populateIndexTreeToSCFConversionPatterns(RewritePatternSet &patterns);

/// Lowers indexTree operations (e.g., IndexTreeComputeLHSOp, IndexTreeComputeRHSOp and IndexTreeComputeOp)
/// to equivalent scf constructs including basic blocks and arithmetic
/// primitives).
std::unique_ptr<Pass> createLowerIndexTreeToSCFPass();
std::unique_ptr<Pass> createConvertSymbolicDomainsPass();
}
} // namespace mlir
