diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 144433ac78a377..cdcc8cd61f4fa7 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -233,10 +233,6 @@ class DebugRangesSectionWriter {
 
   std::mutex WriterMutex;
 
-  /// Current offset in the section (updated as new entries are written).
-  /// Starts with 16 since the first 16 bytes are reserved for an empty range.
-  uint32_t SectionOffset{0};
-
   /// Offset of an empty address ranges list.
   static constexpr uint64_t EmptyRangesOffset{0};
 
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index 08d4c45aac791d..579af3bce4eb83 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -138,8 +138,7 @@ DebugRangesSectionWriter::DebugRangesSectionWriter() {
   RangesStream = std::make_unique<raw_svector_ostream>(*RangesBuffer);
 
   // Add an empty range as the first entry;
-  SectionOffset +=
-      writeAddressRanges(*RangesStream.get(), DebugAddressRangesVector{});
+  writeAddressRanges(*RangesStream.get(), DebugAddressRangesVector{});
   Kind = RangesWriterKind::DebugRangesWriter;
 }
 
@@ -166,21 +165,20 @@ uint64_t DebugRangesSectionWriter::addRanges(DebugAddressRangesVector &Ranges) {
   // Reading the SectionOffset and updating it should be atomic to guarantee
   // unique and correct offsets in patches.
   std::lock_guard<std::mutex> Lock(WriterMutex);
-  const uint32_t EntryOffset = SectionOffset;
-  SectionOffset += writeAddressRanges(*RangesStream.get(), Ranges);
+  const uint32_t EntryOffset = RangesBuffer->size();
+  writeAddressRanges(*RangesStream.get(), Ranges);
 
   return EntryOffset;
 }
 
 uint64_t DebugRangesSectionWriter::getSectionOffset() {
   std::lock_guard<std::mutex> Lock(WriterMutex);
-  return SectionOffset;
+  return RangesBuffer->size();
 }
 
 void DebugRangesSectionWriter::appendToRangeBuffer(
     const DebugBufferVector &CUBuffer) {
   *RangesStream << CUBuffer;
-  SectionOffset = RangesBuffer->size();
 }
 
 DebugAddrWriter *DebugRangeListsSectionWriter::AddrWriter = nullptr;
@@ -327,7 +325,6 @@ void DebugRangeListsSectionWriter::finalizeSection() {
   *RangesStream << *Header;
   *RangesStream << *CUArrayBuffer;
   *RangesStream << *CUBodyBuffer;
-  SectionOffset = RangesBuffer->size();
 }
 
 void DebugRangeListsSectionWriter::initSection(DWARFUnit &Unit) {
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 1f426d0adfc61c..3751d6bee772e3 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -1342,10 +1342,7 @@ void DWARFRewriter::updateDWARFObjectAddressRanges(
       assert(RangesWriterIterator != LegacyRangesWritersByCU.end() &&
              "RangesWriter does not exist for DWOId");
       RangesWriterIterator->second->setDie(&Die);
-    } else if (Unit.getVersion() == 5) {
-      DIEBldr.addValue(&Die, dwarf::DW_AT_rnglists_base,
-                       dwarf::DW_FORM_sec_offset, DIEInteger(*RangesBase));
-    } else {
+    } else if (Unit.getVersion() >= 5) {
       DIEBldr.addValue(&Die, dwarf::DW_AT_rnglists_base,
                        dwarf::DW_FORM_sec_offset, DIEInteger(*RangesBase));
     }
@@ -1638,14 +1635,13 @@ void DWARFRewriter::finalizeCompileUnits(DIEBuilder &DIEBlder,
            "RangesWriter does not exist for DWOId");
     std::unique_ptr<DebugRangesSectionWriter> &LegacyRangesWriter =
         RangesWriterIterator->second;
-    std::optional<DIE *> Die = LegacyRangesWriter->getDie();
-    if (!Die || !Die.value())
+    DIE *Die = LegacyRangesWriter->getDie();
+    if (!Die)
       continue;
-    DIEValue DvalGNUBase =
-        Die.value()->findAttribute(dwarf::DW_AT_GNU_ranges_base);
+    DIEValue DvalGNUBase = Die->findAttribute(dwarf::DW_AT_GNU_ranges_base);
     assert(DvalGNUBase && "GNU_ranges_base attribute does not exist for DWOId");
     DIEBlder.replaceValue(
-        Die.value(), dwarf::DW_AT_GNU_ranges_base, DvalGNUBase.getForm(),
+        Die, dwarf::DW_AT_GNU_ranges_base, DvalGNUBase.getForm(),
         DIEInteger(LegacyRangesSectionWriter->getSectionOffset()));
     std::unique_ptr<DebugBufferVector> RangesWritersContents =
         LegacyRangesWriter->releaseBuffer();
diff --git a/bolt/test/X86/infer_no_exits.test b/bolt/test/X86/infer_no_exits.test
index b8a39d339e0113..3e208053a3c85f 100644
--- a/bolt/test/X86/infer_no_exits.test
+++ b/bolt/test/X86/infer_no_exits.test
@@ -8,4 +8,4 @@
 
 # PREAGG: B X:0 #main# 1 0
 
-# CHECK: BOLT-INFO: inferred profile for 1 (100.00% of profiled, 100.00% of stale) functions responsible for -nan% samples (0 out of 0)
+# CHECK: BOLT-INFO: inferred profile for 1 (100.00% of profiled, 100.00% of stale) functions
diff --git a/bolt/test/X86/jt-symbol-disambiguation-4.s b/bolt/test/X86/jt-symbol-disambiguation-4.s
index d3d3dcd8070541..ba338a00d23d2d 100644
--- a/bolt/test/X86/jt-symbol-disambiguation-4.s
+++ b/bolt/test/X86/jt-symbol-disambiguation-4.s
@@ -10,7 +10,7 @@
 # REQUIRES: system-linux
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
-# RUN: %clang -no-pie %t.o -o %t.exe -Wl,-q
+# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt --funcs=main,foo/1 %t.exe -o %t.exe.bolt --print-normalized \
 # RUN:   2>&1 | FileCheck %s
 
@@ -18,7 +18,7 @@
 	.globl	main
 	.type	main,@function
 main:
-# CHECK: Binary Function "main"
+# CHECK: Binary Function "main
 	pushq   %rbp
 	movq	%rsp, %rbp
 	movq	$-16, %rax
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index dfa10a42f06e55..1d611ae299e328 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -559,6 +559,15 @@ Attribute Changes in Clang
        size_t count;
      };
 
+- The attributes ``sized_by``, ``counted_by_or_null`` and ``sized_by_or_null``
+  have been added as variants of ``counted_by``, each with slightly different semantics.
+  ``sized_by`` takes a byte size parameter instead of an element count, allowing pointees
+  with unknown size. The ``counted_by_or_null`` and ``sized_by_or_null`` variants are equivalent
+  to their base variants, except the pointer can be null regardless of count/size value.
+  If the pointer is null the size is effectively 0. ``sized_by_or_null`` is needed to properly
+  annotate allocator functions like ``malloc`` that return a buffer of a given byte size, but can
+  also return null.
+
 - The ``guarded_by``, ``pt_guarded_by``, ``acquired_after``, ``acquired_before``
   attributes now support referencing struct members in C. The arguments are also
   now late parsed when ``-fexperimental-late-parse-attributes`` is passed like
@@ -679,6 +688,9 @@ Improvements to Clang's diagnostics
 
 - Clang now shows implicit deduction guides when diagnosing overload resolution failure. #GH92393.
 
+- Clang no longer emits a "no previous prototype" warning for Win32 entry points under ``-Wmissing-prototypes``.
+  Fixes #GH94366.
+
 Improvements to Clang's time-trace
 ----------------------------------
 
@@ -994,6 +1006,7 @@ Bug Fixes to C++ Support
   evaluated to an integer. (#GH96670).
 - Fixed a bug where references to lambda capture inside a ``noexcept`` specifier were not correctly
   instantiated. (#GH95735).
+- Fixed a CTAD substitution bug involving type aliases that reference outer template parameters. (#GH94614).
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/StandardCPlusPlusModules.rst b/clang/docs/StandardCPlusPlusModules.rst
index 1c3c4d319c0e18..cf0528e75e7f2c 100644
--- a/clang/docs/StandardCPlusPlusModules.rst
+++ b/clang/docs/StandardCPlusPlusModules.rst
@@ -1092,6 +1092,74 @@ A high-level overview of support for standards features, including modules, can
 be found on the `C++ Feature Status <https://clang.llvm.org/cxx_status.html>`_
 page.
 
+Missing VTables for classes attached to modules
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Currently, the compiler may fail to emit the definition of the vtable
+for a class attached to a module if the definition of the class
+doesn't contain any key function in that module unit.
+(The key function is the first non-pure virtual function that is
+not inline at the point of class definition.)
+
+(Note: technically, the key function is not a thing for modules.
+We use the concept here for convenience.)
+
+For example,
+
+.. code-block:: c++
+
+  // layer1.cppm
+  export module foo:layer1;
+  struct Fruit {
+      virtual ~Fruit() = default;
+      virtual void eval() = 0;
+  };
+  struct Banana : public Fruit {
+      Banana() {}
+      void eval() override;
+  };
+
+  // layer2.cppm
+  export module foo:layer2;
+  import :layer1;
+  export void layer2_fun() {
+      Banana *b = new Banana();
+      b->eval();
+  }
+  void Banana::eval() {
+  }
+
+For the above example, we can't find the definition for the vtable of
+class ``Banana`` in any object files.
+
+The expected behavior is, for dynamic classes attached to named modules,
+the vtable should always be emitted to the module units the class attaches
+to.
+
+To work around the problem, users can add a key function manually in the
+corresponding module unit, e.g.,
+
+.. code-block:: c++
+
+  // layer1.cppm
+  export module foo:layer1;
+  struct Fruit {
+      virtual ~Fruit() = default;
+      virtual void eval() = 0;
+  };
+  struct Banana : public Fruit {
+      // Hack a key function to hint the compiler to emit the virtual table.
+      virtual void anchor();
+
+      Banana() {}
+      void eval() override;
+  };
+
+  void Banana::anchor() {}
+
+This is tracked by
+`#70585 <https://github.com/llvm/llvm-project/issues/70585>`_.
+
 Including headers after import is not well-supported
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 45dac82e540776..06ffc2ce09b890 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -670,16 +670,6 @@ class alignas(8) Decl {
   /// Whether this declaration comes from another module unit.
   bool isInAnotherModuleUnit() const;
 
-  /// Whether this declaration comes from the same module unit being compiled.
-  bool isInCurrentModuleUnit() const;
-
-  /// Whether the definition of the declaration should be emitted in external
-  /// sources.
-  bool shouldEmitInExternalSource() const;
-
-  /// Whether this declaration comes from a named module;
-  bool isInNamedModule() const;
-
   /// Whether this declaration comes from explicit global module.
   bool isFromExplicitGlobalModule() const;
 
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 82befc656820ef..d2d9dd24536cb0 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2292,6 +2292,36 @@ def CountedBy : DeclOrTypeAttr {
   let LangOpts = [COnly];
 }
 
+def CountedByOrNull : DeclOrTypeAttr {
+  let Spellings = [Clang<"counted_by_or_null">];
+  let Subjects = SubjectList<[Field], ErrorDiag>;
+  let Args = [ExprArgument<"Count">, IntArgument<"NestedLevel", 1>];
+  let LateParsed = LateAttrParseExperimentalExt;
+  let ParseArgumentsAsUnevaluated = 1;
+  let Documentation = [CountedByDocs];
+  let LangOpts = [COnly];
+}
+
+def SizedBy : DeclOrTypeAttr {
+  let Spellings = [Clang<"sized_by">];
+  let Subjects = SubjectList<[Field], ErrorDiag>;
+  let Args = [ExprArgument<"Size">, IntArgument<"NestedLevel", 1>];
+  let LateParsed = LateAttrParseExperimentalExt;
+  let ParseArgumentsAsUnevaluated = 1;
+  let Documentation = [CountedByDocs];
+  let LangOpts = [COnly];
+}
+
+def SizedByOrNull : DeclOrTypeAttr {
+  let Spellings = [Clang<"sized_by_or_null">];
+  let Subjects = SubjectList<[Field], ErrorDiag>;
+  let Args = [ExprArgument<"Size">, IntArgument<"NestedLevel", 1>];
+  let LateParsed = LateAttrParseExperimentalExt;
+  let ParseArgumentsAsUnevaluated = 1;
+  let Documentation = [CountedByDocs];
+  let LangOpts = [COnly];
+}
+
 // This is a marker used to indicate that an __unsafe_unretained qualifier was
 // ignored because ARC is not enabled. The usual representation for this
 // qualifier is as an ObjCOwnership attribute with Kind == "none".
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d67b279321dfca..0f114a408c38b4 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6570,28 +6570,28 @@ def warn_superclass_variable_sized_type_not_at_end : Warning<
   "field %0 can overwrite instance variable %1 with variable sized type %2"
   " in superclass %3">, InGroup<ObjCFlexibleArray>;
 
-def err_flexible_array_count_not_in_same_struct : Error<
-  "'counted_by' field %0 isn't within the same struct as the flexible array">;
-def err_counted_by_attr_not_on_ptr_or_flexible_array_member : Error<
-  "'counted_by' only applies to pointers or C99 flexible array members">;
+def err_count_attr_param_not_in_same_struct : Error<
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}1' field %0 isn't within the same struct as the annotated %select{pointer|flexible array}2">;
+def err_count_attr_not_on_ptr_or_flexible_array_member : Error<
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}0' only applies to pointers%select{ or C99 flexible array members|||}0%select{|; did you mean to use 'counted_by'?}1">;
 def err_counted_by_attr_on_array_not_flexible_array_member : Error<
   "'counted_by' on arrays only applies to C99 flexible array members">;
 def err_counted_by_attr_refer_to_itself : Error<
   "'counted_by' cannot refer to the flexible array member %0">;
-def err_counted_by_must_be_in_structure : Error<
-  "field %0 in 'counted_by' not inside structure">;
-def err_counted_by_attr_argument_not_integer : Error<
-  "'counted_by' requires a non-boolean integer type argument">;
-def err_counted_by_attr_only_support_simple_decl_reference : Error<
-  "'counted_by' argument must be a simple declaration reference">;
-def err_counted_by_attr_in_union : Error<
-  "'counted_by' cannot be applied to a union member">;
-def err_counted_by_attr_refer_to_union : Error<
-  "'counted_by' argument cannot refer to a union member">;
+def err_count_attr_must_be_in_structure : Error<
+  "field %0 in '%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}1' not inside structure">;
+def err_count_attr_argument_not_integer : Error<
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}0' requires a non-boolean integer type argument">;
+def err_count_attr_only_support_simple_decl_reference : Error<
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}0' argument must be a simple declaration reference">;
+def err_count_attr_in_union : Error<
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}0' cannot be applied to a union member">;
+def err_count_attr_refer_to_union : Error<
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}0' argument cannot refer to a union member">;
 def note_flexible_array_counted_by_attr_field : Note<
   "field %0 declared here">;
 def err_counted_by_attr_pointee_unknown_size : Error<
-  "'counted_by' %select{cannot|should not}3 be applied to %select{"
+  "'%select{counted_by|sized_by|counted_by_or_null|sized_by_or_null}4' %select{cannot|should not}3 be applied to %select{"
     "a pointer with pointee|" // pointer
     "an array with element}0" // array
   " of unknown size because %1 is %select{"
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index df72249433824f..2584b70338104d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -14608,7 +14608,9 @@ class Sema final : public SemaBase {
                            SourceLocation AttrLoc);
 
   QualType BuildCountAttributedArrayOrPointerType(QualType WrappedTy,
-                                                  Expr *CountExpr);
+                                                  Expr *CountExpr,
+                                                  bool CountInBytes,
+                                                  bool OrNull);
 
   /// BuildAddressSpaceAttr - Builds a DependentAddressSpaceType if an
   /// expression is uninstantiated. If instantiated it will apply the
diff --git a/clang/include/clang/Sema/Template.h b/clang/include/clang/Sema/Template.h
index ce44aca797b0fb..0340c23fd170d6 100644
--- a/clang/include/clang/Sema/Template.h
+++ b/clang/include/clang/Sema/Template.h
@@ -711,6 +711,7 @@ enum class TemplateSubstitutionKind : char {
         VarTemplateSpecializationDecl *PrevDecl = nullptr);
 
     Decl *InstantiateTypedefNameDecl(TypedefNameDecl *D, bool IsTypeAlias);
+    Decl *InstantiateTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
     ClassTemplatePartialSpecializationDecl *
     InstantiateClassTemplatePartialSpecialization(
                                               ClassTemplateDecl *ClassTemplate,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 38502a23f805e5..488994c05dc122 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -721,9 +721,6 @@ enum ASTRecordTypes {
 
   /// Record code for \#pragma clang unsafe_buffer_usage begin/end
   PP_UNSAFE_BUFFER_USAGE = 69,
-
-  /// Record code for vtables to emit.
-  VTABLES_TO_EMIT = 70,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index f41c473c97cd92..76e51ac7ab9792 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -790,11 +790,6 @@ class ASTReader
   /// the consumer eagerly.
   SmallVector<GlobalDeclID, 16> EagerlyDeserializedDecls;
 
-  /// The IDs of all vtables to emit. The referenced declarations are passed
-  /// to the consumers's HandleVTable eagerly after passing
-  /// EagerlyDeserializedDecls.
-  SmallVector<GlobalDeclID, 16> VTablesToEmit;
-
   /// The IDs of all tentative definitions stored in the chain.
   ///
   /// Sema keeps track of all tentative definitions in a TU because it has to
@@ -1505,7 +1500,6 @@ class ASTReader
   bool isConsumerInterestedIn(Decl *D);
   void PassInterestingDeclsToConsumer();
   void PassInterestingDeclToConsumer(Decl *D);
-  void PassVTableToConsumer(CXXRecordDecl *RD);
 
   void finishPendingActions();
   void diagnoseOdrViolations();
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 71a7c28047e318..a0e475ec9f862c 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -500,10 +500,6 @@ class ASTWriter : public ASTDeserializationListener,
   std::vector<SourceRange> NonAffectingRanges;
   std::vector<SourceLocation::UIntTy> NonAffectingOffsetAdjustments;
 
-  /// A list of classes which need to emit the VTable in the corresponding
-  /// object file.
-  llvm::SmallVector<CXXRecordDecl *> PendingEmittingVTables;
-
   /// Computes input files that didn't affect compilation of the current module,
   /// and initializes data structures necessary for leaving those files out
   /// during \c SourceManager serialization.
@@ -861,8 +857,6 @@ class ASTWriter : public ASTDeserializationListener,
     return PredefinedDecls.count(D);
   }
 
-  void handleVTable(CXXRecordDecl *RD);
-
 private:
   // ASTDeserializationListener implementation
   void ReaderInitialized(ASTReader *Reader) override;
@@ -957,7 +951,6 @@ class PCHGenerator : public SemaConsumer {
 
   void InitializeSema(Sema &S) override { SemaPtr = &S; }
   void HandleTranslationUnit(ASTContext &Ctx) override;
-  void HandleVTable(CXXRecordDecl *RD) override { Writer.handleVTable(RD); }
   ASTMutationListener *GetASTMutationListener() override;
   ASTDeserializationListener *GetASTDeserializationListener() override;
   bool hasEmittedPCH() const { return Buffer->IsComplete; }
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 3970a4cc7c616e..ecccab08cbaab4 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -1197,7 +1197,7 @@ static bool isExportedFromModuleInterfaceUnit(const NamedDecl *D) {
     return false;
   case Decl::ModuleOwnershipKind::Visible:
   case Decl::ModuleOwnershipKind::VisibleWhenImported:
-    return D->isInNamedModule();
+    return isInNamedModule(D);
   }
   llvm_unreachable("unexpected module ownership kind");
 }
@@ -1215,7 +1215,7 @@ Linkage NamedDecl::getFormalLinkage() const {
   // [basic.namespace.general]/p2
   //   A namespace is never attached to a named module and never has a name with
   //   module linkage.
-  if (isInNamedModule() && InternalLinkage == Linkage::External &&
+  if (isInNamedModule(this) && InternalLinkage == Linkage::External &&
       !isExportedFromModuleInterfaceUnit(
           cast<NamedDecl>(this->getCanonicalDecl())) &&
       !isa<NamespaceDecl>(this))
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 7318841fd9721c..eef946e3aea2e4 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -1141,28 +1141,6 @@ bool Decl::isInAnotherModuleUnit() const {
   return M != getASTContext().getCurrentNamedModule();
 }
 
-
-bool Decl::isInCurrentModuleUnit() const {
-  auto *M = getOwningModule();
-
-  if (!M || !M->isNamedModule())
-    return false;
-
-  return M == getASTContext().getCurrentNamedModule();
-}
-
-bool Decl::shouldEmitInExternalSource() const {
-  ExternalASTSource *Source = getASTContext().getExternalSource();
-  if (!Source)
-    return false;
-
-  return Source->hasExternalDefinitions(this) == ExternalASTSource::EK_Always;
-}
-
-bool Decl::isInNamedModule() const {
-  return getOwningModule() && getOwningModule()->isNamedModule();
-}
-
 bool Decl::isFromExplicitGlobalModule() const {
   return getOwningModule() && getOwningModule()->isExplicitGlobalModule();
 }
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 7c87fd587880ed..ffec3ef9d22692 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1934,6 +1934,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
     break;
 
   case attr::CountedBy:
+  case attr::CountedByOrNull:
+  case attr::SizedBy:
+  case attr::SizedByOrNull:
   case attr::LifetimeBound:
   case attr::TypeNonNull:
   case attr::TypeNullable:
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index cc7be64656e5b2..3b748d0249d57b 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -187,9 +187,15 @@ bool AMDGPUTargetInfo::initFeatureMap(
     return false;
 
   // TODO: Should move this logic into TargetParser
-  std::string ErrorMsg;
-  if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
-    Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
+  auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
+  switch (HasError.first) {
+  default:
+    break;
+  case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
+    Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
+    return false;
+  case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
+    Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
     return false;
   }
 
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 417d66a6457d86..a72ed2be44bce0 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -1080,38 +1080,28 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
   if (!RD->isExternallyVisible())
     return llvm::GlobalVariable::InternalLinkage;
 
-  bool IsInNamedModule = RD->isInNamedModule();
-  // If the CXXRecordDecl are not in a module unit, we need to get
-  // its key function. We're at the end of the translation unit, so the current
-  // key function is fully correct.
-  const CXXMethodDecl *keyFunction =
-      IsInNamedModule ? nullptr : Context.getCurrentKeyFunction(RD);
-  if (IsInNamedModule || (keyFunction && !RD->hasAttr<DLLImportAttr>())) {
+  // We're at the end of the translation unit, so the current key
+  // function is fully correct.
+  const CXXMethodDecl *keyFunction = Context.getCurrentKeyFunction(RD);
+  if (keyFunction && !RD->hasAttr<DLLImportAttr>()) {
     // If this class has a key function, use that to determine the
     // linkage of the vtable.
     const FunctionDecl *def = nullptr;
-    if (keyFunction && keyFunction->hasBody(def))
+    if (keyFunction->hasBody(def))
       keyFunction = cast<CXXMethodDecl>(def);
 
-    bool IsExternalDefinition =
-        IsInNamedModule ? RD->shouldEmitInExternalSource() : !def;
-
-    TemplateSpecializationKind Kind =
-        IsInNamedModule ? RD->getTemplateSpecializationKind()
-                        : keyFunction->getTemplateSpecializationKind();
-
-    switch (Kind) {
-    case TSK_Undeclared:
-    case TSK_ExplicitSpecialization:
+    switch (keyFunction->getTemplateSpecializationKind()) {
+      case TSK_Undeclared:
+      case TSK_ExplicitSpecialization:
       assert(
-          (IsInNamedModule || def || CodeGenOpts.OptimizationLevel > 0 ||
+          (def || CodeGenOpts.OptimizationLevel > 0 ||
            CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo) &&
-          "Shouldn't query vtable linkage without the class in module units, "
-          "key function, optimizations, or debug info");
-      if (IsExternalDefinition && CodeGenOpts.OptimizationLevel > 0)
+          "Shouldn't query vtable linkage without key function, "
+          "optimizations, or debug info");
+      if (!def && CodeGenOpts.OptimizationLevel > 0)
         return llvm::GlobalVariable::AvailableExternallyLinkage;
 
-      if (keyFunction && keyFunction->isInlined())
+      if (keyFunction->isInlined())
         return !Context.getLangOpts().AppleKext
                    ? llvm::GlobalVariable::LinkOnceODRLinkage
                    : llvm::Function::InternalLinkage;
@@ -1130,7 +1120,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
 
       case TSK_ExplicitInstantiationDeclaration:
         llvm_unreachable("Should not have been asked to emit this");
-      }
+    }
   }
 
   // -fapple-kext mode does not support weak linkage, so we must use
@@ -1224,21 +1214,6 @@ bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) {
       TSK == TSK_ExplicitInstantiationDefinition)
     return false;
 
-  // Itanium C++ ABI [5.2.3]:
-  // Virtual tables for dynamic classes are emitted as follows:
-  //
-  // - If the class is templated, the tables are emitted in every object that
-  // references any of them.
-  // - Otherwise, if the class is attached to a module, the tables are uniquely
-  // emitted in the object for the module unit in which it is defined.
-  // - Otherwise, if the class has a key function (see below), the tables are
-  // emitted in the object for the translation unit containing the definition of
-  // the key function. This is unique if the key function is not inline.
-  // - Otherwise, the tables are emitted in every object that references any of
-  // them.
-  if (RD->isInNamedModule())
-    return RD->shouldEmitInExternalSource();
-
   // Otherwise, if the class doesn't have a key function (possibly
   // anymore), the vtable must be defined here.
   const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD);
@@ -1248,7 +1223,13 @@ bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) {
   const FunctionDecl *Def;
   // Otherwise, if we don't have a definition of the key function, the
   // vtable must be defined somewhere else.
-  return !keyFunction->hasBody(Def);
+  if (!keyFunction->hasBody(Def))
+    return true;
+
+  assert(Def && "The body of the key function is not assigned to Def?");
+  // If the non-inline key function comes from another module unit, the vtable
+  // must be defined there.
+  return Def->isInAnotherModuleUnit() && !Def->isInlineSpecified();
 }
 
 /// Given that we're currently at the end of the translation unit, and
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 63e36e1b838936..e1d056765a8663 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2161,9 +2161,6 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
   if (!canSpeculativelyEmitVTableAsBaseClass(RD))
     return false;
 
-  if (RD->shouldEmitInExternalSource())
-    return false;
-
   // For a complete-object vtable (or more specifically, for the VTT), we need
   // to be able to speculatively emit the vtables of all dynamic virtual bases.
   for (const auto &B : RD->vbases()) {
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index b9cf258215334f..b494ad2be53bda 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -700,7 +700,10 @@ void Parser::ParseGNUAttributeArgs(
     ParseAttributeWithTypeArg(*AttrName, AttrNameLoc, Attrs, ScopeName,
                               ScopeLoc, Form);
     return;
-  } else if (AttrKind == ParsedAttr::AT_CountedBy) {
+  } else if (AttrKind == ParsedAttr::AT_CountedBy ||
+             AttrKind == ParsedAttr::AT_CountedByOrNull ||
+             AttrKind == ParsedAttr::AT_SizedBy ||
+             AttrKind == ParsedAttr::AT_SizedByOrNull) {
     ParseBoundsAttribute(*AttrName, AttrNameLoc, Attrs, ScopeName, ScopeLoc,
                          Form);
     return;
@@ -4866,9 +4869,8 @@ static void DiagnoseCountAttributedTypeInUnnamedAnon(ParsingDeclSpec &DS,
 
     for (const auto &DD : CAT->dependent_decls()) {
       if (!RD->containsDecl(DD.getDecl())) {
-        P.Diag(VD->getBeginLoc(),
-               diag::err_flexible_array_count_not_in_same_struct)
-            << DD.getDecl();
+        P.Diag(VD->getBeginLoc(), diag::err_count_attr_param_not_in_same_struct)
+            << DD.getDecl() << CAT->getKind() << CAT->isArrayType();
         P.Diag(DD.getDecl()->getBeginLoc(),
                diag::note_flexible_array_counted_by_attr_field)
             << DD.getDecl();
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index ecf8eb6a3af289..ad9555eb79ebd6 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -15201,6 +15201,9 @@ ShouldWarnAboutMissingPrototype(const FunctionDecl *FD,
       if (II->isStr("main") || II->isStr("efi_main"))
         return false;
 
+  if (FD->isMSVCRTEntryPoint())
+    return false;
+
   // Don't warn about inline functions.
   if (FD->isInlined())
     return false;
@@ -18007,15 +18010,6 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
       if (NumInitMethods > 1 || !Def->hasInitMethod())
         Diag(RD->getLocation(), diag::err_sycl_special_type_num_init_method);
     }
-
-    // If we're defining a dynamic class in a module interface unit, we always
-    // need to produce the vtable for it even if the vtable is not used in the
-    // current TU.
-    //
-    // The case that the current class is not dynamic is handled in
-    // MarkVTableUsed.
-    if (getCurrentModule() && getCurrentModule()->isInterfaceOrPartition())
-      MarkVTableUsed(RD->getLocation(), RD, /*DefinitionRequired=*/true);
   }
 
   // Exit this scope of this tag's definition.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 2f16d0f76dbd9d..73a85ff39667bc 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5868,6 +5868,15 @@ static const RecordDecl *GetEnclosingNamedOrTopAnonRecord(const FieldDecl *FD) {
   return RD;
 }
 
+/// Select the counted_by-family kind matching the two attribute flags.
+static CountAttributedType::DynamicCountPointerKind
+getCountAttrKind(bool CountInBytes, bool OrNull) {
+  using CAT = CountAttributedType;
+  if (OrNull)
+    return CountInBytes ? CAT::SizedByOrNull : CAT::CountedByOrNull;
+  return CountInBytes ? CAT::SizedBy : CAT::CountedBy;
+}
+
 enum class CountedByInvalidPointeeTypeKind {
   INCOMPLETE,
   SIZELESS,
@@ -5876,22 +5885,31 @@ enum class CountedByInvalidPointeeTypeKind {
   VALID,
 };
 
-static bool CheckCountedByAttrOnField(
-    Sema &S, FieldDecl *FD, Expr *E,
-    llvm::SmallVectorImpl<TypeCoupledDeclRefInfo> &Decls) {
+static bool
+CheckCountedByAttrOnField(Sema &S, FieldDecl *FD, Expr *E,
+                          llvm::SmallVectorImpl<TypeCoupledDeclRefInfo> &Decls,
+                          bool CountInBytes, bool OrNull) {
   // Check the context the attribute is used in
 
+  unsigned Kind = getCountAttrKind(CountInBytes, OrNull);
+
   if (FD->getParent()->isUnion()) {
-    S.Diag(FD->getBeginLoc(), diag::err_counted_by_attr_in_union)
-        << FD->getSourceRange();
+    S.Diag(FD->getBeginLoc(), diag::err_count_attr_in_union)
+        << Kind << FD->getSourceRange();
     return true;
   }
 
   const auto FieldTy = FD->getType();
+  if (FieldTy->isArrayType() && (CountInBytes || OrNull)) {
+    S.Diag(FD->getBeginLoc(),
+           diag::err_count_attr_not_on_ptr_or_flexible_array_member)
+        << Kind << FD->getLocation() << /* suggest counted_by */ 1;
+    return true;
+  }
   if (!FieldTy->isArrayType() && !FieldTy->isPointerType()) {
     S.Diag(FD->getBeginLoc(),
-           diag::err_counted_by_attr_not_on_ptr_or_flexible_array_member)
-        << FD->getLocation();
+           diag::err_count_attr_not_on_ptr_or_flexible_array_member)
+        << Kind << FD->getLocation() << /* do not suggest counted_by */ 0;
     return true;
   }
 
@@ -5902,7 +5920,7 @@ static bool CheckCountedByAttrOnField(
                                        StrictFlexArraysLevel, true)) {
     S.Diag(FD->getBeginLoc(),
            diag::err_counted_by_attr_on_array_not_flexible_array_member)
-        << FD->getLocation();
+        << Kind << FD->getLocation();
     return true;
   }
 
@@ -5923,7 +5941,7 @@ static bool CheckCountedByAttrOnField(
   // only `PointeeTy->isStructureTypeWithFlexibleArrayMember()` is reachable
   // when `FieldTy->isArrayType()`.
   bool ShouldWarn = false;
-  if (PointeeTy->isIncompleteType()) {
+  if (PointeeTy->isIncompleteType() && !CountInBytes) {
     InvalidTypeKind = CountedByInvalidPointeeTypeKind::INCOMPLETE;
   } else if (PointeeTy->isSizelessType()) {
     InvalidTypeKind = CountedByInvalidPointeeTypeKind::SIZELESS;
@@ -5948,23 +5966,23 @@ static bool CheckCountedByAttrOnField(
                           : diag::err_counted_by_attr_pointee_unknown_size;
     S.Diag(FD->getBeginLoc(), DiagID)
         << SelectPtrOrArr << PointeeTy << (int)InvalidTypeKind
-        << (ShouldWarn ? 1 : 0) << FD->getSourceRange();
+        << (ShouldWarn ? 1 : 0) << Kind << FD->getSourceRange();
     return true;
   }
 
   // Check the expression
 
   if (!E->getType()->isIntegerType() || E->getType()->isBooleanType()) {
-    S.Diag(E->getBeginLoc(), diag::err_counted_by_attr_argument_not_integer)
-        << E->getSourceRange();
+    S.Diag(E->getBeginLoc(), diag::err_count_attr_argument_not_integer)
+        << Kind << E->getSourceRange();
     return true;
   }
 
   auto *DRE = dyn_cast<DeclRefExpr>(E);
   if (!DRE) {
     S.Diag(E->getBeginLoc(),
-           diag::err_counted_by_attr_only_support_simple_decl_reference)
-        << E->getSourceRange();
+           diag::err_count_attr_only_support_simple_decl_reference)
+        << Kind << E->getSourceRange();
     return true;
   }
 
@@ -5974,8 +5992,8 @@ static bool CheckCountedByAttrOnField(
     CountFD = IFD->getAnonField();
   }
   if (!CountFD) {
-    S.Diag(E->getBeginLoc(), diag::err_counted_by_must_be_in_structure)
-        << CountDecl << E->getSourceRange();
+    S.Diag(E->getBeginLoc(), diag::err_count_attr_must_be_in_structure)
+        << CountDecl << Kind << E->getSourceRange();
 
     S.Diag(CountDecl->getBeginLoc(),
            diag::note_flexible_array_counted_by_attr_field)
@@ -5985,8 +6003,8 @@ static bool CheckCountedByAttrOnField(
 
   if (FD->getParent() != CountFD->getParent()) {
     if (CountFD->getParent()->isUnion()) {
-      S.Diag(CountFD->getBeginLoc(), diag::err_counted_by_attr_refer_to_union)
-          << CountFD->getSourceRange();
+      S.Diag(CountFD->getBeginLoc(), diag::err_count_attr_refer_to_union)
+          << Kind << CountFD->getSourceRange();
       return true;
     }
     // Whether CountRD is an anonymous struct is not determined at this
@@ -5996,9 +6014,8 @@ static bool CheckCountedByAttrOnField(
     auto *CountRD = GetEnclosingNamedOrTopAnonRecord(CountFD);
 
     if (RD != CountRD) {
-      S.Diag(E->getBeginLoc(),
-             diag::err_flexible_array_count_not_in_same_struct)
-          << CountFD << E->getSourceRange();
+      S.Diag(E->getBeginLoc(), diag::err_count_attr_param_not_in_same_struct)
+          << CountFD << Kind << FieldTy->isArrayType() << E->getSourceRange();
       S.Diag(CountFD->getBeginLoc(),
              diag::note_flexible_array_counted_by_attr_field)
           << CountFD << CountFD->getSourceRange();
@@ -6018,12 +6035,35 @@ static void handleCountedByAttrField(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (!CountExpr)
     return;
 
+  bool CountInBytes;
+  bool OrNull;
+  switch (AL.getKind()) {
+  case ParsedAttr::AT_CountedBy:
+    CountInBytes = false;
+    OrNull = false;
+    break;
+  case ParsedAttr::AT_CountedByOrNull:
+    CountInBytes = false;
+    OrNull = true;
+    break;
+  case ParsedAttr::AT_SizedBy:
+    CountInBytes = true;
+    OrNull = false;
+    break;
+  case ParsedAttr::AT_SizedByOrNull:
+    CountInBytes = true;
+    OrNull = true;
+    break;
+  default:
+    llvm_unreachable("unexpected counted_by family attribute");
+  }
+
   llvm::SmallVector<TypeCoupledDeclRefInfo, 1> Decls;
-  if (CheckCountedByAttrOnField(S, FD, CountExpr, Decls))
+  if (CheckCountedByAttrOnField(S, FD, CountExpr, Decls, CountInBytes, OrNull))
     return;
 
-  QualType CAT =
-      S.BuildCountAttributedArrayOrPointerType(FD->getType(), CountExpr);
+  QualType CAT = S.BuildCountAttributedArrayOrPointerType(
+      FD->getType(), CountExpr, CountInBytes, OrNull);
   FD->setType(CAT);
 }
 
@@ -6971,6 +7011,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
     break;
 
   case ParsedAttr::AT_CountedBy:
+  case ParsedAttr::AT_CountedByOrNull:
+  case ParsedAttr::AT_SizedBy:
+  case ParsedAttr::AT_SizedByOrNull:
     handleCountedByAttrField(S, D, AL);
     break;
 
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 5959c3bdab4446..727a1bb15b0886 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -18541,15 +18541,11 @@ bool Sema::DefineUsedVTables() {
 
     bool DefineVTable = true;
 
+    // If this class has a key function, but that key function is
+    // defined in another translation unit, we don't need to emit the
+    // vtable even though we're using it.
     const CXXMethodDecl *KeyFunction = Context.getCurrentKeyFunction(Class);
-    // V-tables for non-template classes with an owning module are always
-    // uniquely emitted in that module.
-    if (Class->isInCurrentModuleUnit())
-      DefineVTable = true;
-    else if (KeyFunction && !KeyFunction->hasBody()) {
-      // If this class has a key function, but that key function is
-      // defined in another translation unit, we don't need to emit the
-      // vtable even though we're using it.
+    if (KeyFunction && !KeyFunction->hasBody()) {
       // The key function is in another translation unit.
       DefineVTable = false;
       TemplateSpecializationKind TSK =
@@ -18594,7 +18590,7 @@ bool Sema::DefineUsedVTables() {
     DefinedAnything = true;
     MarkVirtualMembersReferenced(Loc, Class);
     CXXRecordDecl *Canonical = Class->getCanonicalDecl();
-    if (VTablesUsed[Canonical] && !Class->shouldEmitInExternalSource())
+    if (VTablesUsed[Canonical])
       Consumer.HandleVTable(Class);
 
     // Warn if we're emitting a weak vtable. The vtable will be weak if there is
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 34ceee110a800b..122fec671b19f2 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2204,23 +2204,110 @@ namespace {
 class ExtractTypeForDeductionGuide
   : public TreeTransform<ExtractTypeForDeductionGuide> {
   llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs;
+  ClassTemplateDecl *NestedPattern;
+  const MultiLevelTemplateArgumentList *OuterInstantiationArgs;
+  std::optional<TemplateDeclInstantiator> TypedefNameInstantiator;
 
 public:
   typedef TreeTransform<ExtractTypeForDeductionGuide> Base;
   ExtractTypeForDeductionGuide(
       Sema &SemaRef,
-      llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs)
-      : Base(SemaRef), MaterializedTypedefs(MaterializedTypedefs) {}
+      llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs,
+      ClassTemplateDecl *NestedPattern,
+      const MultiLevelTemplateArgumentList *OuterInstantiationArgs)
+      : Base(SemaRef), MaterializedTypedefs(MaterializedTypedefs),
+        NestedPattern(NestedPattern),
+        OuterInstantiationArgs(OuterInstantiationArgs) {
+    if (OuterInstantiationArgs)
+      TypedefNameInstantiator.emplace(
+          SemaRef, SemaRef.getASTContext().getTranslationUnitDecl(),
+          *OuterInstantiationArgs);
+  }
 
   TypeSourceInfo *transform(TypeSourceInfo *TSI) { return TransformType(TSI); }
 
+  /// Returns true if \p Typedef is declared in NestedPattern's templated
+  /// record, in a record nested within it, or in a record that encloses it.
+  bool mightReferToOuterTemplateParameters(TypedefNameDecl *Typedef) {
+    if (!NestedPattern)
+      return false;
+
+    // True when To is From itself or is met while climbing From's records.
+    auto ReachesViaRecords = [](DeclContext *From, DeclContext *To) {
+      if (From->Equals(To))
+        return true;
+      for (; From->isRecord(); From = From->getParent())
+        if (From->Equals(To))
+          return true;
+      return false;
+    };
+
+    DeclContext *TypedefDC = Typedef->getDeclContext();
+    DeclContext *PatternDC = NestedPattern->getTemplatedDecl();
+    // Check nesting in both directions: the typedef may live inside the
+    // pattern, or the pattern may live inside the typedef's record.
+    return ReachesViaRecords(TypedefDC, PatternDC) ||
+           ReachesViaRecords(PatternDC, TypedefDC);
+  }
+
+  /// Rebuilds a specialization of an alias template that may refer to outer
+  /// template parameters against a copy of that alias template instantiated
+  /// with the outer arguments; every other case goes to the base rebuild.
+  QualType
+  RebuildTemplateSpecializationType(TemplateName Template,
+                                    SourceLocation TemplateNameLoc,
+                                    TemplateArgumentListInfo &TemplateArgs) {
+    auto *AliasTD =
+        OuterInstantiationArgs ? dyn_cast_if_present<TypeAliasTemplateDecl>(
+                                     Template.getAsTemplateDecl())
+                               : nullptr;
+    // Walk back to the member template this alias was instantiated from.
+    auto *Root = AliasTD;
+    while (Root && Root->getInstantiatedFromMemberTemplate())
+      Root = Root->getInstantiatedFromMemberTemplate();
+    if (!Root || !mightReferToOuterTemplateParameters(Root->getTemplatedDecl()))
+      return Base::RebuildTemplateSpecializationType(Template, TemplateNameLoc,
+                                                     TemplateArgs);
+
+    Decl *Instantiated =
+        TypedefNameInstantiator->InstantiateTypeAliasTemplateDecl(AliasTD);
+    if (!Instantiated)
+      return QualType();
+    auto *NewAlias = cast<TypeAliasTemplateDecl>(Instantiated);
+    MaterializedTypedefs.push_back(NewAlias->getTemplatedDecl());
+    return Base::RebuildTemplateSpecializationType(
+        TemplateName(NewAlias), TemplateNameLoc, TemplateArgs);
+  }
+
   QualType TransformTypedefType(TypeLocBuilder &TLB, TypedefTypeLoc TL) {
     ASTContext &Context = SemaRef.getASTContext();
     TypedefNameDecl *OrigDecl = TL.getTypedefNameDecl();
     TypedefNameDecl *Decl = OrigDecl;
     // Transform the underlying type of the typedef and clone the Decl only if
     // the typedef has a dependent context.
-    if (OrigDecl->getDeclContext()->isDependentContext()) {
+    bool InDependentContext = OrigDecl->getDeclContext()->isDependentContext();
+
+    // A typedef/alias Decl within the NestedPattern may reference the outer
+    // template parameters. They're substituted with corresponding instantiation
+    // arguments here and in RebuildTemplateSpecializationType() above.
+    // Otherwise, we would have a CTAD guide with "dangling" template
+    // parameters.
+    // For example,
+    //   template <class T> struct Outer {
+    //     using Alias = S<T>;
+    //     template <class U> struct Inner {
+    //       Inner(Alias);
+    //     };
+    //   };
+    if (OuterInstantiationArgs && InDependentContext &&
+        TL.getTypePtr()->isInstantiationDependentType()) {
+      Decl = cast_if_present<TypedefNameDecl>(
+          TypedefNameInstantiator->InstantiateTypedefNameDecl(
+              OrigDecl, /*IsTypeAlias=*/isa<TypeAliasDecl>(OrigDecl)));
+      if (!Decl)
+        return QualType();
+      MaterializedTypedefs.push_back(Decl);
+    } else if (InDependentContext) {
       TypeLocBuilder InnerTLB;
       QualType Transformed =
           TransformType(InnerTLB, OrigDecl->getTypeSourceInfo()->getTypeLoc());
@@ -2567,8 +2654,9 @@ struct ConvertConstructorToDeductionGuideTransform {
       // defined outside of the surrounding class template. That is T in the
       // above example.
       if (NestedPattern) {
-        NewParam = transformFunctionTypeParam(NewParam, OuterInstantiationArgs,
-                                              MaterializedTypedefs);
+        NewParam = transformFunctionTypeParam(
+            NewParam, OuterInstantiationArgs, MaterializedTypedefs,
+            /*TransformingOuterPatterns=*/true);
         if (!NewParam)
           return QualType();
       }
@@ -2576,7 +2664,8 @@ struct ConvertConstructorToDeductionGuideTransform {
       // defined at the class template and the constructor. In this example,
       // they're U and V, respectively.
       NewParam =
-          transformFunctionTypeParam(NewParam, Args, MaterializedTypedefs);
+          transformFunctionTypeParam(NewParam, Args, MaterializedTypedefs,
+                                     /*TransformingOuterPatterns=*/false);
       if (!NewParam)
         return QualType();
       ParamTypes.push_back(NewParam->getType());
@@ -2620,7 +2709,8 @@ struct ConvertConstructorToDeductionGuideTransform {
 
   ParmVarDecl *transformFunctionTypeParam(
       ParmVarDecl *OldParam, MultiLevelTemplateArgumentList &Args,
-      llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs) {
+      llvm::SmallVectorImpl<TypedefNameDecl *> &MaterializedTypedefs,
+      bool TransformingOuterPatterns) {
     TypeSourceInfo *OldDI = OldParam->getTypeSourceInfo();
     TypeSourceInfo *NewDI;
     if (auto PackTL = OldDI->getTypeLoc().getAs<PackExpansionTypeLoc>()) {
@@ -2643,7 +2733,9 @@ struct ConvertConstructorToDeductionGuideTransform {
     // members of the current instantiations with the definitions of those
     // typedefs, avoiding triggering instantiation of the deduced type during
     // deduction.
-    NewDI = ExtractTypeForDeductionGuide(SemaRef, MaterializedTypedefs)
+    NewDI = ExtractTypeForDeductionGuide(
+                SemaRef, MaterializedTypedefs, NestedPattern,
+                TransformingOuterPatterns ? &Args : nullptr)
                 .transform(NewDI);
 
     // Resolving a wording defect, we also inherit default arguments from the
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 2e90f0c215b8d9..88f6af80cbc550 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1096,8 +1096,8 @@ Decl *TemplateDeclInstantiator::VisitTypeAliasDecl(TypeAliasDecl *D) {
   return Typedef;
 }
 
-Decl *
-TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
+Decl *TemplateDeclInstantiator::InstantiateTypeAliasTemplateDecl(
+    TypeAliasTemplateDecl *D) {
   // Create a local instantiation scope for this type alias template, which
   // will contain the instantiations of the template parameters.
   LocalInstantiationScope Scope(SemaRef);
@@ -1143,7 +1143,14 @@ TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
   if (!PrevAliasTemplate)
     Inst->setInstantiatedFromMemberTemplate(D);
 
-  Owner->addDecl(Inst);
+  return Inst;
+}
+
+Decl *
+TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
+  Decl *Inst = InstantiateTypeAliasTemplateDecl(D);
+  if (Inst) // Skip the add when nothing was instantiated.
+    Owner->addDecl(Inst);
 
   return Inst;
 }
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 1a19a073d5223a..0203a8e74ad49c 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -9321,15 +9321,17 @@ BuildTypeCoupledDecls(Expr *E,
 }
 
 QualType Sema::BuildCountAttributedArrayOrPointerType(QualType WrappedTy,
-                                                      Expr *CountExpr) {
+                                                      Expr *CountExpr,
+                                                      bool CountInBytes,
+                                                      bool OrNull) {
   assert(WrappedTy->isIncompleteArrayType() || WrappedTy->isPointerType());
 
   llvm::SmallVector<TypeCoupledDeclRefInfo, 1> Decls;
   BuildTypeCoupledDecls(CountExpr, Decls);
   /// When the resulting expression is invalid, we still create the AST using
   /// the original count expression for the sake of AST dump.
-  return Context.getCountAttributedType(
-      WrappedTy, CountExpr, /*CountInBytes*/ false, /*OrNull*/ false, Decls);
+  return Context.getCountAttributedType(WrappedTy, CountExpr, CountInBytes,
+                                        OrNull, Decls);
 }
 
 /// getDecltypeForExpr - Given an expr, will return the decltype for
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index df271cb43e2013..4fbd0e0c90371d 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -7397,7 +7397,8 @@ QualType TreeTransform<Derived>::TransformCountAttributedType(
   if (getDerived().AlwaysRebuild() || InnerTy != OldTy->desugar() ||
       OldCount != NewCount) {
     // Currently, CountAttributedType can only wrap incomplete array types.
-    Result = SemaRef.BuildCountAttributedArrayOrPointerType(InnerTy, NewCount);
+    Result = SemaRef.BuildCountAttributedArrayOrPointerType(
+        InnerTy, NewCount, OldTy->isCountInBytes(), OldTy->isOrNull());
   }
 
   TLB.push<CountAttributedTypeLoc>(Result);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 079ac3f0e3545f..afdeccaf93a9df 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3921,13 +3921,6 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       }
       break;
 
-    case VTABLES_TO_EMIT:
-      if (F.Kind == MK_MainFile ||
-          getContext().getLangOpts().BuildingPCHWithObjectFile)
-        for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/)
-          VTablesToEmit.push_back(ReadDeclID(F, Record, I));
-      break;
-
     case IMPORTED_MODULES:
       if (!F.isModule()) {
         // If we aren't loading a module (which has its own exports), make
@@ -8117,10 +8110,6 @@ void ASTReader::PassInterestingDeclToConsumer(Decl *D) {
     Consumer->HandleInterestingDecl(DeclGroupRef(D));
 }
 
-void ASTReader::PassVTableToConsumer(CXXRecordDecl *RD) {
-  Consumer->HandleVTable(RD);
-}
-
 void ASTReader::StartTranslationUnit(ASTConsumer *Consumer) {
   this->Consumer = Consumer;
 
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index cf3737c5a501d8..cbaf1b0a98c614 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -4235,13 +4235,6 @@ void ASTReader::PassInterestingDeclsToConsumer() {
 
   // If we add any new potential interesting decl in the last call, consume it.
   ConsumingPotentialInterestingDecls();
-
-  for (GlobalDeclID ID : VTablesToEmit) {
-    auto *RD = cast<CXXRecordDecl>(GetDecl(ID));
-    assert(!RD->shouldEmitInExternalSource());
-    PassVTableToConsumer(RD);
-  }
-  VTablesToEmit.clear();
 }
 
 void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 1b0208a20766ee..5b5b468532f324 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -927,7 +927,6 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
   RECORD(PP_ASSUME_NONNULL_LOC);
   RECORD(PP_UNSAFE_BUFFER_USAGE);
-  RECORD(VTABLES_TO_EMIT);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -3962,10 +3961,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
     Stream.EmitRecord(INTERESTING_IDENTIFIERS, InterestingIdents);
 }
 
-void ASTWriter::handleVTable(CXXRecordDecl *RD) {
-  PendingEmittingVTables.push_back(RD);
-}
-
 //===----------------------------------------------------------------------===//
 // DeclContext's Name Lookup Table Serialization
 //===----------------------------------------------------------------------===//
@@ -5168,13 +5163,6 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) {
   // Write all of the DeclsToCheckForDeferredDiags.
   for (auto *D : SemaRef.DeclsToCheckForDeferredDiags)
     GetDeclRef(D);
-
-  // Write all classes need to emit the vtable definitions if required.
-  if (isWritingStdCXXNamedModules())
-    for (CXXRecordDecl *RD : PendingEmittingVTables)
-      GetDeclRef(RD);
-  else
-    PendingEmittingVTables.clear();
 }
 
 void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) {
@@ -5329,17 +5317,6 @@ void ASTWriter::WriteSpecialDeclRecords(Sema &SemaRef) {
   }
   if (!DeleteExprsToAnalyze.empty())
     Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze);
-
-  RecordData VTablesToEmit;
-  for (CXXRecordDecl *RD : PendingEmittingVTables) {
-    if (!wasDeclEmitted(RD))
-      continue;
-
-    AddDeclRef(RD, VTablesToEmit);
-  }
-
-  if (!VTablesToEmit.empty())
-    Stream.EmitRecord(VTABLES_TO_EMIT, VTablesToEmit);
 }
 
 ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 59d94c3d79824b..b6583c54c9ba1f 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1537,14 +1537,8 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
   if (D->isThisDeclarationADefinition())
     Record.AddCXXDefinitionData(D);
 
-  if (D->isCompleteDefinition() && D->isInNamedModule())
-    Writer.AddDeclRef(D, Writer.ModularCodegenDecls);
-
   // Store (what we currently believe to be) the key function to avoid
   // deserializing every method so we can compute it.
-  //
-  // FIXME: Avoid adding the key function if the class is defined in
-  // module purview since the key function is meaningless in module purview.
   if (D->isCompleteDefinition())
     Record.AddDeclRef(Context.getCurrentKeyFunction(D));
 
diff --git a/clang/test/AST/attr-counted-by-or-null-late-parsed-struct-ptrs.c b/clang/test/AST/attr-counted-by-or-null-late-parsed-struct-ptrs.c
new file mode 100644
index 00000000000000..975c0a0231943c
--- /dev/null
+++ b/clang/test/AST/attr-counted-by-or-null-late-parsed-struct-ptrs.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes %s -ast-dump | FileCheck %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+
+struct size_known {
+  int field;
+};
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  struct size_known *buf __counted_by_or_null(count);
+  int count;
+};
+// CHECK-LABEL: struct on_member_pointer_complete_ty definition
+// CHECK-NEXT: |-FieldDecl {{.*}} buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT: `-FieldDecl {{.*}} referenced count 'int'
+
+struct on_pointer_anon_count {
+  struct size_known *buf __counted_by_or_null(count);
+  struct {
+    int count;
+  };
+};
+
+// CHECK-LABEL: struct on_pointer_anon_count definition
+// CHECK-NEXT:  |-FieldDecl {{.*}} buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:  |-RecordDecl {{.*}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.*}} count 'int'
+// CHECK-NEXT:  |-FieldDecl {{.*}} implicit 'struct on_pointer_anon_count::(anonymous at {{.*}})'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.*}} implicit referenced count 'int'
+// CHECK-NEXT:    |-Field {{.*}} '' 'struct on_pointer_anon_count::(anonymous at {{.*}})'
+// CHECK-NEXT:    `-Field {{.*}} 'count' 'int'
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse counted_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute and is **not** late parsed resulting in the `count`
+// field being unavailable.
+//
+// See `clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c` for test
+// cases.
diff --git a/clang/test/AST/attr-counted-by-or-null-struct-ptrs.c b/clang/test/AST/attr-counted-by-or-null-struct-ptrs.c
new file mode 100644
index 00000000000000..cedb3f1192eda3
--- /dev/null
+++ b/clang/test/AST/attr-counted-by-or-null-struct-ptrs.c
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 %s -ast-dump | FileCheck %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_complete_ty definition
+// CHECK-NEXT: |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT: `-FieldDecl {{.+}} buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+struct on_member_pointer_complete_ty {
+  int count;
+  struct size_known * buf __counted_by_or_null(count);
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_buf definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_buf::(anonymous at [[ANON_STRUCT_PATH:.+]])'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.+}} implicit buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:    |-Field {{.+}} '' 'struct on_pointer_anon_buf::(anonymous at [[ANON_STRUCT_PATH]])'
+// CHECK-NEXT:    `-Field {{.+}} 'buf' 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+struct on_pointer_anon_buf {
+  int count;
+  struct {
+    struct size_known *buf __counted_by_or_null(count);
+  };
+};
+
+struct on_pointer_anon_count {
+  struct {
+    int count;
+  };
+  struct size_known *buf __counted_by_or_null(count);
+};
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse counted_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_complete_ty_ty_pos definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+struct on_member_pointer_complete_ty_ty_pos {
+  int count;
+  struct size_known *__counted_by_or_null(count) buf;
+};
+
+// TODO: This should be forbidden but isn't due to counted_by_or_null being treated as a
+// declaration attribute. The attribute ends up on the outermost pointer
+// (allowed by sema) even though syntactically it's supposed to be on the inner
+// pointer (would not be allowed by sema due to pointee being a function type).
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_fn_ptr_ty_ty_pos_inner definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} fn_ptr 'void (** __counted_by_or_null(count))(void)':'void (**)(void)'
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  int count;
+  void (* __counted_by_or_null(count) * fn_ptr)(void);
+};
+
+// FIXME: The generated AST here is wrong. The attribute should be on the inner
+// pointer.
+// CHECK-LABEL: RecordDecl {{.+}} struct on_nested_pointer_inner definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known ** __counted_by_or_null(count)':'struct size_known **'
+struct on_nested_pointer_inner {
+  int count;
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__counted_by_or_null` can only be nested when used in function parameters.
+  struct size_known *__counted_by_or_null(count) *buf;
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_nested_pointer_outer definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known ** __counted_by_or_null(count)':'struct size_known **'
+struct on_nested_pointer_outer {
+  int count;
+  struct size_known **__counted_by_or_null(count) buf;
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_buf_ty_pos definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_buf_ty_pos::(anonymous at [[ANON_STRUCT_PATH2:.+]])'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.+}} implicit buf 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:    |-Field {{.+}} '' 'struct on_pointer_anon_buf_ty_pos::(anonymous at [[ANON_STRUCT_PATH2]])'
+// CHECK-NEXT:    `-Field {{.+}} 'buf' 'struct size_known * __counted_by_or_null(count)':'struct size_known *'
+struct on_pointer_anon_buf_ty_pos {
+  int count;
+  struct {
+    struct size_known * __counted_by_or_null(count) buf;
+  };
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_count_ty_pos definition
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} count 'int'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_count_ty_pos::(anonymous at [[ANON_STRUCT_PATH3:.+]])'
+// CHECK-NEXT:  |-IndirectFieldDecl {{.+}} implicit referenced count 'int'
+// CHECK-NEXT:  | |-Field {{.+}} '' 'struct on_pointer_anon_count_ty_pos::(anonymous at [[ANON_STRUCT_PATH3]])'
+// CHECK-NEXT:  | `-Field {{.+}} 'count' 'int'
+struct on_pointer_anon_count_ty_pos {
+  struct {
+    int count;
+  };
+  struct size_known *__counted_by_or_null(count) buf;
+};
diff --git a/clang/test/AST/attr-sized-by-late-parsed-struct-ptrs.c b/clang/test/AST/attr-sized-by-late-parsed-struct-ptrs.c
new file mode 100644
index 00000000000000..b58caf608bf975
--- /dev/null
+++ b/clang/test/AST/attr-sized-by-late-parsed-struct-ptrs.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes %s -ast-dump | FileCheck %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+struct size_known {
+  int field;
+};
+
+//==============================================================================
+// __sized_by on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  struct size_known *buf __sized_by(count);
+  int count;
+};
+// CHECK-LABEL: struct on_member_pointer_complete_ty definition
+// CHECK-NEXT: |-FieldDecl {{.*}} buf 'struct size_known * __sized_by(count)':'struct size_known *'
+// CHECK-NEXT: `-FieldDecl {{.*}} referenced count 'int'
+
+struct on_pointer_anon_count {
+  struct size_known *buf __sized_by(count);
+  struct {
+    int count;
+  };
+};
+
+// CHECK-LABEL: struct on_pointer_anon_count definition
+// CHECK-NEXT:  |-FieldDecl {{.*}} buf 'struct size_known * __sized_by(count)':'struct size_known *'
+// CHECK-NEXT:  |-RecordDecl {{.*}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.*}} count 'int'
+// CHECK-NEXT:  |-FieldDecl {{.*}} implicit 'struct on_pointer_anon_count::(anonymous at {{.*}})'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.*}} implicit referenced count 'int'
+// CHECK-NEXT:    |-Field {{.*}} '' 'struct on_pointer_anon_count::(anonymous at {{.*}})'
+// CHECK-NEXT:    `-Field {{.*}} 'count' 'int'
+
+//==============================================================================
+// __sized_by on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by as a type attribute. Currently it is parsed
+// as a declaration attribute and is **not** late parsed resulting in the `count`
+// field being unavailable.
+//
+// See `clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c` for test
+// cases.
diff --git a/clang/test/AST/attr-sized-by-or-null-late-parsed-struct-ptrs.c b/clang/test/AST/attr-sized-by-or-null-late-parsed-struct-ptrs.c
new file mode 100644
index 00000000000000..d55a42ac0fb947
--- /dev/null
+++ b/clang/test/AST/attr-sized-by-or-null-late-parsed-struct-ptrs.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes %s -ast-dump | FileCheck %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+
+struct size_known {
+  int field;
+};
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  struct size_known *buf __sized_by_or_null(count);
+  int count;
+};
+// CHECK-LABEL: struct on_member_pointer_complete_ty definition
+// CHECK-NEXT: |-FieldDecl {{.*}} buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT: `-FieldDecl {{.*}} referenced count 'int'
+
+struct on_pointer_anon_count {
+  struct size_known *buf __sized_by_or_null(count);
+  struct {
+    int count;
+  };
+};
+
+// CHECK-LABEL: struct on_pointer_anon_count definition
+// CHECK-NEXT:  |-FieldDecl {{.*}} buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:  |-RecordDecl {{.*}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.*}} count 'int'
+// CHECK-NEXT:  |-FieldDecl {{.*}} implicit 'struct on_pointer_anon_count::(anonymous at {{.*}})'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.*}} implicit referenced count 'int'
+// CHECK-NEXT:    |-Field {{.*}} '' 'struct on_pointer_anon_count::(anonymous at {{.*}})'
+// CHECK-NEXT:    `-Field {{.*}} 'count' 'int'
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute and is **not** late parsed resulting in the `count`
+// field being unavailable.
+//
+// See `clang/test/Sema/attr-counted-by-late-parsed-struct-ptrs.c` for test
+// cases.
diff --git a/clang/test/AST/attr-sized-by-or-null-struct-ptrs.c b/clang/test/AST/attr-sized-by-or-null-struct-ptrs.c
new file mode 100644
index 00000000000000..6189799b85ccb2
--- /dev/null
+++ b/clang/test/AST/attr-sized-by-or-null-struct-ptrs.c
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 %s -ast-dump | FileCheck %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_complete_ty definition
+// CHECK-NEXT: |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT: `-FieldDecl {{.+}} buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+struct on_member_pointer_complete_ty {
+  int count;
+  struct size_known * buf __sized_by_or_null(count);
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_buf definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_buf::(anonymous at [[ANON_STRUCT_PATH:.+]])'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.+}} implicit buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:    |-Field {{.+}} '' 'struct on_pointer_anon_buf::(anonymous at [[ANON_STRUCT_PATH]])'
+// CHECK-NEXT:    `-Field {{.+}} 'buf' 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+struct on_pointer_anon_buf {
+  int count;
+  struct {
+    struct size_known *buf __sized_by_or_null(count);
+  };
+};
+
+struct on_pointer_anon_count {
+  struct {
+    int count;
+  };
+  struct size_known *buf __sized_by_or_null(count);
+};
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute.
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_complete_ty_ty_pos definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+struct on_member_pointer_complete_ty_ty_pos {
+  int count;
+  struct size_known *__sized_by_or_null(count) buf;
+};
+
+// TODO: This should be forbidden but isn't due to sized_by_or_null being treated as a
+// declaration attribute. The attribute ends up on the outermost pointer
+// (allowed by sema) even though syntactically it's supposed to be on the inner
+// pointer (would not be allowed by sema due to pointee being a function type).
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_fn_ptr_ty_ty_pos_inner definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} fn_ptr 'void (** __sized_by_or_null(count))(void)':'void (**)(void)'
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  int count;
+  void (* __sized_by_or_null(count) * fn_ptr)(void);
+};
+
+// FIXME: The generated AST here is wrong. The attribute should be on the inner
+// pointer.
+// CHECK-LABEL: RecordDecl {{.+}} struct on_nested_pointer_inner definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known ** __sized_by_or_null(count)':'struct size_known **'
+struct on_nested_pointer_inner {
+  int count;
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__sized_by_or_null` can only be nested when used in function parameters.
+  struct size_known *__sized_by_or_null(count) *buf;
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_nested_pointer_outer definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known ** __sized_by_or_null(count)':'struct size_known **'
+struct on_nested_pointer_outer {
+  int count;
+  struct size_known **__sized_by_or_null(count) buf;
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_buf_ty_pos definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_buf_ty_pos::(anonymous at [[ANON_STRUCT_PATH2:.+]])'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.+}} implicit buf 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+// CHECK-NEXT:    |-Field {{.+}} '' 'struct on_pointer_anon_buf_ty_pos::(anonymous at [[ANON_STRUCT_PATH2]])'
+// CHECK-NEXT:    `-Field {{.+}} 'buf' 'struct size_known * __sized_by_or_null(count)':'struct size_known *'
+struct on_pointer_anon_buf_ty_pos {
+  int count;
+  struct {
+    struct size_known * __sized_by_or_null(count) buf;
+  };
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_count_ty_pos definition
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} count 'int'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_count_ty_pos::(anonymous at [[ANON_STRUCT_PATH3:.+]])'
+// CHECK-NEXT:  |-IndirectFieldDecl {{.+}} implicit referenced count 'int'
+// CHECK-NEXT:  | |-Field {{.+}} '' 'struct on_pointer_anon_count_ty_pos::(anonymous at [[ANON_STRUCT_PATH3]])'
+// CHECK-NEXT:  | `-Field {{.+}} 'count' 'int'
+struct on_pointer_anon_count_ty_pos {
+  struct {
+    int count;
+  };
+  struct size_known *__sized_by_or_null(count) buf;
+};
diff --git a/clang/test/AST/attr-sized-by-struct-ptrs.c b/clang/test/AST/attr-sized-by-struct-ptrs.c
new file mode 100644
index 00000000000000..5d9ed0094c685b
--- /dev/null
+++ b/clang/test/AST/attr-sized-by-struct-ptrs.c
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 %s -ast-dump | FileCheck %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+//==============================================================================
+// __sized_by on struct member pointer in decl attribute position
+//==============================================================================
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_complete_ty definition
+// CHECK-NEXT: |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT: `-FieldDecl {{.+}} buf 'struct size_known * __sized_by(count)':'struct size_known *'
+struct on_member_pointer_complete_ty {
+  int count;
+  struct size_known * buf __sized_by(count);
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_buf definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} buf 'struct size_known * __sized_by(count)':'struct size_known *'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_buf::(anonymous at [[ANON_STRUCT_PATH:.+]])'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.+}} implicit buf 'struct size_known * __sized_by(count)':'struct size_known *'
+// CHECK-NEXT:    |-Field {{.+}} '' 'struct on_pointer_anon_buf::(anonymous at [[ANON_STRUCT_PATH]])'
+// CHECK-NEXT:    `-Field {{.+}} 'buf' 'struct size_known * __sized_by(count)':'struct size_known *'
+struct on_pointer_anon_buf {
+  int count;
+  struct {
+    struct size_known *buf __sized_by(count);
+  };
+};
+
+struct on_pointer_anon_count {
+  struct {
+    int count;
+  };
+  struct size_known *buf __sized_by(count);
+};
+
+//==============================================================================
+// __sized_by on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by as a type attribute. Currently it is parsed
+// as a declaration attribute.
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_complete_ty_ty_pos definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known * __sized_by(count)':'struct size_known *'
+struct on_member_pointer_complete_ty_ty_pos {
+  int count;
+  struct size_known *__sized_by(count) buf;
+};
+
+// TODO: This should be forbidden but isn't due to sized_by being treated as a
+// declaration attribute. The attribute ends up on the outermost pointer
+// (allowed by sema) even though syntactically it's supposed to be on the inner
+// pointer (would not be allowed by sema due to pointee being a function type).
+// CHECK-LABEL: RecordDecl {{.+}} struct on_member_pointer_fn_ptr_ty_ty_pos_inner definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} fn_ptr 'void (** __sized_by(count))(void)':'void (**)(void)'
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  int count;
+  void (* __sized_by(count) * fn_ptr)(void);
+};
+
+// FIXME: The generated AST here is wrong. The attribute should be on the inner
+// pointer.
+// CHECK-LABEL: RecordDecl {{.+}} struct on_nested_pointer_inner definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known ** __sized_by(count)':'struct size_known **'
+struct on_nested_pointer_inner {
+  int count;
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__sized_by` can only be nested when used in function parameters.
+  struct size_known *__sized_by(count) *buf;
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_nested_pointer_outer definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  `-FieldDecl {{.+}} buf 'struct size_known ** __sized_by(count)':'struct size_known **'
+struct on_nested_pointer_outer {
+  int count;
+  struct size_known **__sized_by(count) buf;
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_buf_ty_pos definition
+// CHECK-NEXT:  |-FieldDecl {{.+}} referenced count 'int'
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} buf 'struct size_known * __sized_by(count)':'struct size_known *'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_buf_ty_pos::(anonymous at [[ANON_STRUCT_PATH2:.+]])'
+// CHECK-NEXT:  `-IndirectFieldDecl {{.+}} implicit buf 'struct size_known * __sized_by(count)':'struct size_known *'
+// CHECK-NEXT:    |-Field {{.+}} '' 'struct on_pointer_anon_buf_ty_pos::(anonymous at [[ANON_STRUCT_PATH2]])'
+// CHECK-NEXT:    `-Field {{.+}} 'buf' 'struct size_known * __sized_by(count)':'struct size_known *'
+struct on_pointer_anon_buf_ty_pos {
+  int count;
+  struct {
+    struct size_known * __sized_by(count) buf;
+  };
+};
+
+// CHECK-LABEL: RecordDecl {{.+}} struct on_pointer_anon_count_ty_pos definition
+// CHECK-NEXT:  |-RecordDecl {{.+}} struct definition
+// CHECK-NEXT:  | `-FieldDecl {{.+}} count 'int'
+// CHECK-NEXT:  |-FieldDecl {{.+}} implicit 'struct on_pointer_anon_count_ty_pos::(anonymous at [[ANON_STRUCT_PATH3:.+]])'
+// CHECK-NEXT:  |-IndirectFieldDecl {{.+}} implicit referenced count 'int'
+// CHECK-NEXT:  | |-Field {{.+}} '' 'struct on_pointer_anon_count_ty_pos::(anonymous at [[ANON_STRUCT_PATH3]])'
+// CHECK-NEXT:  | `-Field {{.+}} 'count' 'int'
+struct on_pointer_anon_count_ty_pos {
+  struct {
+    int count;
+  };
+  struct size_known *__sized_by(count) buf;
+};
diff --git a/clang/test/CodeGenCXX/modules-vtable.cppm b/clang/test/CodeGenCXX/modules-vtable.cppm
index ee029ec496ceb5..fb179b1de4880b 100644
--- a/clang/test/CodeGenCXX/modules-vtable.cppm
+++ b/clang/test/CodeGenCXX/modules-vtable.cppm
@@ -24,9 +24,6 @@
 // RUN:     %t/M-A.cppm -o %t/M-A.pcm
 // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 -fmodule-file=M:A=%t/M-A.pcm \
 // RUN:     %t/M-B.cppm  -emit-llvm -o - | FileCheck %t/M-B.cppm
-// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 \
-// RUN:     %t/M-A.pcm  -emit-llvm -o - | FileCheck %t/M-A.cppm
-// XFAIL: *
 
 //--- Mod.cppm
 export module Mod;
@@ -44,10 +41,9 @@ Base::~Base() {}
 // CHECK: @_ZTSW3Mod4Base = constant
 // CHECK: @_ZTIW3Mod4Base = constant
 
-// With the new Itanium C++ ABI, the linkage of vtables in modules don't need to be linkonce ODR.
-// CHECK-INLINE: @_ZTVW3Mod4Base = {{.*}}unnamed_addr constant
-// CHECK-INLINE: @_ZTSW3Mod4Base = {{.*}}constant
-// CHECK-INLINE: @_ZTIW3Mod4Base = {{.*}}constant
+// CHECK-INLINE: @_ZTVW3Mod4Base = linkonce_odr {{.*}}unnamed_addr constant
+// CHECK-INLINE: @_ZTSW3Mod4Base = linkonce_odr {{.*}}constant
+// CHECK-INLINE: @_ZTIW3Mod4Base = linkonce_odr {{.*}}constant
 
 module :private;
 int private_use() {
@@ -62,13 +58,13 @@ int use() {
     return 43;
 }
 
-// CHECK-NOT: @_ZTSW3Mod4Base
-// CHECK-NOT: @_ZTIW3Mod4Base
-// CHECK: @_ZTVW3Mod4Base = external
+// CHECK-NOT: @_ZTSW3Mod4Base = constant
+// CHECK-NOT: @_ZTIW3Mod4Base = constant
+// CHECK: @_ZTVW3Mod4Base = external unnamed_addr
 
-// CHECK-INLINE-NOT: @_ZTSW3Mod4Base
-// CHECK-INLINE-NOT: @_ZTIW3Mod4Base
-// CHECK-INLINE: @_ZTVW3Mod4Base = external
+// CHECK-INLINE: @_ZTVW3Mod4Base = linkonce_odr {{.*}}unnamed_addr constant
+// CHECK-INLINE: @_ZTSW3Mod4Base = linkonce_odr {{.*}}constant
+// CHECK-INLINE: @_ZTIW3Mod4Base = linkonce_odr {{.*}}constant
 
 // Check the case that the declaration of the key function comes from another
 // module unit but the definition of the key function comes from the current
@@ -86,10 +82,6 @@ int a_use() {
     return 43;
 }
 
-// CHECK: @_ZTVW1M1C = unnamed_addr constant
-// CHECK: @_ZTSW1M1C = constant
-// CHECK: @_ZTIW1M1C = constant
-
 //--- M-B.cppm
 export module M:B;
 import :A;
@@ -101,6 +93,6 @@ int b_use() {
     return 43;
 }
 
-// CHECK: @_ZTVW1M1C = external
-// CHECK-NOT: @_ZTSW1M1C
-// CHECK-NOT: @_ZTIW1M1C
+// CHECK: @_ZTVW1M1C = unnamed_addr constant
+// CHECK: @_ZTSW1M1C = constant
+// CHECK: @_ZTIW1M1C = constant
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl b/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
index 7dbf5c3c6cd596..4e2f7f86e84022 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl
@@ -1,6 +1,8 @@
 // RUN: not %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s
 // RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx900 -target-feature +wavefrontsize32 -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=GFX9
 
 // CHECK: error: invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive
+// GFX9: error: option 'wavefrontsize32' cannot be specified on this target
 
 kernel void test() {}
diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c
index 49bdb21ac59d6a..d3df5b9208e744 100644
--- a/clang/test/Driver/print-supported-extensions-riscv.c
+++ b/clang/test/Driver/print-supported-extensions-riscv.c
@@ -51,8 +51,8 @@
 // CHECK-NEXT:     zce                  1.0       'Zce' (Compressed extensions for microcontrollers)
 // CHECK-NEXT:     zcf                  1.0       'Zcf' (Compressed Single-Precision Floating-Point Instructions)
 // CHECK-NEXT:     zcmop                1.0       'Zcmop' (Compressed May-Be-Operations)
-// CHECK-NEXT:     zcmp                 1.0       'Zcmp' (sequenced instuctions for code-size reduction)
-// CHECK-NEXT:     zcmt                 1.0       'Zcmt' (table jump instuctions for code-size reduction)
+// CHECK-NEXT:     zcmp                 1.0       'Zcmp' (sequenced instructions for code-size reduction)
+// CHECK-NEXT:     zcmt                 1.0       'Zcmt' (table jump instructions for code-size reduction)
 // CHECK-NEXT:     zba                  1.0       'Zba' (Address Generation Instructions)
 // CHECK-NEXT:     zbb                  1.0       'Zbb' (Basic Bit-Manipulation)
 // CHECK-NEXT:     zbc                  1.0       'Zbc' (Carry-Less Multiplication)
diff --git a/clang/test/Modules/pr97313.cppm b/clang/test/Modules/pr97313.cppm
deleted file mode 100644
index a4f4cf5eebcf59..00000000000000
--- a/clang/test/Modules/pr97313.cppm
+++ /dev/null
@@ -1,118 +0,0 @@
-// REQUIRES: !system-windows
-//
-// RUN: rm -rf %t
-// RUN: mkdir -p %t
-// RUN: split-file %s %t
-//
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Base.cppm \
-// RUN:     -emit-module-interface -o %t/Base.pcm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Sub.cppm \
-// RUN:     -emit-module-interface -o %t/Sub.pcm -fprebuilt-module-path=%t
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Sub.pcm \
-// RUN:     -emit-llvm -o %t/Sub.pcm -o - -fprebuilt-module-path=%t | \
-// RUN:     FileCheck %t/Sub.cppm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/main.cpp \
-// RUN:     -emit-llvm -fprebuilt-module-path=%t -o - | FileCheck %t/main.cpp
-//
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Mod.cppm \
-// RUN:     -emit-module-interface -o %t/Mod.pcm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Mod.pcm \
-// RUN:     -emit-llvm -o - | FileCheck %t/Mod.cppm
-// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Use.cpp \
-// RUN:     -emit-llvm -fprebuilt-module-path=%t -o - | \
-// RUN:     FileCheck %t/Use.cpp
-
-//--- Base.cppm
-export module Base;
-
-export template <class>
-class Base
-{
-public:
-    constexpr Base();
-    constexpr virtual ~Base();
-};
-
-template <class X>
-constexpr Base<X>::Base() = default;
-
-template <class X>
-constexpr Base<X>::~Base() = default;
-
-//--- Sub.cppm
-export module Sub;
-export import Base;
-
-export class Sub : public Base<int>
-{
-};
-
-// CHECK: @_ZTIW4Base4BaseIiE = {{.*}}linkonce_odr
-
-//--- main.cpp
-import Sub;
-
-int main()
-{
-    Base<int> *b = new Sub();
-    delete b;
-}
-
-// CHECK: @_ZTIW4Base4BaseIiE = {{.*}}linkonce_odr
-
-//--- Mod.cppm
-export module Mod;
-
-export class NonTemplate {
-public:
-    virtual ~NonTemplate();
-};
-
-// CHECK: @_ZTIW3Mod11NonTemplate = {{.*}}constant
-
-export template <class C>
-class Template {
-public:
-    virtual ~Template();
-};
-
-export template<>
-class Template<char> {
-public:
-    virtual ~Template();
-};
-
-// CHECK: @_ZTIW3Mod8TemplateIcE = {{.*}}constant
-
-export template class Template<unsigned>;
-
-// CHECK: @_ZTIW3Mod8TemplateIjE = {{.*}}weak_odr
-
-export extern template class Template<double>;
-
-auto v = new Template<signed int>();
-
-// CHECK: @_ZTIW3Mod8TemplateIiE = {{.*}}linkonce_odr
-
-//--- Use.cpp
-import Mod;
-
-auto v1 = new NonTemplate();
-auto v2 = new Template<char>();
-auto v3 = new Template<unsigned>();
-auto v4 = new Template<double>();
-auto v5 = new Template<signed int>();
-auto v6 = new Template<NonTemplate>();
-
-// CHECK: @_ZTVW3Mod11NonTemplate = {{.*}}unnamed_addr constant
-// CHECK: @_ZTVW3Mod8TemplateIcE = {{.*}}unnamed_addr constant
-// CHECK: @_ZTVW3Mod8TemplateIjE = {{.*}}weak_odr
-// CHECK: @_ZTSW3Mod8TemplateIjE = {{.*}}weak_odr
-// CHECK: @_ZTIW3Mod8TemplateIjE = {{.*}}weak_odr
-// CHECK: @_ZTVW3Mod8TemplateIdE = {{.*}}unnamed_addr constant
-// CHECK: @_ZTVW3Mod8TemplateIiE = {{.*}}linkonce_odr
-// CHECK: @_ZTSW3Mod8TemplateIiE = {{.*}}linkonce_odr
-// CHECK: @_ZTIW3Mod8TemplateIiE = {{.*}}linkonce_odr
-// CHECK: @_ZTVW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr
-// CHECK: @_ZTSW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr
-// CHECK: @_ZTIW3Mod8TemplateIS_11NonTemplateE = {{.*}}linkonce_odr
diff --git a/clang/test/Sema/attr-counted-by-or-null-last-field.c b/clang/test/Sema/attr-counted-by-or-null-last-field.c
new file mode 100644
index 00000000000000..dd3a6422521c02
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-or-null-last-field.c
@@ -0,0 +1,141 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+
+// This has been adapted from clang/test/Sema/attr-counted-by-vla.c, but with VLAs replaced with pointers
+
+struct bar;
+
+struct not_found {
+  int count;
+  struct bar *ptr __counted_by_or_null(bork); // expected-error {{use of undeclared identifier 'bork'}}
+};
+
+struct no_found_count_not_in_substruct {
+  unsigned long flags;
+  unsigned char count; // expected-note {{'count' declared here}}
+  struct A {
+    int dummy;
+    int * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' field 'count' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_count_not_in_unnamed_substruct {
+  unsigned char count; // expected-note {{'count' declared here}}
+  struct {
+    int dummy;
+    int * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' field 'count' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_count_not_in_unnamed_substruct_2 {
+  struct {
+    unsigned char count; // expected-note {{'count' declared here}}
+  };
+  struct {
+    int dummy;
+    int * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' field 'count' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_count_in_other_unnamed_substruct {
+  struct {
+    unsigned char count;
+  } a1;
+
+  struct {
+    int dummy;
+    int * ptr __counted_by_or_null(count); // expected-error {{use of undeclared identifier 'count'}}
+  };
+};
+
+struct not_found_count_in_other_substruct {
+  struct _a1 {
+    unsigned char count;
+  } a1;
+
+  struct {
+    int dummy;
+    int * ptr __counted_by_or_null(count); // expected-error {{use of undeclared identifier 'count'}}
+  };
+};
+
+struct not_found_count_in_other_substruct_2 {
+  struct _a2 {
+    unsigned char count;
+  } a2;
+
+  int * ptr __counted_by_or_null(count); // expected-error {{use of undeclared identifier 'count'}}
+};
+
+struct not_found_suggest {
+  int bork;
+  struct bar **ptr __counted_by_or_null(blork); // expected-error {{use of undeclared identifier 'blork'}}
+};
+
+int global; // expected-note {{'global' declared here}}
+
+struct found_outside_of_struct {
+  int bork;
+  struct bar ** ptr __counted_by_or_null(global); // expected-error {{field 'global' in 'counted_by_or_null' not inside structure}}
+};
+
+struct self_referrential {
+  int bork;
+  struct bar *self[] __counted_by_or_null(self); // expected-error {{use of undeclared identifier 'self'}}
+};
+
+struct non_int_count {
+  double dbl_count;
+  struct bar ** ptr __counted_by_or_null(dbl_count); // expected-error {{'counted_by_or_null' requires a non-boolean integer type argument}}
+};
+
+struct array_of_ints_count {
+  int integers[2];
+  struct bar ** ptr __counted_by_or_null(integers); // expected-error {{'counted_by_or_null' requires a non-boolean integer type argument}}
+};
+
+struct not_a_c99_fam {
+  int count;
+  struct bar *non_c99_fam[0] __counted_by_or_null(count); // expected-error {{'counted_by_or_null' only applies to pointers; did you mean to use 'counted_by'?}}
+};
+
+struct annotated_with_anon_struct {
+  unsigned long flags;
+  struct {
+    unsigned char count;
+    int * ptr __counted_by_or_null(crount); // expected-error {{use of undeclared identifier 'crount'}}
+  };
+};
+
+//==============================================================================
+// __counted_by_or_null on a struct ptr with element type that has unknown size
+//==============================================================================
+
+struct count_unknown;
+struct on_member_ptr_incomplete_ty_ty_pos {
+  int count;
+  struct count_unknown * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct count_unknown' is an incomplete type}}
+};
+
+struct on_member_ptr_incomplete_const_ty_ty_pos {
+  int count;
+  const struct count_unknown * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'const struct count_unknown' is an incomplete type}}
+};
+
+struct on_member_ptr_void_ty_ty_pos {
+  int count;
+  void * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+};
+
+typedef void(fn_ty)(int);
+
+struct on_member_ptr_fn_ptr_ty {
+  int count;
+  fn_ty* * ptr __counted_by_or_null(count);
+};
+
+struct on_member_ptr_fn_ty {
+  int count;
+  fn_ty * ptr __counted_by_or_null(count); // expected-error {{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'fn_ty' (aka 'void (int)') is a function type}}
+};
diff --git a/clang/test/Sema/attr-counted-by-or-null-late-parsed-off.c b/clang/test/Sema/attr-counted-by-or-null-late-parsed-off.c
new file mode 100644
index 00000000000000..0e76ad9e48b4e2
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-or-null-late-parsed-off.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -DNEEDS_LATE_PARSING -fno-experimental-late-parse-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DNEEDS_LATE_PARSING -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -UNEEDS_LATE_PARSING -fno-experimental-late-parse-attributes -fsyntax-only -verify=ok %s
+// RUN: %clang_cc1 -UNEEDS_LATE_PARSING -fsyntax-only -verify=ok %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+
+struct size_known { int dummy; };
+
+#ifdef NEEDS_LATE_PARSING
+struct on_decl {
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known *buf __counted_by_or_null(count);
+  int count;
+};
+
+#else
+
+// ok-no-diagnostics
+struct on_decl {
+  int count;
+  struct size_known *buf __counted_by_or_null(count);
+};
+
+#endif
diff --git a/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c b/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c
new file mode 100644
index 00000000000000..95f517e3144f72
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-or-null-late-parsed-struct-ptrs.c
@@ -0,0 +1,255 @@
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -verify %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+#define __counted_by(f)  __attribute__((counted_by(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+typedef void(*fn_ptr_ty)(void);
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  struct size_known * buf __counted_by_or_null(count);
+  int count;
+};
+
+struct on_member_pointer_incomplete_ty {
+  struct size_unknown * buf __counted_by_or_null(count); // expected-error{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct size_unknown' is an incomplete type}}
+  int count;
+};
+
+struct on_member_pointer_const_incomplete_ty {
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'const struct size_unknown' is an incomplete type}}
+  const struct size_unknown * buf __counted_by_or_null(count);
+  int count;
+};
+
+struct on_member_pointer_void_ty {
+  void* buf __counted_by_or_null(count); // expected-error{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+  int count;
+};
+
+struct on_member_pointer_fn_ptr_ty {
+  // buffer of `count` function pointers is allowed
+  void (**fn_ptr)(void) __counted_by_or_null(count);
+  int count;
+};
+
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty {
+  // buffer of `count` function pointers is allowed
+  fn_ptr_ty* fn_ptr __counted_by_or_null(count);
+  int count;
+};
+
+struct on_member_pointer_fn_ty {
+  // buffer of `count` functions is not allowed
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (*fn_ptr)(void) __counted_by_or_null(count);
+  int count;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty {
+  // buffer of `count` functions is not allowed
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty fn_ptr __counted_by_or_null(count);
+  int count;
+};
+
+struct has_unannotated_vla {
+  int count;
+  int buffer[];
+};
+
+struct on_member_pointer_struct_with_vla {
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla* objects __counted_by_or_null(count);
+  int count;
+};
+
+struct has_annotated_vla {
+  int count;
+  int buffer[] __counted_by(count);
+};
+
+// Currently prevented because computing the size of `objects` at runtime would
+// require an O(N) walk of `objects` to take into account the length of the
+// flexible array member in each struct instance.
+struct on_member_pointer_struct_with_annotated_vla {
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* objects __counted_by_or_null(count);
+  int count;
+};
+
+struct on_pointer_anon_buf {
+  // TODO: Support referring to parent scope
+  struct {
+    // expected-error@+1{{use of undeclared identifier 'count'}}
+    struct size_known *buf __counted_by_or_null(count);
+  };
+  int count;
+};
+
+struct on_pointer_anon_count {
+  struct size_known *buf __counted_by_or_null(count);
+  struct {
+    int count;
+  };
+};
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse counted_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute and is **not** late parsed resulting in the `count`
+// field being unavailable.
+
+struct on_member_pointer_complete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known *__counted_by_or_null(count) buf;
+  int count;
+};
+
+struct on_member_pointer_incomplete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_unknown * __counted_by_or_null(count) buf;
+  int count;
+};
+
+struct on_member_pointer_const_incomplete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  const struct size_unknown * __counted_by_or_null(count) buf;
+  int count;
+};
+
+struct on_member_pointer_void_ty_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being an incomplete type.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  void *__counted_by_or_null(count) buf;
+  int count;
+};
+
+// -
+
+struct on_member_pointer_fn_ptr_ty_pos {
+  // TODO: buffer of `count` function pointers should be allowed
+  // but fails because this isn't late parsed.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  void (** __counted_by_or_null(count) fn_ptr)(void);
+  int count;
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty_pos {
+  // TODO: buffer of `count` function pointers should be allowed
+  // but fails because this isn't late parsed.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  fn_ptr_ty* __counted_by_or_null(count) fn_ptr;
+  int count;
+};
+
+struct on_member_pointer_fn_ty_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a function type.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  void (* __counted_by_or_null(count) fn_ptr)(void);
+  int count;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos {
+  // TODO: buffer of `count` function pointers should be allowed
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  void (** __counted_by_or_null(count) fn_ptr)(void);
+  int count;
+};
+
+struct on_member_pointer_fn_ptr_ty_typedef_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a function type.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  fn_ptr_ty __counted_by_or_null(count) fn_ptr;
+  int count;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a function type.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  void (* __counted_by_or_null(count) * fn_ptr)(void);
+  int count;
+};
+
+struct on_member_pointer_struct_with_vla_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a struct type with a flexible array member.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct has_unannotated_vla *__counted_by_or_null(count) objects;
+  int count;
+};
+
+struct on_member_pointer_struct_with_annotated_vla_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a struct type with a flexible array member.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct has_annotated_vla* __counted_by_or_null(count) objects;
+  int count;
+};
+
+struct on_nested_pointer_inner {
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__counted_by_or_null` can only be nested when used in function parameters.
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known *__counted_by_or_null(count) *buf;
+  int count;
+};
+
+struct on_nested_pointer_outer {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known **__counted_by_or_null(count) buf;
+  int count;
+};
+
+struct on_pointer_anon_buf_ty_pos {
+  struct {
+    // TODO: Support referring to parent scope
+    // expected-error@+1{{use of undeclared identifier 'count'}}
+    struct size_known * __counted_by_or_null(count) buf;
+  };
+  int count;
+};
+
+struct on_pointer_anon_count_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known *__counted_by_or_null(count) buf;
+  struct {
+    int count;
+  };
+};
+
+//==============================================================================
+// __counted_by_or_null on struct non-pointer members
+//==============================================================================
+
+struct on_pod_ty {
+  // expected-error-re@+1{{'counted_by_or_null' only applies to pointers{{$}}}}
+  int wrong_ty __counted_by_or_null(count);
+  int count;
+};
+
+struct on_void_ty {
+  // expected-error-re@+2{{'counted_by_or_null' only applies to pointers{{$}}}}
+  // expected-error@+1{{field has incomplete type 'void'}}
+  void wrong_ty __counted_by_or_null(count);
+  int count;
+};
diff --git a/clang/test/Sema/attr-counted-by-or-null-struct-ptrs-sizeless-types.c b/clang/test/Sema/attr-counted-by-or-null-struct-ptrs-sizeless-types.c
new file mode 100644
index 00000000000000..301977300b06a1
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-or-null-struct-ptrs-sizeless-types.c
@@ -0,0 +1,17 @@
+// __SVInt8_t is specific to ARM64 so specify that in the target triple
+// RUN: %clang_cc1 -triple arm64-apple-darwin -fsyntax-only -verify %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+
+struct on_sizeless_pointee_ty {
+    int count;
+    // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because '__SVInt8_t' is a sizeless type}}
+    __SVInt8_t* member __counted_by_or_null(count);
+};
+
+struct on_sizeless_ty {
+    int count;
+    // expected-error-re@+2{{'counted_by_or_null' only applies to pointers{{$}}}}
+    // expected-error@+1{{field has sizeless type '__SVInt8_t'}}
+    __SVInt8_t member __counted_by_or_null(count);
+};
diff --git a/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c b/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c
new file mode 100644
index 00000000000000..017aafe0c9396a
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-or-null-struct-ptrs.c
@@ -0,0 +1,225 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+#define __counted_by(f)  __attribute__((counted_by(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+typedef void(*fn_ptr_ty)(void);
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  int count;
+  struct size_known * buf __counted_by_or_null(count);
+};
+
+struct on_member_pointer_incomplete_ty {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct size_unknown' is an incomplete type}}
+  struct size_unknown * buf __counted_by_or_null(count);
+};
+
+struct on_member_pointer_const_incomplete_ty {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'const struct size_unknown' is an incomplete type}}
+  const struct size_unknown * buf __counted_by_or_null(count);
+};
+
+struct on_member_pointer_void_ty {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+  void* buf __counted_by_or_null(count);
+};
+
+struct on_member_pointer_fn_ptr_ty {
+  int count;
+  // buffer of `count` function pointers is allowed
+  void (**fn_ptr)(void) __counted_by_or_null(count);
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty {
+  int count;
+  // buffer of `count` function pointers is allowed
+  fn_ptr_ty* fn_ptr __counted_by_or_null(count);
+};
+
+struct on_member_pointer_fn_ty {
+  int count;
+  // buffer of `count` functions is not allowed
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (*fn_ptr)(void) __counted_by_or_null(count);
+};
+
+struct on_member_pointer_fn_ptr_ty_ty {
+  int count;
+  // buffer of `count` functions is not allowed
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty fn_ptr __counted_by_or_null(count);
+};
+
+struct has_unannotated_vla {
+  int count;
+  int buffer[];
+};
+
+struct on_member_pointer_struct_with_vla {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla* objects __counted_by_or_null(count);
+};
+
+struct has_annotated_vla {
+  int count;
+  int buffer[] __counted_by(count);
+};
+
+// Currently prevented because computing the size of `objects` at runtime would
+// require an O(N) walk of `objects` to take into account the length of the
+// flexible array member in each struct instance.
+struct on_member_pointer_struct_with_annotated_vla {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* objects __counted_by_or_null(count);
+};
+
+struct on_pointer_anon_buf {
+  int count;
+  struct {
+    struct size_known *buf __counted_by_or_null(count);
+  };
+};
+
+struct on_pointer_anon_count {
+  struct {
+    int count;
+  };
+  struct size_known *buf __counted_by_or_null(count);
+};
+
+//==============================================================================
+// __counted_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse counted_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute.
+
+struct on_member_pointer_complete_ty_ty_pos {
+  int count;
+  struct size_known *__counted_by_or_null(count) buf;
+};
+
+struct on_member_pointer_incomplete_ty_ty_pos {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct size_unknown' is an incomplete type}}
+  struct size_unknown * __counted_by_or_null(count) buf;
+};
+
+struct on_member_pointer_const_incomplete_ty_ty_pos {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'const struct size_unknown' is an incomplete type}}
+  const struct size_unknown * __counted_by_or_null(count) buf;
+};
+
+struct on_member_pointer_void_ty_ty_pos {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void' is an incomplete type}}
+  void *__counted_by_or_null(count) buf;
+};
+
+// -
+
+struct on_member_pointer_fn_ptr_ty_pos {
+  int count;
+  // buffer of `count` function pointers is allowed
+  void (** __counted_by_or_null(count) fn_ptr)(void);
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty_pos {
+  int count;
+  // buffer of `count` function pointers is allowed
+  fn_ptr_ty* __counted_by_or_null(count) fn_ptr;
+};
+
+struct on_member_pointer_fn_ty_ty_pos {
+  int count;
+  // buffer of `count` functions is not allowed
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (* __counted_by_or_null(count) fn_ptr)(void);
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos {
+  int count;
+  // buffer of `count` functions is not allowed
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty __counted_by_or_null(count) fn_ptr;
+};
+
+// TODO: This should be forbidden but isn't due to counted_by_or_null being treated
+// as a declaration attribute.
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  int count;
+  void (* __counted_by_or_null(count) * fn_ptr)(void);
+};
+
+struct on_member_pointer_struct_with_vla_ty_pos {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla *__counted_by_or_null(count) objects;
+};
+
+// Currently prevented because computing the size of `objects` at runtime would
+// require an O(N) walk of `objects` to take into account the length of the
+// flexible array member in each struct instance.
+struct on_member_pointer_struct_with_annotated_vla_ty_pos {
+  int count;
+  // expected-error@+1{{'counted_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* __counted_by_or_null(count) objects;
+};
+
+struct on_nested_pointer_inner {
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__counted_by_or_null` can only be nested when used in function parameters.
+  int count;
+  struct size_known *__counted_by_or_null(count) *buf;
+};
+
+struct on_nested_pointer_outer {
+  int count;
+  struct size_known **__counted_by_or_null(count) buf;
+};
+
+struct on_pointer_anon_buf_ty_pos {
+  int count;
+  struct {
+    struct size_known * __counted_by_or_null(count) buf;
+  };
+};
+
+struct on_pointer_anon_count_ty_pos {
+  struct {
+    int count;
+  };
+  struct size_known *__counted_by_or_null(count) buf;
+};
+
+//==============================================================================
+// __counted_by_or_null on struct non-pointer members
+//==============================================================================
+
+struct on_pod_ty {
+  int count;
+  // expected-error-re@+1{{'counted_by_or_null' only applies to pointers{{$}}}}
+  int wrong_ty __counted_by_or_null(count);
+};
+
+struct on_void_ty {
+  int count;
+  // expected-error-re@+2{{'counted_by_or_null' only applies to pointers{{$}}}}
+  // expected-error@+1{{field has incomplete type 'void'}}
+  void wrong_ty __counted_by_or_null(count);
+};
diff --git a/clang/test/Sema/attr-counted-by-or-null-vla-sizeless-types.c b/clang/test/Sema/attr-counted-by-or-null-vla-sizeless-types.c
new file mode 100644
index 00000000000000..8abd4476fe5977
--- /dev/null
+++ b/clang/test/Sema/attr-counted-by-or-null-vla-sizeless-types.c
@@ -0,0 +1,11 @@
+// __SVInt8_t is specific to ARM64 so specify that in the target triple
+// RUN: %clang_cc1 -triple arm64-apple-darwin -fsyntax-only -verify %s
+
+#define __counted_by_or_null(f)  __attribute__((counted_by_or_null(f)))
+
+struct on_sizeless_elt_ty {
+    int count;
+    // expected-error-re@+2{{'counted_by_or_null' only applies to pointers{{$}}}}
+    // expected-error@+1{{array has sizeless element type '__SVInt8_t'}}
+    __SVInt8_t arr[] __counted_by_or_null(count);
+};
diff --git a/clang/test/Sema/attr-counted-by-vla.c b/clang/test/Sema/attr-counted-by-vla.c
index b25f719f3b95ab..571d6e6291e6bc 100644
--- a/clang/test/Sema/attr-counted-by-vla.c
+++ b/clang/test/Sema/attr-counted-by-vla.c
@@ -14,7 +14,7 @@ struct no_found_count_not_in_substruct {
   unsigned char count; // expected-note {{'count' declared here}}
   struct A {
     int dummy;
-    int array[] __counted_by(count); // expected-error {{'counted_by' field 'count' isn't within the same struct as the flexible array}}
+    int array[] __counted_by(count); // expected-error {{'counted_by' field 'count' isn't within the same struct as the annotated flexible array}}
   } a;
 };
 
@@ -22,7 +22,7 @@ struct not_found_count_not_in_unnamed_substruct {
   unsigned char count; // expected-note {{'count' declared here}}
   struct {
     int dummy;
-    int array[] __counted_by(count); // expected-error {{'counted_by' field 'count' isn't within the same struct as the flexible array}}
+    int array[] __counted_by(count); // expected-error {{'counted_by' field 'count' isn't within the same struct as the annotated flexible array}}
   } a;
 };
 
@@ -32,7 +32,7 @@ struct not_found_count_not_in_unnamed_substruct_2 {
   };
   struct {
     int dummy;
-    int array[] __counted_by(count); // expected-error {{'counted_by' field 'count' isn't within the same struct as the flexible array}}
+    int array[] __counted_by(count); // expected-error {{'counted_by' field 'count' isn't within the same struct as the annotated flexible array}}
   } a;
 };
 
diff --git a/clang/test/Sema/attr-sized-by-last-field.c b/clang/test/Sema/attr-sized-by-last-field.c
new file mode 100644
index 00000000000000..6af29e9f31435d
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-last-field.c
@@ -0,0 +1,142 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+// This has been adapted from clang/test/Sema/attr-counted-by-vla.c, but with VLAs replaced with pointers
+
+struct bar;
+
+struct not_found {
+  int size;
+  struct bar *ptr __sized_by(bork); // expected-error {{use of undeclared identifier 'bork'}}
+};
+
+struct no_found_size_not_in_substruct {
+  unsigned long flags;
+  unsigned char size; // expected-note {{'size' declared here}}
+  struct A {
+    int dummy;
+    int * ptr __sized_by(size); // expected-error {{'sized_by' field 'size' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_size_not_in_unnamed_substruct {
+  unsigned char size; // expected-note {{'size' declared here}}
+  struct {
+    int dummy;
+    int * ptr __sized_by(size); // expected-error {{'sized_by' field 'size' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_size_not_in_unnamed_substruct_2 {
+  struct {
+    unsigned char size; // expected-note {{'size' declared here}}
+  };
+  struct {
+    int dummy;
+    int * ptr __sized_by(size); // expected-error {{'sized_by' field 'size' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_size_in_other_unnamed_substruct {
+  struct {
+    unsigned char size;
+  } a1;
+
+  struct {
+    int dummy;
+    int * ptr __sized_by(size); // expected-error {{use of undeclared identifier 'size'}}
+  };
+};
+
+struct not_found_size_in_other_substruct {
+  struct _a1 {
+    unsigned char size;
+  } a1;
+
+  struct {
+    int dummy;
+    int * ptr __sized_by(size); // expected-error {{use of undeclared identifier 'size'}}
+  };
+};
+
+struct not_found_size_in_other_substruct_2 {
+  struct _a2 {
+    unsigned char size;
+  } a2;
+
+  int * ptr __sized_by(size); // expected-error {{use of undeclared identifier 'size'}}
+};
+
+struct not_found_suggest {
+  int bork;
+  struct bar **ptr __sized_by(blork); // expected-error {{use of undeclared identifier 'blork'}}
+};
+
+int global; // expected-note {{'global' declared here}}
+
+struct found_outside_of_struct {
+  int bork;
+  struct bar ** ptr __sized_by(global); // expected-error {{field 'global' in 'sized_by' not inside structure}}
+};
+
+struct self_referrential {
+  int bork;
+  struct bar *self[] __sized_by(self); // expected-error {{use of undeclared identifier 'self'}}
+};
+
+struct non_int_size {
+  double dbl_size;
+  struct bar ** ptr __sized_by(dbl_size); // expected-error {{'sized_by' requires a non-boolean integer type argument}}
+};
+
+struct array_of_ints_size {
+  int integers[2];
+  struct bar ** ptr __sized_by(integers); // expected-error {{'sized_by' requires a non-boolean integer type argument}}
+};
+
+struct not_a_c99_fam {
+  int size;
+  struct bar *non_c99_fam[0] __sized_by(size); // expected-error {{'sized_by' only applies to pointers; did you mean to use 'counted_by'?}}
+};
+
+struct annotated_with_anon_struct {
+  unsigned long flags;
+  struct {
+    unsigned char size;
+    int * ptr __sized_by(crount); // expected-error {{use of undeclared identifier 'crount'}}
+  };
+};
+
+//==============================================================================
+// __sized_by on a struct ptr with element type that has unknown size
+//==============================================================================
+
+struct size_unknown;
+struct on_member_ptr_incomplete_ty_ty_pos {
+  int size;
+  struct size_unknown * ptr __sized_by(size);
+};
+
+struct on_member_ptr_incomplete_const_ty_ty_pos {
+  int size;
+  const struct size_unknown * ptr __sized_by(size);
+};
+
+struct on_member_ptr_void_ty_ty_pos {
+  int size;
+  void * ptr __sized_by(size);
+};
+
+typedef void(fn_ty)(int);
+
+struct on_member_ptr_fn_ptr_ty {
+  int size;
+  fn_ty* * ptr __sized_by(size);
+};
+
+struct on_member_ptr_fn_ty {
+  int size;
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'fn_ty' (aka 'void (int)') is a function type}}
+  fn_ty * ptr __sized_by(size);
+};
diff --git a/clang/test/Sema/attr-sized-by-late-parsed-off.c b/clang/test/Sema/attr-sized-by-late-parsed-off.c
new file mode 100644
index 00000000000000..e43125c8ce2f94
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-late-parsed-off.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -DNEEDS_LATE_PARSING -fno-experimental-late-parse-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DNEEDS_LATE_PARSING -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -UNEEDS_LATE_PARSING -fno-experimental-late-parse-attributes -fsyntax-only -verify=ok %s
+// RUN: %clang_cc1 -UNEEDS_LATE_PARSING -fsyntax-only -verify=ok %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+struct size_known { int dummy; };
+
+#ifdef NEEDS_LATE_PARSING
+struct on_decl {
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known *buf __sized_by(count);
+  int count;
+};
+
+#else
+
+// ok-no-diagnostics
+struct on_decl {
+  int count;
+  struct size_known *buf __sized_by(count);
+};
+
+#endif
diff --git a/clang/test/Sema/attr-sized-by-late-parsed-struct-ptrs.c b/clang/test/Sema/attr-sized-by-late-parsed-struct-ptrs.c
new file mode 100644
index 00000000000000..07f8801787d668
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-late-parsed-struct-ptrs.c
@@ -0,0 +1,247 @@
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -verify %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+typedef void(*fn_ptr_ty)(void);
+
+//==============================================================================
+// __sized_by on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  struct size_known * buf __sized_by(size);
+  int size;
+};
+
+struct on_member_pointer_incomplete_ty {
+  struct size_unknown * buf __sized_by(size);
+  int size;
+};
+
+struct on_member_pointer_const_incomplete_ty {
+  const struct size_unknown * buf __sized_by(size);
+  int size;
+};
+
+struct on_member_pointer_void_ty {
+  void* buf __sized_by(size);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty {
+  // buffer of `size` function pointers is allowed
+  void (**fn_ptr)(void) __sized_by(size);
+  int size;
+};
+
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty {
+  // buffer of `size` function pointers is allowed
+  fn_ptr_ty* fn_ptr __sized_by(size);
+  int size;
+};
+
+struct on_member_pointer_fn_ty {
+  // buffer of function(s) with size `size` is currently rejected
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (*fn_ptr)(void) __sized_by(size);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty {
+  // buffer of function(s) with size `size` is currently rejected
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty fn_ptr __sized_by(size);
+  int size;
+};
+
+struct has_unannotated_vla {
+  int size;
+  int buffer[];
+};
+
+struct on_member_pointer_struct_with_vla {
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla* objects __sized_by(size);
+  int size;
+};
+
+struct has_annotated_vla {
+  int size;
+  // expected-error@+1{{'sized_by' only applies to pointers; did you mean to use 'counted_by'?}}
+  int buffer[] __sized_by(size);
+};
+
+struct on_member_pointer_struct_with_annotated_vla {
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* objects __sized_by(size);
+  int size;
+};
+
+struct on_pointer_anon_buf {
+  // TODO: Support referring to parent scope
+  struct {
+    // expected-error@+1{{use of undeclared identifier 'size'}}
+    struct size_known *buf __sized_by(size);
+  };
+  int size;
+};
+
+struct on_pointer_anon_count {
+  struct size_known *buf __sized_by(size);
+  struct {
+    int size;
+  };
+};
+
+//==============================================================================
+// __sized_by on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by as a type attribute. Currently it is parsed
+// as a declaration attribute and is **not** late parsed resulting in the `size`
+// field being unavailable.
+
+struct on_member_pointer_complete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known *__sized_by(size) buf;
+  int size;
+};
+
+struct on_member_pointer_incomplete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_unknown * __sized_by(size) buf;
+  int size;
+};
+
+struct on_member_pointer_const_incomplete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  const struct size_unknown * __sized_by(size) buf;
+  int size;
+};
+
+struct on_member_pointer_void_ty_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being an incomplete type.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void *__sized_by(size) buf;
+  int size;
+};
+
+// -
+
+struct on_member_pointer_fn_ptr_ty_pos {
+  // TODO: buffer of `size` function pointers should be allowed
+  // but fails because this isn't late parsed.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (** __sized_by(size) fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty_pos {
+  // TODO: buffer of `size` function pointers should be allowed
+  // but fails because this isn't late parsed.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  fn_ptr_ty* __sized_by(size) fn_ptr;
+  int size;
+};
+
+struct on_member_pointer_fn_ty_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a function type.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (* __sized_by(size) fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos {
+  // TODO: buffer of `size` function pointers should be allowed
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (** __sized_by(size) fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_typedef_ty_pos {
+  // TODO: This should be allowed with sized_by.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  fn_ptr_ty __sized_by(size) fn_ptr;
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  // TODO: This should be allowed with sized_by.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (* __sized_by(size) * fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_struct_with_vla_ty_pos {
+  // TODO: This should be allowed with sized_by.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct has_unannotated_vla *__sized_by(size) objects;
+  int size;
+};
+
+struct on_member_pointer_struct_with_annotated_vla_ty_pos {
+  // TODO: This should be allowed with sized_by.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct has_annotated_vla* __sized_by(size) objects;
+  int size;
+};
+
+struct on_nested_pointer_inner {
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__sized_by` can only be nested when used in function parameters.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known *__sized_by(size) *buf;
+  int size;
+};
+
+struct on_nested_pointer_outer {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known **__sized_by(size) buf;
+  int size;
+};
+
+struct on_pointer_anon_buf_ty_pos {
+  struct {
+    // TODO: Support referring to parent scope
+    // expected-error@+1{{use of undeclared identifier 'size'}}
+    struct size_known * __sized_by(size) buf;
+  };
+  int size;
+};
+
+struct on_pointer_anon_count_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known *__sized_by(size) buf;
+  struct {
+    int size;
+  };
+};
+
+//==============================================================================
+// __sized_by on struct non-pointer members
+//==============================================================================
+
+struct on_pod_ty {
+  // expected-error-re@+1{{'sized_by' only applies to pointers{{$}}}}
+  int wrong_ty __sized_by(size);
+  int size;
+};
+
+struct on_void_ty {
+  // expected-error-re@+2{{'sized_by' only applies to pointers{{$}}}}
+  // expected-error@+1{{field has incomplete type 'void'}}
+  void wrong_ty __sized_by(size);
+  int size;
+};
diff --git a/clang/test/Sema/attr-sized-by-or-null-last-field.c b/clang/test/Sema/attr-sized-by-or-null-last-field.c
new file mode 100644
index 00000000000000..96bbe847b910bf
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-or-null-last-field.c
@@ -0,0 +1,142 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+
+// This has been adapted from clang/test/Sema/attr-counted-by-vla.c, but with VLAs replaced with pointers
+
+struct bar;
+
+struct not_found {
+  int size;
+  struct bar *ptr __sized_by_or_null(bork); // expected-error {{use of undeclared identifier 'bork'}}
+};
+
+struct no_found_size_not_in_substruct {
+  unsigned long flags;
+  unsigned char size; // expected-note {{'size' declared here}}
+  struct A {
+    int dummy;
+    int * ptr __sized_by_or_null(size); // expected-error {{'sized_by_or_null' field 'size' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_size_not_in_unnamed_substruct {
+  unsigned char size; // expected-note {{'size' declared here}}
+  struct {
+    int dummy;
+    int * ptr __sized_by_or_null(size); // expected-error {{'sized_by_or_null' field 'size' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_size_not_in_unnamed_substruct_2 {
+  struct {
+    unsigned char size; // expected-note {{'size' declared here}}
+  };
+  struct {
+    int dummy;
+    int * ptr __sized_by_or_null(size); // expected-error {{'sized_by_or_null' field 'size' isn't within the same struct as the annotated pointer}}
+  } a;
+};
+
+struct not_found_size_in_other_unnamed_substruct {
+  struct {
+    unsigned char size;
+  } a1;
+
+  struct {
+    int dummy;
+    int * ptr __sized_by_or_null(size); // expected-error {{use of undeclared identifier 'size'}}
+  };
+};
+
+struct not_found_size_in_other_substruct {
+  struct _a1 {
+    unsigned char size;
+  } a1;
+
+  struct {
+    int dummy;
+    int * ptr __sized_by_or_null(size); // expected-error {{use of undeclared identifier 'size'}}
+  };
+};
+
+struct not_found_size_in_other_substruct_2 {
+  struct _a2 {
+    unsigned char size;
+  } a2;
+
+  int * ptr __sized_by_or_null(size); // expected-error {{use of undeclared identifier 'size'}}
+};
+
+struct not_found_suggest {
+  int bork;
+  struct bar **ptr __sized_by_or_null(blork); // expected-error {{use of undeclared identifier 'blork'}}
+};
+
+int global; // expected-note {{'global' declared here}}
+
+struct found_outside_of_struct {
+  int bork;
+  struct bar ** ptr __sized_by_or_null(global); // expected-error {{field 'global' in 'sized_by_or_null' not inside structure}}
+};
+
+struct self_referrential {
+  int bork;
+  struct bar *self[] __sized_by_or_null(self); // expected-error {{use of undeclared identifier 'self'}}
+};
+
+struct non_int_size {
+  double dbl_size;
+  struct bar ** ptr __sized_by_or_null(dbl_size); // expected-error {{'sized_by_or_null' requires a non-boolean integer type argument}}
+};
+
+struct array_of_ints_size {
+  int integers[2];
+  struct bar ** ptr __sized_by_or_null(integers); // expected-error {{'sized_by_or_null' requires a non-boolean integer type argument}}
+};
+
+struct not_a_c99_fam {
+  int size;
+  struct bar *non_c99_fam[0] __sized_by_or_null(size); // expected-error {{'sized_by_or_null' only applies to pointers; did you mean to use 'counted_by'?}}
+};
+
+struct annotated_with_anon_struct {
+  unsigned long flags;
+  struct {
+    unsigned char size;
+    int * ptr __sized_by_or_null(crount); // expected-error {{use of undeclared identifier 'crount'}}
+  };
+};
+
+//==============================================================================
+// __sized_by_or_null on a struct ptr with element type that has unknown size
+//==============================================================================
+
+struct size_unknown;
+struct on_member_ptr_incomplete_ty_ty_pos {
+  int size;
+  struct size_unknown * ptr __sized_by_or_null(size);
+};
+
+struct on_member_ptr_incomplete_const_ty_ty_pos {
+  int size;
+  const struct size_unknown * ptr __sized_by_or_null(size);
+};
+
+struct on_member_ptr_void_ty_ty_pos {
+  int size;
+  void * ptr __sized_by_or_null(size);
+};
+
+typedef void(fn_ty)(int);
+
+struct on_member_ptr_fn_ptr_ty {
+  int size;
+  fn_ty* * ptr __sized_by_or_null(size);
+};
+
+struct on_member_ptr_fn_ty {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'fn_ty' (aka 'void (int)') is a function type}}
+  fn_ty * ptr __sized_by_or_null(size);
+};
diff --git a/clang/test/Sema/attr-sized-by-or-null-late-parsed-off.c b/clang/test/Sema/attr-sized-by-or-null-late-parsed-off.c
new file mode 100644
index 00000000000000..8bc775f196c18b
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-or-null-late-parsed-off.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -DNEEDS_LATE_PARSING -fno-experimental-late-parse-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -DNEEDS_LATE_PARSING -fsyntax-only -verify %s
+
+// RUN: %clang_cc1 -UNEEDS_LATE_PARSING -fno-experimental-late-parse-attributes -fsyntax-only -verify=ok %s
+// RUN: %clang_cc1 -UNEEDS_LATE_PARSING -fsyntax-only -verify=ok %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+
+struct size_known { int dummy; };
+
+#ifdef NEEDS_LATE_PARSING
+struct on_decl {
+  // expected-error@+1{{use of undeclared identifier 'count'}}
+  struct size_known *buf __sized_by_or_null(count);
+  int count;
+};
+
+#else
+
+// ok-no-diagnostics
+struct on_decl {
+  int count;
+  struct size_known *buf __sized_by_or_null(count);
+};
+
+#endif
diff --git a/clang/test/Sema/attr-sized-by-or-null-late-parsed-struct-ptrs.c b/clang/test/Sema/attr-sized-by-or-null-late-parsed-struct-ptrs.c
new file mode 100644
index 00000000000000..afe5f0af280830
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-or-null-late-parsed-struct-ptrs.c
@@ -0,0 +1,247 @@
+// RUN: %clang_cc1 -fexperimental-late-parse-attributes -fsyntax-only -verify %s
+
+#define __sized_by_or_null(f)  __attribute__((__sized_by_or_null__(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+typedef void(*fn_ptr_ty)(void);
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  struct size_known * buf __sized_by_or_null(size);
+  int size;
+};
+
+struct on_member_pointer_incomplete_ty {
+  struct size_unknown * buf __sized_by_or_null(size);
+  int size;
+};
+
+struct on_member_pointer_const_incomplete_ty {
+  const struct size_unknown * buf __sized_by_or_null(size);
+  int size;
+};
+
+struct on_member_pointer_void_ty {
+  void* buf __sized_by_or_null(size);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty {
+  // buffer of `size` function pointers is allowed
+  void (**fn_ptr)(void) __sized_by_or_null(size);
+  int size;
+};
+
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty {
+  // buffer of `size` function pointers is allowed
+  fn_ptr_ty* fn_ptr __sized_by_or_null(size);
+  int size;
+};
+
+struct on_member_pointer_fn_ty {
+  // a pointer to a function type is rejected: a function type has no size
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (*fn_ptr)(void) __sized_by_or_null(size);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty {
+  // rejected: fn_ptr_ty is a pointer to a function type, which has no size
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty fn_ptr __sized_by_or_null(size);
+  int size;
+};
+
+struct has_unannotated_vla {
+  int size;
+  int buffer[];
+};
+
+struct on_member_pointer_struct_with_vla {
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla* objects __sized_by_or_null(size);
+  int size;
+};
+
+struct has_annotated_vla {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' only applies to pointers; did you mean to use 'counted_by'?}}
+  int buffer[] __sized_by_or_null(size);
+};
+
+struct on_member_pointer_struct_with_annotated_vla {
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* objects __sized_by_or_null(size);
+  int size;
+};
+
+struct on_pointer_anon_buf {
+  // TODO: Support referring to parent scope
+  struct {
+    // expected-error@+1{{use of undeclared identifier 'size'}}
+    struct size_known *buf __sized_by_or_null(size);
+  };
+  int size;
+};
+
+struct on_pointer_anon_count {
+  struct size_known *buf __sized_by_or_null(size);
+  struct {
+    int size;
+  };
+};
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute and is **not** late parsed resulting in the `size`
+// field being unavailable.
+
+struct on_member_pointer_complete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known *__sized_by_or_null(size) buf;
+  int size;
+};
+
+struct on_member_pointer_incomplete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_unknown * __sized_by_or_null(size) buf;
+  int size;
+};
+
+struct on_member_pointer_const_incomplete_ty_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  const struct size_unknown * __sized_by_or_null(size) buf;
+  int size;
+};
+
+struct on_member_pointer_void_ty_ty_pos {
+  // TODO: Allow this. A `void *` pointee is accepted in the declaration
+  // attribute position (see on_member_pointer_void_ty).
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void *__sized_by_or_null(size) buf;
+  int size;
+};
+
+// -
+
+struct on_member_pointer_fn_ptr_ty_pos {
+  // TODO: buffer of `size` function pointers should be allowed
+  // but fails because this isn't late parsed.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (** __sized_by_or_null(size) fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty_pos {
+  // TODO: buffer of `size` function pointers should be allowed
+  // but fails because this isn't late parsed.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  fn_ptr_ty* __sized_by_or_null(size) fn_ptr;
+  int size;
+};
+
+struct on_member_pointer_fn_ty_ty_pos {
+  // TODO: This should fail because the attribute is
+  // on a pointer with the pointee being a function type.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (* __sized_by_or_null(size) fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos {
+  // TODO: buffer of `size` function pointers should be allowed
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (** __sized_by_or_null(size) fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_typedef_ty_pos {
+  // TODO: This should be rejected because the pointee is a function type.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  fn_ptr_ty __sized_by_or_null(size) fn_ptr;
+  int size;
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  // TODO: Should be rejected: the attribute is on the inner pointer, whose pointee is a function type.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  void (* __sized_by_or_null(size) * fn_ptr)(void);
+  int size;
+};
+
+struct on_member_pointer_struct_with_vla_ty_pos {
+  // TODO: This should be rejected because the pointee struct has a flexible array member.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct has_unannotated_vla *__sized_by_or_null(size) objects;
+  int size;
+};
+
+struct on_member_pointer_struct_with_annotated_vla_ty_pos {
+  // TODO: This should be rejected because the pointee struct has a flexible array member.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct has_annotated_vla* __sized_by_or_null(size) objects;
+  int size;
+};
+
+struct on_nested_pointer_inner {
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__sized_by_or_null` can only be nested when used in function parameters.
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known *__sized_by_or_null(size) *buf;
+  int size;
+};
+
+struct on_nested_pointer_outer {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known **__sized_by_or_null(size) buf;
+  int size;
+};
+
+struct on_pointer_anon_buf_ty_pos {
+  struct {
+    // TODO: Support referring to parent scope
+    // expected-error@+1{{use of undeclared identifier 'size'}}
+    struct size_known * __sized_by_or_null(size) buf;
+  };
+  int size;
+};
+
+struct on_pointer_anon_count_ty_pos {
+  // TODO: Allow this
+  // expected-error@+1{{use of undeclared identifier 'size'}}
+  struct size_known *__sized_by_or_null(size) buf;
+  struct {
+    int size;
+  };
+};
+
+//==============================================================================
+// __sized_by_or_null on struct non-pointer members
+//==============================================================================
+
+struct on_pod_ty {
+  // expected-error-re@+1{{'sized_by_or_null' only applies to pointers{{$}}}}
+  int wrong_ty __sized_by_or_null(size);
+  int size;
+};
+
+struct on_void_ty {
+  // expected-error-re@+2{{'sized_by_or_null' only applies to pointers{{$}}}}
+  // expected-error@+1{{field has incomplete type 'void'}}
+  void wrong_ty __sized_by_or_null(size);
+  int size;
+};
diff --git a/clang/test/Sema/attr-sized-by-or-null-struct-ptrs-sizeless-types.c b/clang/test/Sema/attr-sized-by-or-null-struct-ptrs-sizeless-types.c
new file mode 100644
index 00000000000000..4a360b9722a0b0
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-or-null-struct-ptrs-sizeless-types.c
@@ -0,0 +1,17 @@
+// __SVInt8_t is specific to ARM64 so specify that in the target triple
+// RUN: %clang_cc1 -triple arm64-apple-darwin -fsyntax-only -verify %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+
+struct on_sizeless_pointee_ty {
+    int size;
+    // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because '__SVInt8_t' is a sizeless type}}
+    __SVInt8_t* member __sized_by_or_null(size);
+};
+
+struct on_sizeless_ty {
+    int size;
+    // expected-error-re@+2{{'sized_by_or_null' only applies to pointers{{$}}}}
+    // expected-error@+1{{field has sizeless type '__SVInt8_t'}}
+    __SVInt8_t member __sized_by_or_null(size);
+};
diff --git a/clang/test/Sema/attr-sized-by-or-null-struct-ptrs.c b/clang/test/Sema/attr-sized-by-or-null-struct-ptrs.c
new file mode 100644
index 00000000000000..2c7578b5ecbe64
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-or-null-struct-ptrs.c
@@ -0,0 +1,219 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+#define __counted_by(f)  __attribute__((counted_by(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+typedef void(*fn_ptr_ty)(void);
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  int size;
+  struct size_known * buf __sized_by_or_null(size);
+};
+
+struct on_member_pointer_incomplete_ty {
+  int size;
+  struct size_unknown * buf __sized_by_or_null(size);
+};
+
+struct on_member_pointer_const_incomplete_ty {
+  int size;
+  const struct size_unknown * buf __sized_by_or_null(size);
+};
+
+struct on_member_pointer_void_ty {
+  int size;
+  void* buf __sized_by_or_null(size);
+};
+
+struct on_member_pointer_fn_ptr_ty {
+  int size;
+  // buffer of function pointers with size `size` is allowed
+  void (**fn_ptr)(void) __sized_by_or_null(size);
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty {
+  int size;
+  // buffer of function pointers with size `size` is allowed
+  fn_ptr_ty* fn_ptr __sized_by_or_null(size);
+};
+
+struct on_member_pointer_fn_ty {
+  int size;
+  // a pointer to a function type is rejected: a function type has no size
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (*fn_ptr)(void) __sized_by_or_null(size);
+};
+
+struct on_member_pointer_fn_ptr_ty_ty {
+  int size;
+  // rejected: fn_ptr_ty is a pointer to a function type, which has no size
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty fn_ptr __sized_by_or_null(size);
+};
+
+struct has_unannotated_vla {
+  int count;
+  int buffer[];
+};
+
+struct on_member_pointer_struct_with_vla {
+  int size;
+  // rejected: the pointee struct has a flexible array member, so its size is unknown
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla* objects __sized_by_or_null(size);
+};
+
+struct has_annotated_vla {
+  int count;
+  int buffer[] __counted_by(count);
+};
+
+struct on_member_pointer_struct_with_annotated_vla {
+  int size;
+  // rejected: the pointee struct has a flexible array member, so its size is unknown
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* objects __sized_by_or_null(size);
+};
+
+struct on_pointer_anon_buf {
+  int size;
+  struct {
+    struct size_known *buf __sized_by_or_null(size);
+  };
+};
+
+struct on_pointer_anon_size {
+  struct {
+    int size;
+  };
+  struct size_known *buf __sized_by_or_null(size);
+};
+
+//==============================================================================
+// __sized_by_or_null on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by_or_null as a type attribute. Currently it is parsed
+// as a declaration attribute
+
+struct on_member_pointer_complete_ty_ty_pos {
+  int size;
+  struct size_known *__sized_by_or_null(size) buf;
+};
+
+struct on_member_pointer_incomplete_ty_ty_pos {
+  int size;
+  struct size_unknown * __sized_by_or_null(size) buf;
+};
+
+struct on_member_pointer_const_incomplete_ty_ty_pos {
+  int size;
+  const struct size_unknown * __sized_by_or_null(size) buf;
+};
+
+struct on_member_pointer_void_ty_ty_pos {
+  int size;
+  void *__sized_by_or_null(size) buf;
+};
+
+// -
+
+struct on_member_pointer_fn_ptr_ty_pos {
+  int size;
+  // buffer of `size` function pointers is allowed
+  void (** __sized_by_or_null(size) fn_ptr)(void);
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty_pos {
+  int size;
+  // buffer of `size` function pointers is allowed
+  fn_ptr_ty* __sized_by_or_null(size) fn_ptr;
+};
+
+struct on_member_pointer_fn_ty_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (* __sized_by_or_null(size) fn_ptr)(void);
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty __sized_by_or_null(size) fn_ptr;
+};
+
+// TODO: This should be forbidden but isn't due to sized_by_or_null being treated
+// as a declaration attribute.
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  int size;
+  void (* __sized_by_or_null(size) * fn_ptr)(void);
+};
+
+struct on_member_pointer_struct_with_vla_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla *__sized_by_or_null(size) objects;
+};
+
+struct on_member_pointer_struct_with_annotated_vla_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* __sized_by_or_null(size) objects;
+};
+
+struct on_nested_pointer_inner {
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__sized_by_or_null` can only be nested when used in function parameters.
+  int size;
+  struct size_known *__sized_by_or_null(size) *buf;
+};
+
+struct on_nested_pointer_outer {
+  int size;
+  struct size_known **__sized_by_or_null(size) buf;
+};
+
+struct on_pointer_anon_buf_ty_pos {
+  int size;
+  struct {
+    struct size_known * __sized_by_or_null(size) buf;
+  };
+};
+
+struct on_pointer_anon_size_ty_pos {
+  struct {
+    int size;
+  };
+  struct size_known *__sized_by_or_null(size) buf;
+};
+
+//==============================================================================
+// __sized_by_or_null on struct non-pointer members
+//==============================================================================
+
+struct on_pod_ty {
+  int size;
+  // expected-error-re@+1{{'sized_by_or_null' only applies to pointers{{$}}}}
+  int wrong_ty __sized_by_or_null(size);
+};
+
+struct on_void_ty {
+  int size;
+  // expected-error-re@+2{{'sized_by_or_null' only applies to pointers{{$}}}}
+  // expected-error@+1{{field has incomplete type 'void'}}
+  void wrong_ty __sized_by_or_null(size);
+};
+
+struct on_member_array_complete_ty {
+  int size;
+  // expected-error@+1{{'sized_by_or_null' only applies to pointers; did you mean to use 'counted_by'?}}
+  struct size_known array[] __sized_by_or_null(size);
+};
diff --git a/clang/test/Sema/attr-sized-by-or-null-vla-sizeless-types.c b/clang/test/Sema/attr-sized-by-or-null-vla-sizeless-types.c
new file mode 100644
index 00000000000000..398b1df592fe38
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-or-null-vla-sizeless-types.c
@@ -0,0 +1,11 @@
+// __SVInt8_t is specific to ARM64 so specify that in the target triple
+// RUN: %clang_cc1 -triple arm64-apple-darwin -fsyntax-only -verify %s
+
+#define __sized_by_or_null(f)  __attribute__((sized_by_or_null(f)))
+
+struct on_sizeless_elt_ty {
+    int count;
+    // expected-error-re@+2{{'sized_by_or_null' only applies to pointers{{$}}}}
+    // expected-error@+1{{array has sizeless element type '__SVInt8_t'}}
+    __SVInt8_t arr[] __sized_by_or_null(count);
+};
diff --git a/clang/test/Sema/attr-sized-by-struct-ptrs-sizeless-types.c b/clang/test/Sema/attr-sized-by-struct-ptrs-sizeless-types.c
new file mode 100644
index 00000000000000..2e916bdb04720c
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-struct-ptrs-sizeless-types.c
@@ -0,0 +1,17 @@
+// __SVInt8_t is specific to ARM64 so specify that in the target triple
+// RUN: %clang_cc1 -triple arm64-apple-darwin -fsyntax-only -verify %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+struct on_sizeless_pointee_ty {
+    int count;
+    // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because '__SVInt8_t' is a sizeless type}}
+    __SVInt8_t* member __sized_by(count);
+};
+
+struct on_sizeless_ty {
+    int count;
+    // expected-error-re@+2{{'sized_by' only applies to pointers{{$}}}}
+    // expected-error@+1{{field has sizeless type '__SVInt8_t'}}
+    __SVInt8_t member __sized_by(count);
+};
diff --git a/clang/test/Sema/attr-sized-by-struct-ptrs.c b/clang/test/Sema/attr-sized-by-struct-ptrs.c
new file mode 100644
index 00000000000000..01195469c6fe42
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-struct-ptrs.c
@@ -0,0 +1,219 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+#define __counted_by(f)  __attribute__((counted_by(f)))
+
+struct size_unknown;
+struct size_known {
+  int field;
+};
+
+typedef void(*fn_ptr_ty)(void);
+
+//==============================================================================
+// __sized_by on struct member pointer in decl attribute position
+//==============================================================================
+
+struct on_member_pointer_complete_ty {
+  int size;
+  struct size_known * buf __sized_by(size);
+};
+
+struct on_member_pointer_incomplete_ty {
+  int size;
+  struct size_unknown * buf __sized_by(size);
+};
+
+struct on_member_pointer_const_incomplete_ty {
+  int size;
+  const struct size_unknown * buf __sized_by(size);
+};
+
+struct on_member_pointer_void_ty {
+  int size;
+  void* buf __sized_by(size);
+};
+
+struct on_member_pointer_fn_ptr_ty {
+  int size;
+  // buffer of function pointers with size `size` is allowed
+  void (**fn_ptr)(void) __sized_by(size);
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty {
+  int size;
+  // buffer of function pointers with size `size` is allowed
+  fn_ptr_ty* fn_ptr __sized_by(size);
+};
+
+struct on_member_pointer_fn_ty {
+  int size;
+  // a pointer to a function type is rejected: a function type has no size
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (*fn_ptr)(void) __sized_by(size);
+};
+
+struct on_member_pointer_fn_ptr_ty_ty {
+  int size;
+  // rejected: fn_ptr_ty is a pointer to a function type, which has no size
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty fn_ptr __sized_by(size);
+};
+
+struct has_unannotated_vla {
+  int count;
+  int buffer[];
+};
+
+struct on_member_pointer_struct_with_vla {
+  int size;
+  // rejected: the pointee struct has a flexible array member, so its size is unknown
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla* objects __sized_by(size);
+};
+
+struct has_annotated_vla {
+  int count;
+  int buffer[] __counted_by(count);
+};
+
+struct on_member_pointer_struct_with_annotated_vla {
+  int size;
+  // rejected: the pointee struct has a flexible array member, so its size is unknown
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* objects __sized_by(size);
+};
+
+struct on_pointer_anon_buf {
+  int size;
+  struct {
+    struct size_known *buf __sized_by(size);
+  };
+};
+
+struct on_pointer_anon_size {
+  struct {
+    int size;
+  };
+  struct size_known *buf __sized_by(size);
+};
+
+//==============================================================================
+// __sized_by on struct member pointer in type attribute position
+//==============================================================================
+// TODO: Correctly parse sized_by as a type attribute. Currently it is parsed
+// as a declaration attribute
+
+struct on_member_pointer_complete_ty_ty_pos {
+  int size;
+  struct size_known *__sized_by(size) buf;
+};
+
+struct on_member_pointer_incomplete_ty_ty_pos {
+  int size;
+  struct size_unknown * __sized_by(size) buf;
+};
+
+struct on_member_pointer_const_incomplete_ty_ty_pos {
+  int size;
+  const struct size_unknown * __sized_by(size) buf;
+};
+
+struct on_member_pointer_void_ty_ty_pos {
+  int size;
+  void *__sized_by(size) buf;
+};
+
+// -
+
+struct on_member_pointer_fn_ptr_ty_pos {
+  int size;
+  // buffer of `size` function pointers is allowed
+  void (** __sized_by(size) fn_ptr)(void);
+};
+
+struct on_member_pointer_fn_ptr_ty_ptr_ty_pos {
+  int size;
+  // buffer of `size` function pointers is allowed
+  fn_ptr_ty* __sized_by(size) fn_ptr;
+};
+
+struct on_member_pointer_fn_ty_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  void (* __sized_by(size) fn_ptr)(void);
+};
+
+struct on_member_pointer_fn_ptr_ty_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'void (void)' is a function type}}
+  fn_ptr_ty __sized_by(size) fn_ptr;
+};
+
+// TODO: This should be forbidden but isn't due to sized_by being treated
+// as a declaration attribute.
+struct on_member_pointer_fn_ptr_ty_ty_pos_inner {
+  int size;
+  void (* __sized_by(size) * fn_ptr)(void);
+};
+
+struct on_member_pointer_struct_with_vla_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'struct has_unannotated_vla' is a struct type with a flexible array member}}
+  struct has_unannotated_vla *__sized_by(size) objects;
+};
+
+struct on_member_pointer_struct_with_annotated_vla_ty_pos {
+  int size;
+  // expected-error@+1{{'sized_by' cannot be applied to a pointer with pointee of unknown size because 'struct has_annotated_vla' is a struct type with a flexible array member}}
+  struct has_annotated_vla* __sized_by(size) objects;
+};
+
+struct on_nested_pointer_inner {
+  // TODO: This should be disallowed because in the `-fbounds-safety` model
+  // `__sized_by` can only be nested when used in function parameters.
+  int size;
+  struct size_known *__sized_by(size) *buf;
+};
+
+struct on_nested_pointer_outer {
+  int size;
+  struct size_known **__sized_by(size) buf;
+};
+
+struct on_pointer_anon_buf_ty_pos {
+  int size;
+  struct {
+    struct size_known * __sized_by(size) buf;
+  };
+};
+
+struct on_pointer_anon_size_ty_pos {
+  struct {
+    int size;
+  };
+  struct size_known *__sized_by(size) buf;
+};
+
+//==============================================================================
+// __sized_by on struct non-pointer members
+//==============================================================================
+
+struct on_pod_ty {
+  int size;
+  // expected-error-re@+1{{'sized_by' only applies to pointers{{$}}}}
+  int wrong_ty __sized_by(size);
+};
+
+struct on_void_ty {
+  int size;
+  // expected-error-re@+2{{'sized_by' only applies to pointers{{$}}}}
+  // expected-error@+1{{field has incomplete type 'void'}}
+  void wrong_ty __sized_by(size);
+};
+
+struct on_member_array_complete_ty {
+  int size;
+  // expected-error@+1{{'sized_by' only applies to pointers; did you mean to use 'counted_by'?}}
+  struct size_known array[] __sized_by(size);
+};
diff --git a/clang/test/Sema/attr-sized-by-vla-sizeless-types.c b/clang/test/Sema/attr-sized-by-vla-sizeless-types.c
new file mode 100644
index 00000000000000..37e91639bb4a1f
--- /dev/null
+++ b/clang/test/Sema/attr-sized-by-vla-sizeless-types.c
@@ -0,0 +1,11 @@
+// __SVInt8_t is specific to ARM64 so specify that in the target triple
+// RUN: %clang_cc1 -triple arm64-apple-darwin -fsyntax-only -verify %s
+
+#define __sized_by(f)  __attribute__((sized_by(f)))
+
+struct on_sizeless_elt_ty {
+    int count;
+    // expected-error-re@+2{{'sized_by' only applies to pointers{{$}}}}
+    // expected-error@+1{{array has sizeless element type '__SVInt8_t'}}
+    __SVInt8_t arr[] __sized_by(count);
+};
diff --git a/clang/test/Sema/no-warn-missing-prototype.c b/clang/test/Sema/no-warn-missing-prototype.c
index 6059b6aa0f1469..17d69ac8913faa 100644
--- a/clang/test/Sema/no-warn-missing-prototype.c
+++ b/clang/test/Sema/no-warn-missing-prototype.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -Wmissing-prototypes -x c -ffreestanding -verify %s
 // RUN: %clang_cc1 -fsyntax-only -Wmissing-prototypes -x c++ -ffreestanding -verify %s
+// RUN: %clang_cc1 -fms-compatibility -fsyntax-only -Wmissing-prototypes -x c++ -ffreestanding -triple=x86_64-pc-win32 -verify -DMS %s
 // expected-no-diagnostics
 int main() {
   return 0;
@@ -8,3 +9,21 @@ int main() {
 int efi_main() {
   return 0;
 }
+
+#ifdef MS
+int wmain(int, wchar_t *[], wchar_t *[]) {
+  return 0;
+}
+
+int wWinMain(void*, void*, wchar_t*, int) {
+  return 0;
+}
+
+int WinMain(void*, void*, char*, int) {
+  return 0;
+}
+
+bool DllMain(void*, unsigned, void*) {
+  return true;
+}
+#endif
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
index 52f31c1ff05759..e0e3872b566d9e 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
@@ -12,8 +12,7 @@ void test_ballot_wave32(global uint* out, int a, int b) {
   *out = __builtin_amdgcn_ballot_w32(a == b);  // expected-error {{'__builtin_amdgcn_ballot_w32' needs target feature wavefrontsize32}}
 }
 
-// FIXME: Should error for subtargets that don't support wave32
-__attribute__((target("wavefrontsize32")))
+__attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
 void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
   *out = __builtin_amdgcn_ballot_w32(a == b);
 }
diff --git a/clang/test/SemaTemplate/nested-deduction-guides.cpp b/clang/test/SemaTemplate/nested-deduction-guides.cpp
index 38410b93ead3b9..30ede69e96e3f3 100644
--- a/clang/test/SemaTemplate/nested-deduction-guides.cpp
+++ b/clang/test/SemaTemplate/nested-deduction-guides.cpp
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -std=c++17 -verify %s
-// expected-no-diagnostics
 
 template<typename T> struct A {
   template<typename U> struct B {
@@ -16,3 +15,76 @@ using T = A<void>::B<int>;
 
 using Copy = decltype(copy);
 using Copy = A<void>::B<int>;
+
+namespace GH94614 {
+
+template <class, class> struct S {};
+
+struct trouble_1 {
+} constexpr t1;
+struct trouble_2 {
+} constexpr t2;
+struct trouble_3 {
+} constexpr t3;
+struct trouble_4 {
+} constexpr t4;
+struct trouble_5 {
+} constexpr t5;
+struct trouble_6 {
+} constexpr t6;
+struct trouble_7 {
+} constexpr t7;
+struct trouble_8 {
+} constexpr t8;
+struct trouble_9 {
+} constexpr t9;
+
+template <class U, class... T> struct Unrelated {
+  using Trouble = S<U, T...>;
+
+  template <class... V> using Trouble2 = S<V..., T...>;
+};
+
+template <class T, class U> struct Outer {
+  using Trouble = S<U, T>;
+
+  template <class V> using Trouble2 = S<V, T>;
+
+  template <class V> using Trouble3 = S<U, T>;
+
+  template <class V> struct Inner {
+    template <class W> struct Paranoid {
+      using Trouble4 = S<W, T>;
+
+      template <class... X> using Trouble5 = S<X..., T>;
+    };
+
+    Inner(trouble_1, V v, Trouble trouble) {}
+    Inner(trouble_2, V v, Trouble2<V> trouble) {}
+    Inner(trouble_3, V v, Trouble3<V> trouble) {}
+    Inner(trouble_4, V v, typename Unrelated<U, T>::template Trouble2<V> trouble) {}
+    Inner(trouble_5, V v, typename Unrelated<U, T>::Trouble trouble) {}
+    Inner(trouble_6, V v, typename Unrelated<V, T>::Trouble trouble) {}
+    Inner(trouble_7, V v, typename Paranoid<V>::Trouble4 trouble) {}
+    Inner(trouble_8, V v, typename Paranoid<V>::template Trouble5<V> trouble) {}
+    template <class W>
+    Inner(trouble_9, V v, W w, typename Paranoid<V>::template Trouble5<W> trouble) {}
+  };
+};
+
+S<int, char> s;
+
+Outer<char, int>::Inner _1(t1, 42, s);
+Outer<char, int>::Inner _2(t2, 42, s);
+Outer<char, int>::Inner _3(t3, 42, s);
+Outer<char, int>::Inner _4(t4, 42, s);
+Outer<char, int>::Inner _5(t5, 42, s);
+Outer<char, int>::Inner _6(t6, 42, s);
+Outer<char, int>::Inner _7(t7, 42, s);
+Outer<char, int>::Inner _8(t8, 42, s);
+Outer<char, int>::Inner _9(t9, 42, 24, s);
+
+// Make sure we don't accidentally inject the TypedefNameDecl into the TU.
+Trouble should_not_be_in_the_tu_decl; // expected-error {{unknown type name 'Trouble'}}
+
+} // namespace GH94614
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index c8bec41db36e99..bc152e304aaaf1 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -32,9 +32,9 @@ set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}
     ${LOONGARCH64})
 set(ALL_ASAN_ABI_SUPPORTED_ARCH ${X86_64} ${ARM64} ${ARM64_32})
 set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${LOONGARCH64})
-#set(ALL_RTSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}
-#    ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
-#    ${LOONGARCH64})
+set(ALL_RTSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}
+    ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
+    ${LOONGARCH64})
 
 if(ANDROID)
   set(OS_NAME "Android")
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 1130f9f721df4c..c0bee77bfe70fc 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -751,7 +751,8 @@ else()
   set(COMPILER_RT_HAS_ASAN FALSE)
 endif()
 
-if (COMPILER_RT_HAS_SANITIZER_COMMON AND RTSAN_SUPPORTED_ARCH)
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND RTSAN_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Android|Darwin|Linux")
   set(COMPILER_RT_HAS_RTSAN TRUE)
 else()
   set(COMPILER_RT_HAS_RTSAN FALSE)
diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt b/compiler-rt/lib/memprof/tests/CMakeLists.txt
index 0b5c302a4ce5d0..a35f12bc14265d 100644
--- a/compiler-rt/lib/memprof/tests/CMakeLists.txt
+++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt
@@ -49,7 +49,6 @@ endif()
 set(MEMPROF_UNITTEST_LINK_LIBRARIES
   ${COMPILER_RT_UNWINDER_LINK_LIBS}
   ${SANITIZER_TEST_CXX_LIBRARIES})
-append_list_if(COMPILER_RT_HAS_LIBDL -ldl MEMPROF_UNITTEST_LINK_LIBRARIES)
 
 # Adds memprof tests for each architecture.
 macro(add_memprof_tests_for_arch arch)
diff --git a/compiler-rt/lib/nsan/tests/CMakeLists.txt b/compiler-rt/lib/nsan/tests/CMakeLists.txt
index 3d962e70a7bf06..e472fc5c06f907 100644
--- a/compiler-rt/lib/nsan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/nsan/tests/CMakeLists.txt
@@ -23,9 +23,6 @@ set(NSAN_UNITTESTS
 
 add_custom_target(NsanUnitTests)
 
-# set(NSAN_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS} -ldl)
-# list(APPEND NSAN_UNITTEST_LINK_FLAGS --driver-mode=g++)
-
 if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST NSAN_SUPPORTED_ARCH)
   # NSan unit tests are only run on the host machine.
   set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH})
diff --git a/compiler-rt/lib/rtsan/tests/CMakeLists.txt b/compiler-rt/lib/rtsan/tests/CMakeLists.txt
index d96e538b255f4e..9eda116541ae36 100644
--- a/compiler-rt/lib/rtsan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/rtsan/tests/CMakeLists.txt
@@ -36,13 +36,17 @@ set(RTSAN_UNITTEST_LINK_FLAGS
   ${SANITIZER_TEST_CXX_LIBRARIES}
   -no-pie)
 
+append_list_if(COMPILER_RT_HAS_LIBDL -ldl RTSAN_UNITTEST_LINK_FLAGS)
+append_list_if(COMPILER_RT_HAS_LIBRT -lrt RTSAN_UNITTEST_LINK_FLAGS)
+append_list_if(COMPILER_RT_HAS_LIBM -lm RTSAN_UNITTEST_LINK_FLAGS)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread RTSAN_UNITTEST_LINK_FLAGS)
+append_list_if(COMPILER_RT_HAS_LIBLOG -llog RTSAN_UNITTEST_LINK_FLAGS)
+
 if (APPLE)
   add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
   list(APPEND RTSAN_UNITTEST_LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS})
   list(APPEND RTSAN_UNITTEST_LINK_FLAGS ${DARWIN_osx_LINK_FLAGS})
   list(APPEND RTSAN_UNITTEST_CFLAGS ${DARWIN_osx_CFLAGS})
-else()
-  list(APPEND RTSAN_UNITTEST_LINK_FLAGS -latomic)
 endif()
 
 set(COMPILER_RT_GOOGLETEST_SOURCES ${COMPILER_RT_GTEST_SOURCE} ${COMPILER_RT_GMOCK_SOURCE})
@@ -60,7 +64,7 @@ foreach(arch ${RTSAN_TEST_ARCH})
   #  RtsanUnitTests "Rtsan-${arch}-Test" ${arch}
   #  COMPILE_DEPS ${RTSAN_UNITTEST_HEADERS}
   #  SOURCES ${RTSAN_INST_TEST_SOURCES} ${COMPILER_RT_GOOGLETEST_SOURCES}
-  #  DEPS llvm_gtest rtsan
+  #  DEPS rtsan
   #  CFLAGS ${RTSAN_UNITTEST_CFLAGS} -fsanitize=realtime
   #  LINK_FLAGS ${RTSAN_UNITTEST_LINK_FLAGS} -fsanitize=realtime)
 
@@ -94,7 +98,6 @@ foreach(arch ${RTSAN_TEST_ARCH})
     COMPILE_DEPS ${RTSAN_UNITTEST_HEADERS}
     SOURCES ${RTSAN_NOINST_TEST_SOURCES}
             ${COMPILER_RT_GOOGLETEST_SOURCES}
-    DEPS llvm_gtest
     CFLAGS ${RTSAN_UNITTEST_CFLAGS}
     LINK_FLAGS ${RTSAN_UNITTEST_LINK_FLAGS}
     RUNTIME ${RTSAN_TEST_RUNTIME})
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index 27c36b7f84d892..d37430e0e5773e 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -212,7 +212,6 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
                                            const llvm::Triple triple) {
   llvm::StringRef cpu = targetOpts.cpu;
   llvm::StringMap<bool> implicitFeaturesMap;
-  std::string errorMsg;
   // Get the set of implicit target features
   llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap);
 
@@ -222,11 +221,12 @@ getExplicitAndImplicitAMDGPUTargetFeatures(clang::DiagnosticsEngine &diags,
     implicitFeaturesMap[userKeyString] = (userFeature[0] == '+');
   }
 
-  if (!llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap,
-                                           errorMsg)) {
+  auto HasError =
+      llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap);
+  if (HasError.first) {
     unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error,
                                             "Unsupported feature ID: %0");
-    diags.Report(diagID) << errorMsg.data();
+    diags.Report(diagID) << HasError.second;
     return std::string();
   }
 
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index 77869b79682bfd..90a4dab2decba9 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -314,6 +314,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.isnan
+    libc.src.math.isnanf
+    libc.src.math.isnanl
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index 981db8c0bd0cc7..e735dd157c6b2e 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -309,6 +309,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.isnan
+    libc.src.math.isnanf
+    libc.src.math.isnanl
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 62f3f0df247ccc..166144d6343449 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -313,6 +313,8 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.nexttowardf
     libc.src.math.pow
     libc.src.math.powf
+    libc.src.math.powi
+    libc.src.math.powif
     libc.src.math.remainder
     libc.src.math.remainderf
     libc.src.math.remquo
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index c2b37ff1ceec1c..030c3d3a99a02c 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -424,6 +424,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.isnan
+    libc.src.math.isnanf
+    libc.src.math.isnanl
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index f7589c6a97a2ea..516a4b6ce34334 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -426,6 +426,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.isnan
+    libc.src.math.isnanf
+    libc.src.math.isnanl
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index c540f50a17bc8f..b6c55e7aa3033b 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -450,6 +450,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.isnan
+    libc.src.math.isnanf
+    libc.src.math.isnanl
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 7914a3d7e6d1a9..70412e4ed203d5 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -320,6 +320,8 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | pow       | |check|          |                 |                        |                      |                        | 7.12.7.5               | F.10.4.5                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
+| powi\*    |                  |                 |                        |                      |                        |                        |                            |
++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | pown      |                  |                 |                        |                      |                        | 7.12.7.6               | F.10.4.6                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | powr      |                  |                 |                        |                      |                        | 7.12.7.7               | F.10.4.7                   |
diff --git a/libc/hdr/math_macros.h b/libc/hdr/math_macros.h
index 863451123f3f80..d5a823723747c9 100644
--- a/libc/hdr/math_macros.h
+++ b/libc/hdr/math_macros.h
@@ -11,6 +11,7 @@
 
 #ifdef LIBC_FULL_BUILD
 
+#include "include/llvm-libc-macros/math-function-macros.h"
 #include "include/llvm-libc-macros/math-macros.h"
 
 #else // Overlay mode
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index f8ef35078a8c46..2cf7206f3a625e 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -122,6 +122,7 @@ add_gen_header(
     .llvm_libc_common_h
     .llvm-libc-macros.float16_macros
     .llvm-libc-macros.math_macros
+    .llvm-libc-macros.math_function_macros
     .llvm-libc-types.double_t
     .llvm-libc-types.float_t
     .llvm-libc-types.float128
diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt
index 86d6271ff88ac2..3c10abef8768c0 100644
--- a/libc/include/llvm-libc-macros/CMakeLists.txt
+++ b/libc/include/llvm-libc-macros/CMakeLists.txt
@@ -117,6 +117,12 @@ add_macro_header(
     .limits_macros
 )
 
+add_macro_header(
+  math_function_macros
+  HDR
+    math-function-macros.h
+)
+
 add_macro_header(
   offsetof_macro
   HDR
diff --git a/libc/include/llvm-libc-macros/math-function-macros.h b/libc/include/llvm-libc-macros/math-function-macros.h
new file mode 100644
index 00000000000000..551719af2b4ddf
--- /dev/null
+++ b/libc/include/llvm-libc-macros/math-function-macros.h
@@ -0,0 +1,16 @@
+//===-- Definition of function macros from math.h -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H
+#define LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H
+
+#define isfinite(x) __builtin_isfinite(x)
+#define isinf(x) __builtin_isinf(x)
+#define isnan(x) __builtin_isnan(x)
+
+#endif // LLVM_LIBC_MACROS_MATH_FUNCTION_MACROS_H
diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h
index efecdf16962a8e..9c56da9e89db38 100644
--- a/libc/include/llvm-libc-macros/math-macros.h
+++ b/libc/include/llvm-libc-macros/math-macros.h
@@ -49,9 +49,4 @@
 #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT)
 #endif
 
-// TODO: Move generic functional math macros to a separate header file.
-#define isfinite(x) __builtin_isfinite(x)
-#define isinf(x) __builtin_isinf(x)
-#define isnan(x) __builtin_isnan(x)
-
 #endif // LLVM_LIBC_MACROS_MATH_MACROS_H
diff --git a/libc/include/math.h.def b/libc/include/math.h.def
index 454b8f29805145..9822d8bd7ba173 100644
--- a/libc/include/math.h.def
+++ b/libc/include/math.h.def
@@ -17,4 +17,7 @@
 
 %%public_api()
 
+
+#include "llvm-libc-macros/math-function-macros.h"
+
 #endif // LLVM_LIBC_MATH_H
diff --git a/libc/spec/bsd_ext.td b/libc/spec/bsd_ext.td
index 50ca8b919ff2c5..4d33313521735e 100644
--- a/libc/spec/bsd_ext.td
+++ b/libc/spec/bsd_ext.td
@@ -1,4 +1,16 @@
 def BsdExtensions : StandardSpec<"BSDExtensions"> {
+  HeaderSpec Math = HeaderSpec<
+      "math.h",
+      [], // Macros
+      [], // Types
+      [], // Enumerations
+      [
+          FunctionSpec<"isnan", RetValSpec<IntType>, [ArgSpec<DoubleType>]>,
+          FunctionSpec<"isnanf", RetValSpec<IntType>, [ArgSpec<FloatType>]>,
+          FunctionSpec<"isnanl", RetValSpec<IntType>, [ArgSpec<LongDoubleType>]>,
+      ]
+  >;
+
   HeaderSpec String = HeaderSpec<
       "string.h",
       [], // Macros
@@ -67,6 +79,7 @@ def BsdExtensions : StandardSpec<"BSDExtensions"> {
   >;
 
   let Headers = [
+    Math,
     String,
     Strings,
     SysWait,
diff --git a/libc/spec/llvm_libc_ext.td b/libc/spec/llvm_libc_ext.td
index c0374cb6311918..86215029831cae 100644
--- a/libc/spec/llvm_libc_ext.td
+++ b/libc/spec/llvm_libc_ext.td
@@ -76,6 +76,9 @@ def LLVMLibcExt : StandardSpec<"llvm_libc_ext"> {
           GuardedFunctionSpec<"f16sqrt", RetValSpec<Float16Type>, [ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16sqrtl", RetValSpec<Float16Type>, [ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
+
+          FunctionSpec<"powi", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
+          FunctionSpec<"powif", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
       ]
   >;
 
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 8d44a98a693f87..32267bb68e1cc6 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -67,16 +67,26 @@ template <size_t Bits> struct DyadicFloat {
   }
 
   // Used for aligning exponents.  Output might not be normalized.
-  LIBC_INLINE constexpr DyadicFloat &shift_left(int shift_length) {
-    exponent -= shift_length;
-    mantissa <<= static_cast<size_t>(shift_length);
+  LIBC_INLINE constexpr DyadicFloat &shift_left(unsigned shift_length) {
+    if (shift_length < Bits) {
+      exponent -= static_cast<int>(shift_length);
+      mantissa <<= shift_length;
+    } else {
+      exponent = 0;
+      mantissa = MantissaType(0);
+    }
     return *this;
   }
 
   // Used for aligning exponents.  Output might not be normalized.
-  LIBC_INLINE constexpr DyadicFloat &shift_right(int shift_length) {
-    exponent += shift_length;
-    mantissa >>= static_cast<size_t>(shift_length);
+  LIBC_INLINE constexpr DyadicFloat &shift_right(unsigned shift_length) {
+    if (shift_length < Bits) {
+      exponent += static_cast<int>(shift_length);
+      mantissa >>= shift_length;
+    } else {
+      exponent = 0;
+      mantissa = MantissaType(0);
+    }
     return *this;
   }
 
@@ -261,9 +271,9 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
 
   // Align exponents
   if (a.exponent > b.exponent)
-    b.shift_right(a.exponent - b.exponent);
+    b.shift_right(static_cast<unsigned>(a.exponent - b.exponent));
   else if (b.exponent > a.exponent)
-    a.shift_right(b.exponent - a.exponent);
+    a.shift_right(static_cast<unsigned>(b.exponent - a.exponent));
 
   DyadicFloat<Bits> result;
 
diff --git a/libc/src/__support/OSUtil/baremetal/exit.cpp b/libc/src/__support/OSUtil/baremetal/exit.cpp
index 08473f7f3b00b2..26088c28b0a50e 100644
--- a/libc/src/__support/OSUtil/baremetal/exit.cpp
+++ b/libc/src/__support/OSUtil/baremetal/exit.cpp
@@ -8,11 +8,11 @@
 
 #include "src/__support/OSUtil/exit.h"
 
+namespace LIBC_NAMESPACE::internal {
+
 // This is intended to be provided by the vendor.
 extern "C" [[noreturn]] void __llvm_libc_exit(int status);
 
-namespace LIBC_NAMESPACE::internal {
-
 [[noreturn]] void exit(int status) { __llvm_libc_exit(status); }
 
 } // namespace LIBC_NAMESPACE::internal
diff --git a/libc/src/__support/OSUtil/baremetal/io.cpp b/libc/src/__support/OSUtil/baremetal/io.cpp
index 5dd92e4a56ce4f..c97bd5ae65b134 100644
--- a/libc/src/__support/OSUtil/baremetal/io.cpp
+++ b/libc/src/__support/OSUtil/baremetal/io.cpp
@@ -10,6 +10,8 @@
 
 #include "src/__support/CPP/string_view.h"
 
+namespace LIBC_NAMESPACE {
+
 // This is intended to be provided by the vendor.
 
 extern struct __llvm_libc_stdin __llvm_libc_stdin;
@@ -17,8 +19,6 @@ extern "C" ssize_t __llvm_libc_stdin_read(void *cookie, char *buf, size_t size);
 
 extern "C" void __llvm_libc_log_write(const char *msg, size_t len);
 
-namespace LIBC_NAMESPACE {
-
 ssize_t read_from_stdin(char *buf, size_t size) {
   return __llvm_libc_stdin_read(reinterpret_cast<void *>(&__llvm_libc_stdin),
                                 buf, size);
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 74c2e4efda617e..6462afbc54a4fe 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -245,6 +245,10 @@ add_math_entrypoint_object(ilogbl)
 add_math_entrypoint_object(ilogbf16)
 add_math_entrypoint_object(ilogbf128)
 
+add_math_entrypoint_object(isnan)
+add_math_entrypoint_object(isnanf)
+add_math_entrypoint_object(isnanl)
+
 add_math_entrypoint_object(llogb)
 add_math_entrypoint_object(llogbf)
 add_math_entrypoint_object(llogbl)
@@ -342,6 +346,8 @@ add_math_entrypoint_object(nextupf128)
 
 add_math_entrypoint_object(pow)
 add_math_entrypoint_object(powf)
+add_math_entrypoint_object(powi)
+add_math_entrypoint_object(powif)
 
 add_math_entrypoint_object(remainder)
 add_math_entrypoint_object(remainderf)
diff --git a/libc/src/math/amdgpu/CMakeLists.txt b/libc/src/math/amdgpu/CMakeLists.txt
index 202177f54b11ad..2ceb12785c6073 100644
--- a/libc/src/math/amdgpu/CMakeLists.txt
+++ b/libc/src/math/amdgpu/CMakeLists.txt
@@ -468,6 +468,30 @@ add_entrypoint_object(
   VENDOR
 )
 
+add_entrypoint_object(
+  powi
+  SRCS
+    powi.cpp
+  HDRS
+    ../powi.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+  VENDOR
+)
+
+add_entrypoint_object(
+  powif
+  SRCS
+    powif.cpp
+  HDRS
+    ../powif.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+  VENDOR
+)
+
 add_entrypoint_object(
   sinh
   SRCS
diff --git a/libc/src/math/amdgpu/declarations.h b/libc/src/math/amdgpu/declarations.h
index 5d7f3c9609d238..81d788d943f22b 100644
--- a/libc/src/math/amdgpu/declarations.h
+++ b/libc/src/math/amdgpu/declarations.h
@@ -65,6 +65,8 @@ float __ocml_nextafter_f32(float, float);
 double __ocml_nextafter_f64(double, double);
 float __ocml_pow_f32(float, float);
 double __ocml_pow_f64(double, double);
+float __ocml_pown_f32(float, int);
+double __ocml_pown_f64(double, int);
 float __ocml_sin_f32(float);
 double __ocml_sin_f64(double);
 float __ocml_sincos_f32(float, float *);
diff --git a/libc/src/math/amdgpu/powi.cpp b/libc/src/math/amdgpu/powi.cpp
new file mode 100644
index 00000000000000..b13cb52aaaf6ad
--- /dev/null
+++ b/libc/src/math/amdgpu/powi.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of the powi function for GPU -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/powi.h"
+#include "src/__support/common.h"
+
+#include "declarations.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, powi, (double x, int y)) {
+  return __ocml_pown_f64(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/amdgpu/powif.cpp b/libc/src/math/amdgpu/powif.cpp
new file mode 100644
index 00000000000000..7682f0e1a27a65
--- /dev/null
+++ b/libc/src/math/amdgpu/powif.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of the powif function for GPU ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/powif.h"
+#include "src/__support/common.h"
+
+#include "declarations.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, powif, (float x, int y)) {
+  return __ocml_pown_f32(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index e2bbdcfe5a15b8..5e920307d39de4 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2848,6 +2848,36 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  isnan
+  SRCS
+    isnan.cpp
+  HDRS
+    ../isnan.h
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  isnanf
+  SRCS
+    isnanf.cpp
+  HDRS
+    ../isnanf.h
+  COMPILE_OPTIONS
+    -O3
+)
+
+add_entrypoint_object(
+  isnanl
+  SRCS
+    isnanl.cpp
+  HDRS
+    ../isnanl.h
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   nan
   SRCS
diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp
index 574c4b9aaf39f7..150c0bbcf60da9 100644
--- a/libc/src/math/generic/expm1.cpp
+++ b/libc/src/math/generic/expm1.cpp
@@ -25,7 +25,9 @@
 #include "src/__support/integer_literals.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 
-#include <errno.h>
+#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
+#define LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+#endif
 
 // #define DEBUGDEBUG
 
@@ -51,7 +53,7 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
 constexpr uint64_t ERR_D = 0x3c08000000000000;
 // Errors when using double-double precision.
 // 0x1.0p-99
-constexpr uint64_t ERR_DD = 0x39c0000000000000;
+[[maybe_unused]] constexpr uint64_t ERR_DD = 0x39c0000000000000;
 
 // -2^-12 * log(2)
 // > a = -2^-12 * log(2);
@@ -108,7 +110,7 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
 // Return (exp(dx) - 1)/dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
 // For |dx| < 2^-13 + 2^-30:
 //   | output - exp(dx) | < 2^-126.
-Float128 poly_approx_f128(const Float128 &dx) {
+[[maybe_unused]] Float128 poly_approx_f128(const Float128 &dx) {
   constexpr Float128 COEFFS_128[]{
       {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
       {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
@@ -127,13 +129,14 @@ Float128 poly_approx_f128(const Float128 &dx) {
 
 #ifdef DEBUGDEBUG
 std::ostream &operator<<(std::ostream &OS, const Float128 &r) {
-  OS << (r.sign ? "-(" : "(") << r.mantissa.val[0] << " + " << r.mantissa.val[1]
-     << " * 2^64) * 2^" << r.exponent << "\n";
+  OS << (r.sign == Sign::NEG ? "-(" : "(") << r.mantissa.val[0] << " + "
+     << r.mantissa.val[1] << " * 2^64) * 2^" << r.exponent << "\n";
   return OS;
 }
 
 std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
-  OS << std::hexfloat << r.hi << " + " << r.lo << std::defaultfloat << "\n";
+  OS << std::hexfloat << "(" << r.hi << " + " << r.lo << ")"
+     << std::defaultfloat << "\n";
   return OS;
 }
 #endif
@@ -141,7 +144,7 @@ std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
 // Compute exp(x) - 1 using 128-bit precision.
 // TODO(lntue): investigate triple-double precision implementation for this
 // step.
-Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
+[[maybe_unused]] Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
   // Recalculate dx:
 
   double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -182,9 +185,10 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
 #ifdef DEBUGDEBUG
   std::cout << "=== VERY SLOW PASS ===\n"
             << "        kd: " << kd << "\n"
-            << "        dx: " << dx << "exp_mid_m1: " << exp_mid_m1
-            << "   exp_mid: " << exp_mid << "         p: " << p
-            << "         r: " << r << std::endl;
+            << "        hi: " << hi << "\n"
+            << " minus_one: " << minus_one << "        dx: " << dx
+            << "exp_mid_m1: " << exp_mid_m1 << "   exp_mid: " << exp_mid
+            << "         p: " << p << "         r: " << r << std::endl;
 #endif
 
   return r;
@@ -479,6 +483,12 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
   // Use double-double
   DoubleDouble r_dd = exp_double_double(x, kd, exp_mid, hi_part);
 
+#ifdef LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+  int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+  double r =
+      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(r_dd.hi + r_dd.lo));
+  return r;
+#else
   double err_dd = cpp::bit_cast<double>(ERR_DD + err);
 
   double upper_dd = r_dd.hi + (r_dd.lo + err_dd);
@@ -494,6 +504,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
   Float128 r_f128 = expm1_f128(x, kd, idx1, idx2);
 
   return static_cast<double>(r_f128);
+#endif // LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/isnan.cpp b/libc/src/math/generic/isnan.cpp
new file mode 100644
index 00000000000000..dd7eadb2a2031c
--- /dev/null
+++ b/libc/src/math/generic/isnan.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of isnan function ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/isnan.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, isnan, (double x)) { return __builtin_isnan(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/isnanf.cpp b/libc/src/math/generic/isnanf.cpp
new file mode 100644
index 00000000000000..98a81b03f538af
--- /dev/null
+++ b/libc/src/math/generic/isnanf.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of isnanf function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/isnanf.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, isnanf, (float x)) { return __builtin_isnan(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/isnanl.cpp b/libc/src/math/generic/isnanl.cpp
new file mode 100644
index 00000000000000..d61bfd52f0b7dd
--- /dev/null
+++ b/libc/src/math/generic/isnanl.cpp
@@ -0,0 +1,16 @@
+//===-- Implementation of isnanl function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/isnanl.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, isnanl, (long double x)) { return __builtin_isnan(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/isnan.h b/libc/src/math/isnan.h
new file mode 100644
index 00000000000000..eda8e7eb30f39b
--- /dev/null
+++ b/libc/src/math/isnan.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for isnan -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ISNAN_H
+#define LLVM_LIBC_SRC_MATH_ISNAN_H
+
+namespace LIBC_NAMESPACE {
+
+int isnan(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ISNAN_H
diff --git a/libc/src/math/isnanf.h b/libc/src/math/isnanf.h
new file mode 100644
index 00000000000000..a12d39ee5af971
--- /dev/null
+++ b/libc/src/math/isnanf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for isnanf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ISNANF_H
+#define LLVM_LIBC_SRC_MATH_ISNANF_H
+
+namespace LIBC_NAMESPACE {
+
+int isnanf(float x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ISNANF_H
diff --git a/libc/src/math/isnanl.h b/libc/src/math/isnanl.h
new file mode 100644
index 00000000000000..9fbfca03cb15e6
--- /dev/null
+++ b/libc/src/math/isnanl.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for isnanl ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ISNANL_H
+#define LLVM_LIBC_SRC_MATH_ISNANL_H
+
+namespace LIBC_NAMESPACE {
+
+int isnanl(long double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ISNANL_H
diff --git a/libc/src/math/nvptx/CMakeLists.txt b/libc/src/math/nvptx/CMakeLists.txt
index bf37c52f09e44f..4295ebf9ff630c 100644
--- a/libc/src/math/nvptx/CMakeLists.txt
+++ b/libc/src/math/nvptx/CMakeLists.txt
@@ -421,6 +421,30 @@ add_entrypoint_object(
   VENDOR
 )
 
+add_entrypoint_object(
+  powi
+  SRCS
+    powi.cpp
+  HDRS
+    ../powi.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+  VENDOR
+)
+
+add_entrypoint_object(
+  powif
+  SRCS
+    powif.cpp
+  HDRS
+    ../powif.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+  VENDOR
+)
+
 add_entrypoint_object(
   sinh
   SRCS
diff --git a/libc/src/math/nvptx/declarations.h b/libc/src/math/nvptx/declarations.h
index d41b16c8eec9fd..cf8f6e09e492aa 100644
--- a/libc/src/math/nvptx/declarations.h
+++ b/libc/src/math/nvptx/declarations.h
@@ -64,6 +64,8 @@ double __nv_nextafter(double, double);
 float __nv_nextafterf(float, float);
 double __nv_pow(double, double);
 float __nv_powf(float, float);
+double __nv_powi(double, int);
+float __nv_powif(float, int);
 double __nv_sin(double);
 float __nv_sinf(float);
 void __nv_sincos(double, double *, double *);
diff --git a/libc/src/math/nvptx/powi.cpp b/libc/src/math/nvptx/powi.cpp
new file mode 100644
index 00000000000000..b7871ab9da92e9
--- /dev/null
+++ b/libc/src/math/nvptx/powi.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the powi function for GPU -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/powi.h"
+#include "src/__support/common.h"
+
+#include "declarations.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, powi, (double x, int y)) { return __nv_powi(x, y); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/nvptx/powif.cpp b/libc/src/math/nvptx/powif.cpp
new file mode 100644
index 00000000000000..0143745479cecb
--- /dev/null
+++ b/libc/src/math/nvptx/powif.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the powif function for GPU ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/powif.h"
+#include "src/__support/common.h"
+
+#include "declarations.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, powif, (float x, int y)) { return __nv_powif(x, y); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/powi.h b/libc/src/math/powi.h
new file mode 100644
index 00000000000000..1ee7140f06489d
--- /dev/null
+++ b/libc/src/math/powi.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for powi --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_POWI_H
+#define LLVM_LIBC_SRC_MATH_POWI_H
+
+namespace LIBC_NAMESPACE {
+
+double powi(double x, int y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_POWI_H
diff --git a/libc/src/math/powif.h b/libc/src/math/powif.h
new file mode 100644
index 00000000000000..417012a34fecc2
--- /dev/null
+++ b/libc/src/math/powif.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for powif -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_POWIF_H
+#define LLVM_LIBC_SRC_MATH_POWIF_H
+
+namespace LIBC_NAMESPACE {
+
+float powif(float x, int y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_POWIF_H
diff --git a/libc/src/stdlib/exit.cpp b/libc/src/stdlib/exit.cpp
index 1f7ccbb5566074..1afeec5a460da4 100644
--- a/libc/src/stdlib/exit.cpp
+++ b/libc/src/stdlib/exit.cpp
@@ -10,10 +10,10 @@
 #include "src/__support/OSUtil/exit.h"
 #include "src/__support/common.h"
 
-extern "C" void __cxa_finalize(void *);
-
 namespace LIBC_NAMESPACE {
 
+extern "C" void __cxa_finalize(void *);
+
 [[noreturn]] LLVM_LIBC_FUNCTION(void, exit, (int status)) {
   __cxa_finalize(nullptr);
   internal::exit(status);
diff --git a/libc/startup/baremetal/fini.cpp b/libc/startup/baremetal/fini.cpp
index 84997fb4fa1d81..745bd094b24fee 100644
--- a/libc/startup/baremetal/fini.cpp
+++ b/libc/startup/baremetal/fini.cpp
@@ -9,13 +9,13 @@
 #include <stddef.h>
 #include <stdint.h>
 
+namespace LIBC_NAMESPACE {
+
 extern "C" {
 extern uintptr_t __fini_array_start[];
 extern uintptr_t __fini_array_end[];
 }
 
-namespace LIBC_NAMESPACE {
-
 using FiniCallback = void(void);
 
 extern "C" void __libc_fini_array(void) {
diff --git a/libc/startup/baremetal/init.cpp b/libc/startup/baremetal/init.cpp
index 08dff74f051989..21ec0e5ca756d0 100644
--- a/libc/startup/baremetal/init.cpp
+++ b/libc/startup/baremetal/init.cpp
@@ -9,6 +9,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+namespace LIBC_NAMESPACE {
+
 extern "C" {
 extern uintptr_t __preinit_array_start[];
 extern uintptr_t __preinit_array_end[];
@@ -16,8 +18,6 @@ extern uintptr_t __init_array_start[];
 extern uintptr_t __init_array_end[];
 }
 
-namespace LIBC_NAMESPACE {
-
 using InitCallback = void(void);
 
 extern "C" void __libc_init_array(void) {
diff --git a/libc/test/src/math/expm1_test.cpp b/libc/test/src/math/expm1_test.cpp
index 1bf07f19f3a7c6..df5c08864bb8a3 100644
--- a/libc/test/src/math/expm1_test.cpp
+++ b/libc/test/src/math/expm1_test.cpp
@@ -14,7 +14,6 @@
 #include "test/UnitTest/Test.h"
 #include "utils/MPFRWrapper/MPFRUtils.h"
 
-#include <errno.h>
 #include <stdint.h>
 
 using LlvmLibcExpm1Test = LIBC_NAMESPACE::testing::FPTest<double>;
@@ -23,34 +22,24 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
 TEST_F(LlvmLibcExpm1Test, TrickyInputs) {
-  constexpr int N = 21;
-  constexpr uint64_t INPUTS[N] = {
-      0x3FD79289C6E6A5C0, // x=0x1.79289c6e6a5cp-2
-      0x3FD05DE80A173EA0, // x=0x1.05de80a173eap-2
-      0xbf1eb7a4cb841fcc, // x=-0x1.eb7a4cb841fccp-14
-      0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
-      0x3fda7b764e2cf47a, // x=0x1.a7b764e2cf47ap-2
-      0xc04757852a4b93aa, // x=-0x1.757852a4b93aap+5
-      0x4044c19e5712e377, // x=0x1.4c19e5712e377p+5
-      0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
-      0xc039a74cdab36c28, // x=-0x1.9a74cdab36c28p+4
-      0xc085b3e4e2e3bba9, // x=-0x1.5b3e4e2e3bba9p+9
-      0xc086960d591aec34, // x=-0x1.6960d591aec34p+9
-      0xc086232c09d58d91, // x=-0x1.6232c09d58d91p+9
-      0xc0874910d52d3051, // x=-0x1.74910d52d3051p9
-      0xc0867a172ceb0990, // x=-0x1.67a172ceb099p+9
-      0xc08ff80000000000, // x=-0x1.ff8p+9
-      0xbc971547652b82fe, // x=-0x1.71547652b82fep-54
-      0xbce465655f122ff6, // x=-0x1.465655f122ff6p-49
-      0x3d1bc8ee6b28659a, // x=0x1.bc8ee6b28659ap-46
-      0x3f18442b169f672d, // x=0x1.8442b169f672dp-14
-      0xc02b4f0cfb15ca0f, // x=-0x1.b4f0cfb15ca0fp+3
-      0xc042b708872320dd, // x=-0x1.2b708872320ddp+5
+  constexpr double INPUTS[] = {
+      0x1.71547652b82fep-54, 0x1.465655f122ff6p-49, 0x1.bc8ee6b28659ap-46,
+      0x1.8442b169f672dp-14, 0x1.9a61fb925970dp-14, 0x1.eb7a4cb841fccp-14,
+      0x1.05de80a173eap-2,   0x1.79289c6e6a5cp-2,   0x1.a7b764e2cf47ap-2,
+      0x1.b4f0cfb15ca0fp+3,  0x1.9a74cdab36c28p+4,  0x1.2b708872320ddp+5,
+      0x1.4c19e5712e377p+5,  0x1.757852a4b93aap+5,  0x1.77f74111e0894p+6,
+      0x1.a6c3780bbf824p+6,  0x1.e3d57e4c557f6p+6,  0x1.f07560077985ap+6,
+      0x1.1f0da93354198p+7,  0x1.71018579c0758p+7,  0x1.204684c1167e9p+8,
+      0x1.5b3e4e2e3bba9p+9,  0x1.6232c09d58d91p+9,  0x1.67a172ceb099p+9,
+      0x1.6960d591aec34p+9,  0x1.74910d52d3051p+9,  0x1.ff8p+9,
   };
+  constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
   for (int i = 0; i < N; ++i) {
-    double x = FPBits(INPUTS[i]).get_val();
+    double x = INPUTS[i];
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
                                    LIBC_NAMESPACE::expm1(x), 0.5);
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, -x,
+                                   LIBC_NAMESPACE::expm1(-x), 0.5);
   }
 }
 
@@ -98,10 +87,10 @@ TEST_F(LlvmLibcExpm1Test, InDoubleRange) {
         }
       }
     }
-    tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
-         << " tests.\n";
-    tlog << "   Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
     if (fails) {
+      tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
+           << " tests.\n";
+      tlog << "   Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
       EXPECT_MPFR_MATCH(mpfr::Operation::Expm1, mx, mr, 0.5, rounding_mode);
     }
   };
diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp
index e9e3e59f4d12de..80d57939a4f61b 100644
--- a/libc/test/src/math/tan_test.cpp
+++ b/libc/test/src/math/tan_test.cpp
@@ -22,14 +22,15 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
   constexpr double INPUTS[] = {
       0x1.d130383d17321p-27,   0x1.8000000000009p-23,  0x1.8000000000024p-22,
       0x1.800000000009p-21,    0x1.20000000000f3p-20,  0x1.800000000024p-20,
-      0x1.e0000000001c2p-20,   0x1.0da8cc189b47dp-10,  0x1.00a33764a0a83p-7,
-      0x1.911a18779813fp-7,    0x1.940c877fb7dacp-7,   0x1.f42fb19b5b9b2p-6,
-      0x1.0285070f9f1bcp-5,    0x1.6ca9ef729af76p-1,   0x1.23f40dccdef72p+0,
-      0x1.43cf16358c9d7p+0,    0x1.addf3b9722265p+0,   0x1.ae78d360afa15p+0,
-      0x1.fe81868fc47fep+1,    0x1.e31b55306f22cp+2,   0x1.e639103a05997p+2,
-      0x1.f7898d5a756ddp+2,    0x1.1685973506319p+3,   0x1.5f09cad750ab1p+3,
-      0x1.aaf85537ea4c7p+3,    0x1.4f2b874135d27p+4,   0x1.13114266f9764p+4,
-      0x1.a211877de55dbp+4,    0x1.a5eece87e8606p+4,   0x1.a65d441ea6dcep+4,
+      0x1.e0000000001c2p-20,   0x1.00452f0e0134dp-13,  0x1.0da8cc189b47dp-10,
+      0x1.00a33764a0a83p-7,    0x1.911a18779813fp-7,   0x1.940c877fb7dacp-7,
+      0x1.f42fb19b5b9b2p-6,    0x1.0285070f9f1bcp-5,   0x1.89f0f5241255bp-2,
+      0x1.6ca9ef729af76p-1,    0x1.23f40dccdef72p+0,   0x1.43cf16358c9d7p+0,
+      0x1.addf3b9722265p+0,    0x1.ae78d360afa15p+0,   0x1.fe81868fc47fep+1,
+      0x1.e31b55306f22cp+2,    0x1.e639103a05997p+2,   0x1.f7898d5a756ddp+2,
+      0x1.1685973506319p+3,    0x1.5f09cad750ab1p+3,   0x1.aaf85537ea4c7p+3,
+      0x1.4f2b874135d27p+4,    0x1.13114266f9764p+4,   0x1.a211877de55dbp+4,
+      0x1.a5eece87e8606p+4,    0x1.a65d441ea6dcep+4,   0x1.045457ae3994p+5,
       0x1.1ffb509f3db15p+5,    0x1.2345d1e090529p+5,   0x1.c96e28eb679f8p+5,
       0x1.da1838053b866p+5,    0x1.be886d9c2324dp+6,   0x1.ab514bfc61c76p+7,
       0x1.14823229799c2p+7,    0x1.48ff1782ca91dp+8,   0x1.dcbfda0c7559ep+8,
@@ -42,6 +43,7 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
       0x1.6ac5b262ca1ffp+843,  0x1.8bb5847d49973p+845, 0x1.6ac5b262ca1ffp+849,
       0x1.f08b14e1c4d0fp+890,  0x1.2b5fe88a9d8d5p+903, 0x1.a880417b7b119p+1023,
       0x1.f6d7518808571p+1023,
+
   };
   constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
 
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 07a1b63be80510..faacc8f834be72 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -1251,9 +1251,8 @@ mergeAttributesSection(const SmallVector<InputSectionBase *, 0> &sections) {
     }
   }
 
-  if (hasArch) {
-    if (auto result = RISCVISAInfo::postProcessAndChecking(
-            std::make_unique<RISCVISAInfo>(xlen, exts))) {
+  if (hasArch && xlen != 0) {
+    if (auto result = RISCVISAInfo::createFromExtMap(xlen, exts)) {
       merged.strAttr.try_emplace(RISCVAttrs::ARCH,
                                  saver().save((*result)->toString()));
     } else {
diff --git a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py
index 21f2095db90f8f..818fdf0e6b5c5e 100644
--- a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py
+++ b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py
@@ -92,11 +92,28 @@ def match_android_device(device_arch, valid_archs=None, valid_api_levels=None):
 
 
 def finalize_build_dictionary(dictionary):
+    # Provide uname-like platform name
+    platform_name_to_uname = {
+        "linux": "Linux",
+        "netbsd": "NetBSD",
+        "freebsd": "FreeBSD",
+        "windows": "Windows_NT",
+        "macosx": "Darwin",
+        "darwin": "Darwin",
+    }
+
+    if dictionary is None:
+        dictionary = {}
     if target_is_android():
-        if dictionary is None:
-            dictionary = {}
         dictionary["OS"] = "Android"
         dictionary["PIE"] = 1
+    elif platformIsDarwin():
+        dictionary["OS"] = "Darwin"
+    else:
+        dictionary["OS"] = platform_name_to_uname[getPlatform()]
+
+    dictionary["HOST_OS"] = platform_name_to_uname[getHostPlatform()]
+
     return dictionary
 
 
diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
index bd8eea3d6f5a04..3d562285ce9cc0 100644
--- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
+++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
@@ -55,7 +55,10 @@ LLDB_BASE_DIR := $(THIS_FILE_DIR)/../../../../../
 # When running tests from Visual Studio, the environment variable isn't
 # inherited all the way down to the process spawned for make.
 #----------------------------------------------------------------------
-HOST_OS := $(shell uname -s)
+ifeq "$(HOST_OS)" ""
+  HOST_OS := $(shell uname -s)
+endif
+
 ifneq (,$(findstring windows32,$(HOST_OS)))
 	HOST_OS := Windows_NT
 endif
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 6fac0df1d7a66d..dc7f6c9e86a47c 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -4152,7 +4152,6 @@ bool Process::ProcessEventData::ShouldStop(Event *event_ptr,
 
   ThreadList &curr_thread_list = process_sp->GetThreadList();
   uint32_t num_threads = curr_thread_list.GetSize();
-  uint32_t idx;
 
   // The actions might change one of the thread's stop_info's opinions about
   // whether we should stop the process, so we need to query that as we go.
@@ -4162,23 +4161,18 @@ bool Process::ProcessEventData::ShouldStop(Event *event_ptr,
   // get that wrong (which is possible) then the thread list might have
   // changed, and that would cause our iteration here to crash.  We could
   // make a copy of the thread list, but we'd really like to also know if it
-  // has changed at all, so we make up a vector of the thread ID's and check
-  // what we get back against this list & bag out if anything differs.
-  ThreadList not_suspended_thread_list(process_sp.get());
-  std::vector<uint32_t> thread_index_array(num_threads);
-  uint32_t not_suspended_idx = 0;
-  for (idx = 0; idx < num_threads; ++idx) {
+  // has changed at all, so we store the original index IDs of all threads and
+  // check what we get back against this list & bag out if anything differs.
+  std::vector<std::pair<ThreadSP, size_t>> not_suspended_threads;
+  for (uint32_t idx = 0; idx < num_threads; ++idx) {
     lldb::ThreadSP thread_sp = curr_thread_list.GetThreadAtIndex(idx);
 
     /*
      Filter out all suspended threads, they could not be the reason
      of stop and no need to perform any actions on them.
      */
-    if (thread_sp->GetResumeState() != eStateSuspended) {
-      not_suspended_thread_list.AddThread(thread_sp);
-      thread_index_array[not_suspended_idx] = thread_sp->GetIndexID();
-      not_suspended_idx++;
-    }
+    if (thread_sp->GetResumeState() != eStateSuspended)
+      not_suspended_threads.emplace_back(thread_sp, thread_sp->GetIndexID());
   }
 
   // Use this to track whether we should continue from here.  We will only
@@ -4194,8 +4188,7 @@ bool Process::ProcessEventData::ShouldStop(Event *event_ptr,
   // is, and it's better to let the user decide than continue behind their
   // backs.
 
-  for (idx = 0; idx < not_suspended_thread_list.GetSize(); ++idx) {
-    curr_thread_list = process_sp->GetThreadList();
+  for (auto [thread_sp, thread_index] : not_suspended_threads) {
     if (curr_thread_list.GetSize() != num_threads) {
       Log *log(GetLog(LLDBLog::Step | LLDBLog::Process));
       LLDB_LOGF(
@@ -4205,14 +4198,11 @@ bool Process::ProcessEventData::ShouldStop(Event *event_ptr,
       break;
     }
 
-    lldb::ThreadSP thread_sp = not_suspended_thread_list.GetThreadAtIndex(idx);
-
-    if (thread_sp->GetIndexID() != thread_index_array[idx]) {
+    if (thread_sp->GetIndexID() != thread_index) {
       Log *log(GetLog(LLDBLog::Step | LLDBLog::Process));
-      LLDB_LOGF(log,
-                "The thread at position %u changed from %u to %u while "
-                "processing event.",
-                idx, thread_index_array[idx], thread_sp->GetIndexID());
+      LLDB_LOG(log,
+               "The thread {0} changed from {1} to {2} while processing event.",
+               thread_sp.get(), thread_index, thread_sp->GetIndexID());
       break;
     }
 
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index b7b78cb9edab32..354ad5bc953175 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -276,6 +276,8 @@ struct KnownFPClass {
     return (KnownFPClasses & Mask) == fcNone;
   }
 
+  bool isKnownAlways(FPClassTest Mask) const { return isKnownNever(~Mask); }
+
   bool isUnknown() const {
     return KnownFPClasses == fcAllFlags && !SignBit;
   }
@@ -285,6 +287,9 @@ struct KnownFPClass {
     return isKnownNever(fcNan);
   }
 
+  /// Return true if it's known this must always be a nan.
+  bool isKnownAlwaysNaN() const { return isKnownAlways(fcNan); }
+
   /// Return true if it's known this can never be an infinity.
   bool isKnownNeverInfinity() const {
     return isKnownNever(fcInf);
diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h
index baa5476cec94a0..08cd666bf7f95b 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -50,445 +50,435 @@ class raw_ostream;
 class TargetInstrInfo;
 class VirtRegMap;
 
-  class LiveIntervals : public MachineFunctionPass {
-    MachineFunction *MF = nullptr;
-    MachineRegisterInfo *MRI = nullptr;
-    const TargetRegisterInfo *TRI = nullptr;
-    const TargetInstrInfo *TII = nullptr;
-    SlotIndexes *Indexes = nullptr;
-    MachineDominatorTree *DomTree = nullptr;
-    LiveIntervalCalc *LICalc = nullptr;
-
-    /// Special pool allocator for VNInfo's (LiveInterval val#).
-    VNInfo::Allocator VNInfoAllocator;
-
-    /// Live interval pointers for all the virtual registers.
-    IndexedMap<LiveInterval*, VirtReg2IndexFunctor> VirtRegIntervals;
-
-    /// Sorted list of instructions with register mask operands. Always use the
-    /// 'r' slot, RegMasks are normal clobbers, not early clobbers.
-    SmallVector<SlotIndex, 8> RegMaskSlots;
-
-    /// This vector is parallel to RegMaskSlots, it holds a pointer to the
-    /// corresponding register mask.  This pointer can be recomputed as:
-    ///
-    ///   MI = Indexes->getInstructionFromIndex(RegMaskSlot[N]);
-    ///   unsigned OpNum = findRegMaskOperand(MI);
-    ///   RegMaskBits[N] = MI->getOperand(OpNum).getRegMask();
-    ///
-    /// This is kept in a separate vector partly because some standard
-    /// libraries don't support lower_bound() with mixed objects, partly to
-    /// improve locality when searching in RegMaskSlots.
-    /// Also see the comment in LiveInterval::find().
-    SmallVector<const uint32_t*, 8> RegMaskBits;
-
-    /// For each basic block number, keep (begin, size) pairs indexing into the
-    /// RegMaskSlots and RegMaskBits arrays.
-    /// Note that basic block numbers may not be layout contiguous, that's why
-    /// we can't just keep track of the first register mask in each basic
-    /// block.
-    SmallVector<std::pair<unsigned, unsigned>, 8> RegMaskBlocks;
-
-    /// Keeps a live range set for each register unit to track fixed physreg
-    /// interference.
-    SmallVector<LiveRange*, 0> RegUnitRanges;
-
-  public:
-    static char ID;
-
-    LiveIntervals();
-    ~LiveIntervals() override;
-
-    /// Calculate the spill weight to assign to a single instruction.
-    static float getSpillWeight(bool isDef, bool isUse,
-                                const MachineBlockFrequencyInfo *MBFI,
-                                const MachineInstr &MI);
-
-    /// Calculate the spill weight to assign to a single instruction.
-    static float getSpillWeight(bool isDef, bool isUse,
-                                const MachineBlockFrequencyInfo *MBFI,
-                                const MachineBasicBlock *MBB);
-
-    LiveInterval &getInterval(Register Reg) {
-      if (hasInterval(Reg))
-        return *VirtRegIntervals[Reg.id()];
-
-      return createAndComputeVirtRegInterval(Reg);
-    }
-
-    const LiveInterval &getInterval(Register Reg) const {
-      return const_cast<LiveIntervals*>(this)->getInterval(Reg);
-    }
-
-    bool hasInterval(Register Reg) const {
-      return VirtRegIntervals.inBounds(Reg.id()) &&
-             VirtRegIntervals[Reg.id()];
-    }
-
-    /// Interval creation.
-    LiveInterval &createEmptyInterval(Register Reg) {
-      assert(!hasInterval(Reg) && "Interval already exists!");
-      VirtRegIntervals.grow(Reg.id());
-      VirtRegIntervals[Reg.id()] = createInterval(Reg);
+class LiveIntervals : public MachineFunctionPass {
+  MachineFunction *MF = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  SlotIndexes *Indexes = nullptr;
+  MachineDominatorTree *DomTree = nullptr;
+  LiveIntervalCalc *LICalc = nullptr;
+
+  /// Special pool allocator for VNInfo's (LiveInterval val#).
+  VNInfo::Allocator VNInfoAllocator;
+
+  /// Live interval pointers for all the virtual registers.
+  IndexedMap<LiveInterval *, VirtReg2IndexFunctor> VirtRegIntervals;
+
+  /// Sorted list of instructions with register mask operands. Always use the
+  /// 'r' slot, RegMasks are normal clobbers, not early clobbers.
+  SmallVector<SlotIndex, 8> RegMaskSlots;
+
+  /// This vector is parallel to RegMaskSlots, it holds a pointer to the
+  /// corresponding register mask.  This pointer can be recomputed as:
+  ///
+  ///   MI = Indexes->getInstructionFromIndex(RegMaskSlot[N]);
+  ///   unsigned OpNum = findRegMaskOperand(MI);
+  ///   RegMaskBits[N] = MI->getOperand(OpNum).getRegMask();
+  ///
+  /// This is kept in a separate vector partly because some standard
+  /// libraries don't support lower_bound() with mixed objects, partly to
+  /// improve locality when searching in RegMaskSlots.
+  /// Also see the comment in LiveInterval::find().
+  SmallVector<const uint32_t *, 8> RegMaskBits;
+
+  /// For each basic block number, keep (begin, size) pairs indexing into the
+  /// RegMaskSlots and RegMaskBits arrays.
+  /// Note that basic block numbers may not be layout contiguous, that's why
+  /// we can't just keep track of the first register mask in each basic
+  /// block.
+  SmallVector<std::pair<unsigned, unsigned>, 8> RegMaskBlocks;
+
+  /// Keeps a live range set for each register unit to track fixed physreg
+  /// interference.
+  SmallVector<LiveRange *, 0> RegUnitRanges;
+
+public:
+  static char ID;
+
+  LiveIntervals();
+  ~LiveIntervals() override;
+
+  /// Calculate the spill weight to assign to a single instruction.
+  static float getSpillWeight(bool isDef, bool isUse,
+                              const MachineBlockFrequencyInfo *MBFI,
+                              const MachineInstr &MI);
+
+  /// Calculate the spill weight to assign to a single instruction.
+  static float getSpillWeight(bool isDef, bool isUse,
+                              const MachineBlockFrequencyInfo *MBFI,
+                              const MachineBasicBlock *MBB);
+
+  LiveInterval &getInterval(Register Reg) {
+    if (hasInterval(Reg))
       return *VirtRegIntervals[Reg.id()];
-    }
-
-    LiveInterval &createAndComputeVirtRegInterval(Register Reg) {
-      LiveInterval &LI = createEmptyInterval(Reg);
-      computeVirtRegInterval(LI);
-      return LI;
-    }
-
-    /// Return an existing interval for \p Reg.
-    /// If \p Reg has no interval then this creates a new empty one instead.
-    /// Note: does not trigger interval computation.
-    LiveInterval &getOrCreateEmptyInterval(Register Reg) {
-      return hasInterval(Reg) ? getInterval(Reg) : createEmptyInterval(Reg);
-    }
-
-    /// Interval removal.
-    void removeInterval(Register Reg) {
-      delete VirtRegIntervals[Reg];
-      VirtRegIntervals[Reg] = nullptr;
-    }
-
-    /// Given a register and an instruction, adds a live segment from that
-    /// instruction to the end of its MBB.
-    LiveInterval::Segment addSegmentToEndOfBlock(Register Reg,
-                                                 MachineInstr &startInst);
-
-    /// After removing some uses of a register, shrink its live range to just
-    /// the remaining uses. This method does not compute reaching defs for new
-    /// uses, and it doesn't remove dead defs.
-    /// Dead PHIDef values are marked as unused. New dead machine instructions
-    /// are added to the dead vector. Returns true if the interval may have been
-    /// separated into multiple connected components.
-    bool shrinkToUses(LiveInterval *li,
-                      SmallVectorImpl<MachineInstr*> *dead = nullptr);
-
-    /// Specialized version of
-    /// shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead)
-    /// that works on a subregister live range and only looks at uses matching
-    /// the lane mask of the subregister range.
-    /// This may leave the subrange empty which needs to be cleaned up with
-    /// LiveInterval::removeEmptySubranges() afterwards.
-    void shrinkToUses(LiveInterval::SubRange &SR, Register Reg);
-
-    /// Extend the live range \p LR to reach all points in \p Indices. The
-    /// points in the \p Indices array must be jointly dominated by the union
-    /// of the existing defs in \p LR and points in \p Undefs.
-    ///
-    /// PHI-defs are added as needed to maintain SSA form.
-    ///
-    /// If a SlotIndex in \p Indices is the end index of a basic block, \p LR
-    /// will be extended to be live out of the basic block.
-    /// If a SlotIndex in \p Indices is jointy dominated only by points in
-    /// \p Undefs, the live range will not be extended to that point.
-    ///
-    /// See also LiveRangeCalc::extend().
-    void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices,
-                         ArrayRef<SlotIndex> Undefs);
-
-    void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices) {
-      extendToIndices(LR, Indices, /*Undefs=*/{});
-    }
 
-    /// If \p LR has a live value at \p Kill, prune its live range by removing
-    /// any liveness reachable from Kill. Add live range end points to
-    /// EndPoints such that extendToIndices(LI, EndPoints) will reconstruct the
-    /// value's live range.
-    ///
-    /// Calling pruneValue() and extendToIndices() can be used to reconstruct
-    /// SSA form after adding defs to a virtual register.
-    void pruneValue(LiveRange &LR, SlotIndex Kill,
-                    SmallVectorImpl<SlotIndex> *EndPoints);
-
-    /// This function should not be used. Its intent is to tell you that you are
-    /// doing something wrong if you call pruneValue directly on a
-    /// LiveInterval. Indeed, you are supposed to call pruneValue on the main
-    /// LiveRange and all the LiveRanges of the subranges if any.
-    LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex,
-                                          SmallVectorImpl<SlotIndex> *) {
-      llvm_unreachable(
-          "Use pruneValue on the main LiveRange and on each subrange");
+    return createAndComputeVirtRegInterval(Reg);
+  }
+
+  const LiveInterval &getInterval(Register Reg) const {
+    return const_cast<LiveIntervals *>(this)->getInterval(Reg);
+  }
+
+  bool hasInterval(Register Reg) const {
+    return VirtRegIntervals.inBounds(Reg.id()) && VirtRegIntervals[Reg.id()];
+  }
+
+  /// Interval creation.
+  LiveInterval &createEmptyInterval(Register Reg) {
+    assert(!hasInterval(Reg) && "Interval already exists!");
+    VirtRegIntervals.grow(Reg.id());
+    VirtRegIntervals[Reg.id()] = createInterval(Reg);
+    return *VirtRegIntervals[Reg.id()];
+  }
+
+  LiveInterval &createAndComputeVirtRegInterval(Register Reg) {
+    LiveInterval &LI = createEmptyInterval(Reg);
+    computeVirtRegInterval(LI);
+    return LI;
+  }
+
+  /// Return an existing interval for \p Reg.
+  /// If \p Reg has no interval then this creates a new empty one instead.
+  /// Note: does not trigger interval computation.
+  LiveInterval &getOrCreateEmptyInterval(Register Reg) {
+    return hasInterval(Reg) ? getInterval(Reg) : createEmptyInterval(Reg);
+  }
+
+  /// Interval removal.
+  void removeInterval(Register Reg) {
+    delete VirtRegIntervals[Reg];
+    VirtRegIntervals[Reg] = nullptr;
+  }
+
+  /// Given a register and an instruction, adds a live segment from that
+  /// instruction to the end of its MBB.
+  LiveInterval::Segment addSegmentToEndOfBlock(Register Reg,
+                                               MachineInstr &startInst);
+
+  /// After removing some uses of a register, shrink its live range to just
+  /// the remaining uses. This method does not compute reaching defs for new
+  /// uses, and it doesn't remove dead defs.
+  /// Dead PHIDef values are marked as unused. New dead machine instructions
+  /// are added to the dead vector. Returns true if the interval may have been
+  /// separated into multiple connected components.
+  bool shrinkToUses(LiveInterval *li,
+                    SmallVectorImpl<MachineInstr *> *dead = nullptr);
+
+  /// Specialized version of
+  /// shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead)
+  /// that works on a subregister live range and only looks at uses matching
+  /// the lane mask of the subregister range.
+  /// This may leave the subrange empty which needs to be cleaned up with
+  /// LiveInterval::removeEmptySubranges() afterwards.
+  void shrinkToUses(LiveInterval::SubRange &SR, Register Reg);
+
+  /// Extend the live range \p LR to reach all points in \p Indices. The
+  /// points in the \p Indices array must be jointly dominated by the union
+  /// of the existing defs in \p LR and points in \p Undefs.
+  ///
+  /// PHI-defs are added as needed to maintain SSA form.
+  ///
+  /// If a SlotIndex in \p Indices is the end index of a basic block, \p LR
+  /// will be extended to be live out of the basic block.
+  /// If a SlotIndex in \p Indices is jointly dominated only by points in
+  /// \p Undefs, the live range will not be extended to that point.
+  ///
+  /// See also LiveRangeCalc::extend().
+  void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices,
+                       ArrayRef<SlotIndex> Undefs);
+
+  void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices) {
+    extendToIndices(LR, Indices, /*Undefs=*/{});
+  }
+
+  /// If \p LR has a live value at \p Kill, prune its live range by removing
+  /// any liveness reachable from Kill. Add live range end points to
+  /// EndPoints such that extendToIndices(LI, EndPoints) will reconstruct the
+  /// value's live range.
+  ///
+  /// Calling pruneValue() and extendToIndices() can be used to reconstruct
+  /// SSA form after adding defs to a virtual register.
+  void pruneValue(LiveRange &LR, SlotIndex Kill,
+                  SmallVectorImpl<SlotIndex> *EndPoints);
+
+  /// This function should not be used. Its intent is to tell you that you are
+  /// doing something wrong if you call pruneValue directly on a
+  /// LiveInterval. Indeed, you are supposed to call pruneValue on the main
+  /// LiveRange and all the LiveRanges of the subranges if any.
+  LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex,
+                                        SmallVectorImpl<SlotIndex> *) {
+    llvm_unreachable(
+        "Use pruneValue on the main LiveRange and on each subrange");
+  }
+
+  SlotIndexes *getSlotIndexes() const { return Indexes; }
+
+  /// Returns true if the specified machine instr has been removed or was
+  /// never entered in the map.
+  bool isNotInMIMap(const MachineInstr &Instr) const {
+    return !Indexes->hasIndex(Instr);
+  }
+
+  /// Returns the base index of the given instruction.
+  SlotIndex getInstructionIndex(const MachineInstr &Instr) const {
+    return Indexes->getInstructionIndex(Instr);
+  }
+
+  /// Returns the instruction associated with the given index.
+  MachineInstr *getInstructionFromIndex(SlotIndex index) const {
+    return Indexes->getInstructionFromIndex(index);
+  }
+
+  /// Return the first index in the given basic block.
+  SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
+    return Indexes->getMBBStartIdx(mbb);
+  }
+
+  /// Return the last index in the given basic block.
+  SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
+    return Indexes->getMBBEndIdx(mbb);
+  }
+
+  bool isLiveInToMBB(const LiveRange &LR, const MachineBasicBlock *mbb) const {
+    return LR.liveAt(getMBBStartIdx(mbb));
+  }
+
+  bool isLiveOutOfMBB(const LiveRange &LR, const MachineBasicBlock *mbb) const {
+    return LR.liveAt(getMBBEndIdx(mbb).getPrevSlot());
+  }
+
+  MachineBasicBlock *getMBBFromIndex(SlotIndex index) const {
+    return Indexes->getMBBFromIndex(index);
+  }
+
+  void insertMBBInMaps(MachineBasicBlock *MBB) {
+    Indexes->insertMBBInMaps(MBB);
+    assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() &&
+           "Blocks must be added in order.");
+    RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0));
+  }
+
+  SlotIndex InsertMachineInstrInMaps(MachineInstr &MI) {
+    return Indexes->insertMachineInstrInMaps(MI);
+  }
+
+  void InsertMachineInstrRangeInMaps(MachineBasicBlock::iterator B,
+                                     MachineBasicBlock::iterator E) {
+    for (MachineBasicBlock::iterator I = B; I != E; ++I)
+      Indexes->insertMachineInstrInMaps(*I);
+  }
+
+  void RemoveMachineInstrFromMaps(MachineInstr &MI) {
+    Indexes->removeMachineInstrFromMaps(MI);
+  }
+
+  SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) {
+    return Indexes->replaceMachineInstrInMaps(MI, NewMI);
+  }
+
+  VNInfo::Allocator &getVNInfoAllocator() { return VNInfoAllocator; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+
+  /// Pass entry point; Calculates LiveIntervals.
+  bool runOnMachineFunction(MachineFunction &) override;
+
+  /// Implement the dump method.
+  void print(raw_ostream &O, const Module * = nullptr) const override;
+
+  /// If LI is confined to a single basic block, return a pointer to that
+  /// block.  If LI is live in to or out of any block, return NULL.
+  MachineBasicBlock *intervalIsInOneMBB(const LiveInterval &LI) const;
+
+  /// Returns true if VNI is killed by any PHI-def values in LI.
+  /// This may conservatively return true to avoid expensive computations.
+  bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const;
+
+  /// Add kill flags to any instruction that kills a virtual register.
+  void addKillFlags(const VirtRegMap *);
+
+  /// Call this method to notify LiveIntervals that instruction \p MI has been
+  /// moved within a basic block. This will update the live intervals for all
+  /// operands of \p MI. Moves between basic blocks are not supported.
+  ///
+  /// \param UpdateFlags Update live intervals for nonallocatable physregs.
+  void handleMove(MachineInstr &MI, bool UpdateFlags = false);
+
+  /// Update intervals of operands of all instructions in the newly
+  /// created bundle specified by \p BundleStart.
+  ///
+  /// \param UpdateFlags Update live intervals for nonallocatable physregs.
+  ///
+  /// Assumes existing liveness is accurate.
+  /// \pre BundleStart should be the first instruction in the Bundle.
+  /// \pre BundleStart should not have a SlotIndex as one will be assigned.
+  void handleMoveIntoNewBundle(MachineInstr &BundleStart,
+                               bool UpdateFlags = false);
+
+  /// Update live intervals for instructions in a range of iterators. It is
+  /// intended for use after target hooks that may insert or remove
+  /// instructions, and is only efficient for a small number of instructions.
+  ///
+  /// OrigRegs is a vector of registers that were originally used by the
+  /// instructions in the range between the two iterators.
+  ///
+  /// Currently, the only changes that are supported are simple removal
+  /// and addition of uses.
+  void repairIntervalsInRange(MachineBasicBlock *MBB,
+                              MachineBasicBlock::iterator Begin,
+                              MachineBasicBlock::iterator End,
+                              ArrayRef<Register> OrigRegs);
+
+  // Register mask functions.
+  //
+  // Machine instructions may use a register mask operand to indicate that a
+  // large number of registers are clobbered by the instruction.  This is
+  // typically used for calls.
+  //
+  // For compile time performance reasons, these clobbers are not recorded in
+  // the live intervals for individual physical registers.  Instead,
+  // LiveIntervalAnalysis maintains a sorted list of instructions with
+  // register mask operands.
+
+  /// Returns a sorted array of slot indices of all instructions with
+  /// register mask operands.
+  ArrayRef<SlotIndex> getRegMaskSlots() const { return RegMaskSlots; }
+
+  /// Returns a sorted array of slot indices of all instructions with register
+  /// mask operands in the basic block numbered \p MBBNum.
+  ArrayRef<SlotIndex> getRegMaskSlotsInBlock(unsigned MBBNum) const {
+    std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
+    return getRegMaskSlots().slice(P.first, P.second);
+  }
+
+  /// Returns an array of register mask pointers corresponding to
+  /// getRegMaskSlots().
+  ArrayRef<const uint32_t *> getRegMaskBits() const { return RegMaskBits; }
+
+  /// Returns an array of mask pointers corresponding to
+  /// getRegMaskSlotsInBlock(MBBNum).
+  ArrayRef<const uint32_t *> getRegMaskBitsInBlock(unsigned MBBNum) const {
+    std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
+    return getRegMaskBits().slice(P.first, P.second);
+  }
+
+  /// Test if \p LI is live across any register mask instructions, and
+  /// compute a bit mask of physical registers that are not clobbered by any
+  /// of them.
+  ///
+  /// Returns false if \p LI doesn't cross any register mask instructions. In
+  /// that case, the bit vector is not filled in.
+  bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs);
+
+  // Register unit functions.
+  //
+  // Fixed interference occurs when MachineInstrs use physregs directly
+  // instead of virtual registers. This typically happens when passing
+  // arguments to a function call, or when instructions require operands in
+  // fixed registers.
+  //
+  // Each physreg has one or more register units, see MCRegisterInfo. We
+  // track liveness per register unit to handle aliasing registers more
+  // efficiently.
+
+  /// Return the live range for register unit \p Unit. It will be computed if
+  /// it doesn't exist.
+  LiveRange &getRegUnit(unsigned Unit) {
+    LiveRange *LR = RegUnitRanges[Unit];
+    if (!LR) {
+      // Compute missing ranges on demand.
+      // Use segment set to speed-up initial computation of the live range.
+      RegUnitRanges[Unit] = LR = new LiveRange(UseSegmentSetForPhysRegs);
+      computeRegUnitRange(*LR, Unit);
     }
-
-    SlotIndexes *getSlotIndexes() const {
-      return Indexes;
-    }
-
-    /// Returns true if the specified machine instr has been removed or was
-    /// never entered in the map.
-    bool isNotInMIMap(const MachineInstr &Instr) const {
-      return !Indexes->hasIndex(Instr);
-    }
-
-    /// Returns the base index of the given instruction.
-    SlotIndex getInstructionIndex(const MachineInstr &Instr) const {
-      return Indexes->getInstructionIndex(Instr);
-    }
-
-    /// Returns the instruction associated with the given index.
-    MachineInstr* getInstructionFromIndex(SlotIndex index) const {
-      return Indexes->getInstructionFromIndex(index);
-    }
-
-    /// Return the first index in the given basic block.
-    SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
-      return Indexes->getMBBStartIdx(mbb);
-    }
-
-    /// Return the last index in the given basic block.
-    SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
-      return Indexes->getMBBEndIdx(mbb);
-    }
-
-    bool isLiveInToMBB(const LiveRange &LR,
-                       const MachineBasicBlock *mbb) const {
-      return LR.liveAt(getMBBStartIdx(mbb));
-    }
-
-    bool isLiveOutOfMBB(const LiveRange &LR,
-                        const MachineBasicBlock *mbb) const {
-      return LR.liveAt(getMBBEndIdx(mbb).getPrevSlot());
-    }
-
-    MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
-      return Indexes->getMBBFromIndex(index);
-    }
-
-    void insertMBBInMaps(MachineBasicBlock *MBB) {
-      Indexes->insertMBBInMaps(MBB);
-      assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() &&
-             "Blocks must be added in order.");
-      RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0));
-    }
-
-    SlotIndex InsertMachineInstrInMaps(MachineInstr &MI) {
-      return Indexes->insertMachineInstrInMaps(MI);
-    }
-
-    void InsertMachineInstrRangeInMaps(MachineBasicBlock::iterator B,
-                                       MachineBasicBlock::iterator E) {
-      for (MachineBasicBlock::iterator I = B; I != E; ++I)
-        Indexes->insertMachineInstrInMaps(*I);
-    }
-
-    void RemoveMachineInstrFromMaps(MachineInstr &MI) {
-      Indexes->removeMachineInstrFromMaps(MI);
-    }
-
-    SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) {
-      return Indexes->replaceMachineInstrInMaps(MI, NewMI);
-    }
-
-    VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override;
-    void releaseMemory() override;
-
-    /// Pass entry point; Calculates LiveIntervals.
-    bool runOnMachineFunction(MachineFunction&) override;
-
-    /// Implement the dump method.
-    void print(raw_ostream &O, const Module* = nullptr) const override;
-
-    /// If LI is confined to a single basic block, return a pointer to that
-    /// block.  If LI is live in to or out of any block, return NULL.
-    MachineBasicBlock *intervalIsInOneMBB(const LiveInterval &LI) const;
-
-    /// Returns true if VNI is killed by any PHI-def values in LI.
-    /// This may conservatively return true to avoid expensive computations.
-    bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const;
-
-    /// Add kill flags to any instruction that kills a virtual register.
-    void addKillFlags(const VirtRegMap*);
-
-    /// Call this method to notify LiveIntervals that instruction \p MI has been
-    /// moved within a basic block. This will update the live intervals for all
-    /// operands of \p MI. Moves between basic blocks are not supported.
-    ///
-    /// \param UpdateFlags Update live intervals for nonallocatable physregs.
-    void handleMove(MachineInstr &MI, bool UpdateFlags = false);
-
-    /// Update intervals of operands of all instructions in the newly
-    /// created bundle specified by \p BundleStart.
-    ///
-    /// \param UpdateFlags Update live intervals for nonallocatable physregs.
-    ///
-    /// Assumes existing liveness is accurate.
-    /// \pre BundleStart should be the first instruction in the Bundle.
-    /// \pre BundleStart should not have a have SlotIndex as one will be assigned.
-    void handleMoveIntoNewBundle(MachineInstr &BundleStart,
-                                 bool UpdateFlags = false);
-
-    /// Update live intervals for instructions in a range of iterators. It is
-    /// intended for use after target hooks that may insert or remove
-    /// instructions, and is only efficient for a small number of instructions.
-    ///
-    /// OrigRegs is a vector of registers that were originally used by the
-    /// instructions in the range between the two iterators.
-    ///
-    /// Currently, the only changes that are supported are simple removal
-    /// and addition of uses.
-    void repairIntervalsInRange(MachineBasicBlock *MBB,
-                                MachineBasicBlock::iterator Begin,
-                                MachineBasicBlock::iterator End,
-                                ArrayRef<Register> OrigRegs);
-
-    // Register mask functions.
-    //
-    // Machine instructions may use a register mask operand to indicate that a
-    // large number of registers are clobbered by the instruction.  This is
-    // typically used for calls.
-    //
-    // For compile time performance reasons, these clobbers are not recorded in
-    // the live intervals for individual physical registers.  Instead,
-    // LiveIntervalAnalysis maintains a sorted list of instructions with
-    // register mask operands.
-
-    /// Returns a sorted array of slot indices of all instructions with
-    /// register mask operands.
-    ArrayRef<SlotIndex> getRegMaskSlots() const { return RegMaskSlots; }
-
-    /// Returns a sorted array of slot indices of all instructions with register
-    /// mask operands in the basic block numbered \p MBBNum.
-    ArrayRef<SlotIndex> getRegMaskSlotsInBlock(unsigned MBBNum) const {
-      std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
-      return getRegMaskSlots().slice(P.first, P.second);
-    }
-
-    /// Returns an array of register mask pointers corresponding to
-    /// getRegMaskSlots().
-    ArrayRef<const uint32_t*> getRegMaskBits() const { return RegMaskBits; }
-
-    /// Returns an array of mask pointers corresponding to
-    /// getRegMaskSlotsInBlock(MBBNum).
-    ArrayRef<const uint32_t*> getRegMaskBitsInBlock(unsigned MBBNum) const {
-      std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
-      return getRegMaskBits().slice(P.first, P.second);
-    }
-
-    /// Test if \p LI is live across any register mask instructions, and
-    /// compute a bit mask of physical registers that are not clobbered by any
-    /// of them.
-    ///
-    /// Returns false if \p LI doesn't cross any register mask instructions. In
-    /// that case, the bit vector is not filled in.
-    bool checkRegMaskInterference(const LiveInterval &LI,
-                                  BitVector &UsableRegs);
-
-    // Register unit functions.
-    //
-    // Fixed interference occurs when MachineInstrs use physregs directly
-    // instead of virtual registers. This typically happens when passing
-    // arguments to a function call, or when instructions require operands in
-    // fixed registers.
-    //
-    // Each physreg has one or more register units, see MCRegisterInfo. We
-    // track liveness per register unit to handle aliasing registers more
-    // efficiently.
-
-    /// Return the live range for register unit \p Unit. It will be computed if
-    /// it doesn't exist.
-    LiveRange &getRegUnit(unsigned Unit) {
-      LiveRange *LR = RegUnitRanges[Unit];
-      if (!LR) {
-        // Compute missing ranges on demand.
-        // Use segment set to speed-up initial computation of the live range.
-        RegUnitRanges[Unit] = LR = new LiveRange(UseSegmentSetForPhysRegs);
-        computeRegUnitRange(*LR, Unit);
-      }
-      return *LR;
-    }
-
-    /// Return the live range for register unit \p Unit if it has already been
-    /// computed, or nullptr if it hasn't been computed yet.
-    LiveRange *getCachedRegUnit(unsigned Unit) {
-      return RegUnitRanges[Unit];
-    }
-
-    const LiveRange *getCachedRegUnit(unsigned Unit) const {
-      return RegUnitRanges[Unit];
-    }
-
-    /// Remove computed live range for register unit \p Unit. Subsequent uses
-    /// should rely on on-demand recomputation.
-    void removeRegUnit(unsigned Unit) {
-      delete RegUnitRanges[Unit];
-      RegUnitRanges[Unit] = nullptr;
-    }
-
-    /// Remove associated live ranges for the register units associated with \p
-    /// Reg. Subsequent uses should rely on on-demand recomputation.  \note This
-    /// method can result in inconsistent liveness tracking if multiple phyical
-    /// registers share a regunit, and should be used cautiously.
-    void removeAllRegUnitsForPhysReg(MCRegister Reg) {
-      for (MCRegUnit Unit : TRI->regunits(Reg))
-        removeRegUnit(Unit);
-    }
-
-    /// Remove value numbers and related live segments starting at position
-    /// \p Pos that are part of any liverange of physical register \p Reg or one
-    /// of its subregisters.
-    void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos);
-
-    /// Remove value number and related live segments of \p LI and its subranges
-    /// that start at position \p Pos.
-    void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos);
-
-    /// Split separate components in LiveInterval \p LI into separate intervals.
-    void splitSeparateComponents(LiveInterval &LI,
-                                 SmallVectorImpl<LiveInterval*> &SplitLIs);
-
-    /// For live interval \p LI with correct SubRanges construct matching
-    /// information for the main live range. Expects the main live range to not
-    /// have any segments or value numbers.
-    void constructMainRangeFromSubranges(LiveInterval &LI);
-
-  private:
-    /// Compute live intervals for all virtual registers.
-    void computeVirtRegs();
-
-    /// Compute RegMaskSlots and RegMaskBits.
-    void computeRegMasks();
-
-    /// Walk the values in \p LI and check for dead values:
-    /// - Dead PHIDef values are marked as unused.
-    /// - Dead operands are marked as such.
-    /// - Completely dead machine instructions are added to the \p dead vector
-    ///   if it is not nullptr.
-    /// Returns true if any PHI value numbers have been removed which may
-    /// have separated the interval into multiple connected components.
-    bool computeDeadValues(LiveInterval &LI,
-                           SmallVectorImpl<MachineInstr*> *dead);
-
-    static LiveInterval *createInterval(Register Reg);
-
-    void printInstrs(raw_ostream &O) const;
-    void dumpInstrs() const;
-
-    void computeLiveInRegUnits();
-    void computeRegUnitRange(LiveRange&, unsigned Unit);
-    bool computeVirtRegInterval(LiveInterval&);
-
-    using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
-    void extendSegmentsToUses(LiveRange &Segments,
-                              ShrinkToUsesWorkList &WorkList, Register Reg,
-                              LaneBitmask LaneMask);
-
-    /// Helper function for repairIntervalsInRange(), walks backwards and
-    /// creates/modifies live segments in \p LR to match the operands found.
-    /// Only full operands or operands with subregisters matching \p LaneMask
-    /// are considered.
-    void repairOldRegInRange(MachineBasicBlock::iterator Begin,
-                             MachineBasicBlock::iterator End,
-                             const SlotIndex endIdx, LiveRange &LR,
-                             Register Reg,
-                             LaneBitmask LaneMask = LaneBitmask::getAll());
-
-    class HMEditor;
-  };
+    return *LR;
+  }
+
+  /// Return the live range for register unit \p Unit if it has already been
+  /// computed, or nullptr if it hasn't been computed yet.
+  LiveRange *getCachedRegUnit(unsigned Unit) { return RegUnitRanges[Unit]; }
+
+  const LiveRange *getCachedRegUnit(unsigned Unit) const {
+    return RegUnitRanges[Unit];
+  }
+
+  /// Remove computed live range for register unit \p Unit. Subsequent uses
+  /// should rely on on-demand recomputation.
+  void removeRegUnit(unsigned Unit) {
+    delete RegUnitRanges[Unit];
+    RegUnitRanges[Unit] = nullptr;
+  }
+
+  /// Remove associated live ranges for the register units associated with \p
+  /// Reg. Subsequent uses should rely on on-demand recomputation.  \note This
+  /// method can result in inconsistent liveness tracking if multiple physical
+  /// registers share a regunit, and should be used cautiously.
+  void removeAllRegUnitsForPhysReg(MCRegister Reg) {
+    for (MCRegUnit Unit : TRI->regunits(Reg))
+      removeRegUnit(Unit);
+  }
+
+  /// Remove value numbers and related live segments starting at position
+  /// \p Pos that are part of any liverange of physical register \p Reg or one
+  /// of its subregisters.
+  void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos);
+
+  /// Remove value number and related live segments of \p LI and its subranges
+  /// that start at position \p Pos.
+  void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos);
+
+  /// Split separate components in LiveInterval \p LI into separate intervals.
+  void splitSeparateComponents(LiveInterval &LI,
+                               SmallVectorImpl<LiveInterval *> &SplitLIs);
+
+  /// For live interval \p LI with correct SubRanges construct matching
+  /// information for the main live range. Expects the main live range to not
+  /// have any segments or value numbers.
+  void constructMainRangeFromSubranges(LiveInterval &LI);
+
+private:
+  /// Compute live intervals for all virtual registers.
+  void computeVirtRegs();
+
+  /// Compute RegMaskSlots and RegMaskBits.
+  void computeRegMasks();
+
+  /// Walk the values in \p LI and check for dead values:
+  /// - Dead PHIDef values are marked as unused.
+  /// - Dead operands are marked as such.
+  /// - Completely dead machine instructions are added to the \p dead vector
+  ///   if it is not nullptr.
+  /// Returns true if any PHI value numbers have been removed which may
+  /// have separated the interval into multiple connected components.
+  bool computeDeadValues(LiveInterval &LI,
+                         SmallVectorImpl<MachineInstr *> *dead);
+
+  static LiveInterval *createInterval(Register Reg);
+
+  void printInstrs(raw_ostream &O) const;
+  void dumpInstrs() const;
+
+  void computeLiveInRegUnits();
+  void computeRegUnitRange(LiveRange &, unsigned Unit);
+  bool computeVirtRegInterval(LiveInterval &);
+
+  using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo *>, 16>;
+  void extendSegmentsToUses(LiveRange &Segments, ShrinkToUsesWorkList &WorkList,
+                            Register Reg, LaneBitmask LaneMask);
+
+  /// Helper function for repairIntervalsInRange(), walks backwards and
+  /// creates/modifies live segments in \p LR to match the operands found.
+  /// Only full operands or operands with subregisters matching \p LaneMask
+  /// are considered.
+  void repairOldRegInRange(MachineBasicBlock::iterator Begin,
+                           MachineBasicBlock::iterator End,
+                           const SlotIndex endIdx, LiveRange &LR, Register Reg,
+                           LaneBitmask LaneMask = LaneBitmask::getAll());
+
+  class HMEditor;
+};
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index 540a21c1118b86..53eddebdd6ae68 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -43,7 +43,7 @@ typedef unsigned ID;
 
 // Choose ';' as the delimiter. ':' was used once but it doesn't work well for
 // Objective-C functions which commonly have :'s in their names.
-inline constexpr char kGlobalIdentifierDelimiter = ';';
+inline constexpr char GlobalIdentifierDelimiter = ';';
 
 class GlobalValue : public Constant {
 public:
diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index a19f8fcc79d741..e41b3d51173d4d 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -48,6 +48,13 @@ enum class SymtabWritingMode {
   BigArchive64  // Only write the 64-bit symbol table.
 };
 
+// Write an archive directly to an output stream.
+Error writeArchiveToStream(raw_ostream &Out,
+                           ArrayRef<NewArchiveMember> NewMembers,
+                           SymtabWritingMode WriteSymtab,
+                           object::Archive::Kind Kind, bool Deterministic,
+                           bool Thin, std::optional<bool> IsEC = std::nullopt);
+
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
                    bool Deterministic, bool Thin,
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 51d590be124f10..5c2a78c14efd0f 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -66,8 +66,8 @@ inline std::error_code make_error_code(sampleprof_error E) {
   return std::error_code(static_cast<int>(E), sampleprof_category());
 }
 
-inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
-                                    sampleprof_error Result) {
+inline sampleprof_error mergeSampleProfErrors(sampleprof_error &Accumulator,
+                                              sampleprof_error Result) {
   // Prefer first error encountered as later errors may be secondary effects of
   // the initial problem.
   if (Accumulator == sampleprof_error::success &&
@@ -129,7 +129,7 @@ enum SecType {
 };
 
 static inline std::string getSecName(SecType Type) {
-  switch ((int)Type) { // Avoid -Wcovered-switch-default
+  switch (static_cast<int>(Type)) { // Avoid -Wcovered-switch-default
   case SecInValid:
     return "InvalidSection";
   case SecProfSummary:
@@ -392,7 +392,7 @@ class SampleRecord {
   uint64_t getSamples() const { return NumSamples; }
   const CallTargetMap &getCallTargets() const { return CallTargets; }
   const SortedCallTargetSet getSortedCallTargets() const {
-    return SortCallTargets(CallTargets);
+    return sortCallTargets(CallTargets);
   }
 
   uint64_t getCallTargetSum() const {
@@ -403,7 +403,8 @@ class SampleRecord {
   }
 
   /// Sort call targets in descending order of call frequency.
-  static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
+  static const SortedCallTargetSet
+  sortCallTargets(const CallTargetMap &Targets) {
     SortedCallTargetSet SortedTargets;
     for (const auto &[Target, Frequency] : Targets) {
       SortedTargets.emplace(Target, Frequency);
@@ -642,8 +643,8 @@ class SampleContext {
   }
 
   /// Set the name of the function and clear the current context.
-  void setFunction(FunctionId newFunction) {
-    Func = newFunction;
+  void setFunction(FunctionId NewFunctionID) {
+    Func = NewFunctionID;
     FullContext = SampleContextFrames();
     State = UnknownContext;
   }
@@ -692,7 +693,7 @@ class SampleContext {
     }
   };
 
-  bool IsPrefixOf(const SampleContext &That) const {
+  bool isPrefixOf(const SampleContext &That) const {
     auto ThisContext = FullContext;
     auto ThatContext = That.FullContext;
     if (ThatContext.size() < ThisContext.size())
@@ -846,11 +847,11 @@ class FunctionSamples {
   }
 
   // Set current context and all callee contexts to be synthetic.
-  void SetContextSynthetic() {
+  void setContextSynthetic() {
     Context.setState(SyntheticContext);
     for (auto &I : CallsiteSamples) {
       for (auto &CS : I.second) {
-        CS.second.SetContextSynthetic();
+        CS.second.setContextSynthetic();
       }
     }
   }
@@ -864,8 +865,7 @@ class FunctionSamples {
     const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
     if (ProfileLoc != IRToProfileLocationMap->end())
       return ProfileLoc->second;
-    else
-      return IRLoc;
+    return IRLoc;
   }
 
   /// Return the number of samples collected at the given location.
@@ -873,11 +873,11 @@ class FunctionSamples {
   /// If the location is not found in profile, return error.
   ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
                                   uint32_t Discriminator) const {
-    const auto &ret = BodySamples.find(
+    const auto &Ret = BodySamples.find(
         mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator)));
-    if (ret == BodySamples.end())
+    if (Ret == BodySamples.end())
       return std::error_code();
-    return ret->second.getSamples();
+    return Ret->second.getSamples();
   }
 
   /// Returns the call target map collected at a given location.
@@ -885,11 +885,11 @@ class FunctionSamples {
   /// If the location is not found in profile, return error.
   ErrorOr<const SampleRecord::CallTargetMap &>
   findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
-    const auto &ret = BodySamples.find(
+    const auto &Ret = BodySamples.find(
         mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator)));
-    if (ret == BodySamples.end())
+    if (Ret == BodySamples.end())
       return std::error_code();
-    return ret->second.getCallTargets();
+    return Ret->second.getCallTargets();
   }
 
   /// Returns the call target map collected at a given location specified by \p
@@ -910,10 +910,10 @@ class FunctionSamples {
   /// Returns the FunctionSamplesMap at the given \p Loc.
   const FunctionSamplesMap *
   findFunctionSamplesMapAt(const LineLocation &Loc) const {
-    auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
-    if (iter == CallsiteSamples.end())
+    auto Iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
+    if (Iter == CallsiteSamples.end())
       return nullptr;
-    return &iter->second;
+    return &Iter->second;
   }
 
   /// Returns a pointer to FunctionSamples at the given callsite location
@@ -960,8 +960,8 @@ class FunctionSamples {
     else if (!CallsiteSamples.empty()) {
       // An indirect callsite may be promoted to several inlined direct calls.
       // We need to get the sum of them.
-      for (const auto &N_FS : CallsiteSamples.begin()->second)
-        Count += N_FS.second.getHeadSamplesEstimate();
+      for (const auto &FuncSamples : CallsiteSamples.begin()->second)
+        Count += FuncSamples.second.getHeadSamplesEstimate();
     }
     // Return at least 1 if total sample is not 0.
     return Count ? Count : TotalSamples > 0;
@@ -1013,18 +1013,21 @@ class FunctionSamples {
       return sampleprof_error::hash_mismatch;
     }
 
-    MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
-    MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
+    mergeSampleProfErrors(Result,
+                          addTotalSamples(Other.getTotalSamples(), Weight));
+    mergeSampleProfErrors(Result,
+                          addHeadSamples(Other.getHeadSamples(), Weight));
     for (const auto &I : Other.getBodySamples()) {
       const LineLocation &Loc = I.first;
       const SampleRecord &Rec = I.second;
-      MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
+      mergeSampleProfErrors(Result, BodySamples[Loc].merge(Rec, Weight));
     }
     for (const auto &I : Other.getCallsiteSamples()) {
       const LineLocation &Loc = I.first;
       FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
       for (const auto &Rec : I.second)
-        MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
+        mergeSampleProfErrors(Result,
+                              FSMap[Rec.first].merge(Rec.second, Weight));
     }
     return Result;
   }
@@ -1039,10 +1042,10 @@ class FunctionSamples {
                             uint64_t Threshold) const {
     if (TotalSamples <= Threshold)
       return;
-    auto isDeclaration = [](const Function *F) {
+    auto IsDeclaration = [](const Function *F) {
       return !F || F->isDeclaration();
     };
-    if (isDeclaration(SymbolMap.lookup(getFunction()))) {
+    if (IsDeclaration(SymbolMap.lookup(getFunction()))) {
       // Add to the import list only when it's defined out of module.
       S.insert(getGUID());
     }
@@ -1052,7 +1055,7 @@ class FunctionSamples {
       for (const auto &TS : BS.second.getCallTargets())
         if (TS.second > Threshold) {
           const Function *Callee = SymbolMap.lookup(TS.first);
-          if (isDeclaration(Callee))
+          if (IsDeclaration(Callee))
             S.insert(TS.first.getHashCode());
         }
     for (const auto &CS : CallsiteSamples)
@@ -1061,8 +1064,8 @@ class FunctionSamples {
   }
 
   /// Set the name of the function.
-  void setFunction(FunctionId newFunction) {
-    Context.setFunction(newFunction);
+  void setFunction(FunctionId NewFunctionID) {
+    Context.setFunction(NewFunctionID);
   }
 
   /// Return the function name.
@@ -1083,7 +1086,7 @@ class FunctionSamples {
   /// Return the canonical name for a function, taking into account
   /// suffix elision policy attributes.
   static StringRef getCanonicalFnName(const Function &F) {
-    auto AttrName = "sample-profile-suffix-elision-policy";
+    const char *AttrName = "sample-profile-suffix-elision-policy";
     auto Attr = F.getFnAttribute(AttrName).getValueAsString();
     return getCanonicalFnName(F.getName(), Attr);
   }
@@ -1099,12 +1102,12 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
-    if (Attr == "" || Attr == "all") {
+    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
+    if (Attr == "" || Attr == "all")
       return FnName.split('.').first;
-    } else if (Attr == "selected") {
+    if (Attr == "selected") {
       StringRef Cand(FnName);
-      for (const auto &Suf : knownSuffixes) {
+      for (const auto &Suf : KnownSuffixes) {
         StringRef Suffix(Suf);
         // If the profile contains ".__uniq." suffix, don't strip the
         // suffix for names in the IR.
@@ -1118,11 +1121,10 @@ class FunctionSamples {
           Cand = Cand.substr(0, It);
       }
       return Cand;
-    } else if (Attr == "none") {
-      return FnName;
-    } else {
-      assert(false && "internal error: unknown suffix elision policy");
     }
+    if (Attr == "none")
+      return FnName;
+    assert(false && "internal error: unknown suffix elision policy");
     return FnName;
   }
 
@@ -1307,7 +1309,7 @@ class SampleProfileMap
 public:
   // Convenience method because this is being used in many places. Set the
   // FunctionSamples' context if its newly inserted.
-  mapped_type &Create(const SampleContext &Ctx) {
+  mapped_type &create(const SampleContext &Ctx) {
     auto Ret = try_emplace(Ctx, FunctionSamples());
     if (Ret.second)
       Ret.first->second.setContext(Ctx);
@@ -1428,7 +1430,7 @@ class ProfileConverter {
       for (const auto &I : InputProfiles) {
         // Retain the profile name and clear the full context for each function
         // profile.
-        FunctionSamples &FS = OutputProfiles.Create(I.second.getFunction());
+        FunctionSamples &FS = OutputProfiles.create(I.second.getFunction());
         FS.merge(I.second);
       }
     } else {
@@ -1507,8 +1509,8 @@ class ProfileSymbolList {
 public:
   /// copy indicates whether we need to copy the underlying memory
   /// for the input Name.
-  void add(StringRef Name, bool copy = false) {
-    if (!copy) {
+  void add(StringRef Name, bool Copy = false) {
+    if (!Copy) {
       Syms.insert(Name);
       return;
     }
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index 8416e082aec3db..c84f25f6f57541 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -117,8 +117,9 @@ class Value {
   void clearValue() { Val = nullptr; }
   template <typename ItTy, typename SBTy> friend class LLVMOpUserItToSBTy;
 
-public:
   Value(ClassID SubclassID, llvm::Value *Val, Context &Ctx);
+
+public:
   virtual ~Value() = default;
   ClassID getSubclassID() const { return SubclassID; }
 
@@ -146,9 +147,11 @@ class Value {
 
 /// Argument of a sandboxir::Function.
 class Argument : public sandboxir::Value {
-public:
   Argument(llvm::Argument *Arg, sandboxir::Context &Ctx)
       : sandboxir::Value(ClassID::Argument, Arg, Ctx) {}
+  friend class Context; // For constructor.
+
+public:
   static bool classof(const sandboxir::Value *From) {
     return From->getSubclassID() == ClassID::Argument;
   }
@@ -168,8 +171,10 @@ class Argument : public sandboxir::Value {
 };
 
 class User : public Value {
-public:
+protected:
   User(ClassID ID, llvm::Value *V, Context &Ctx) : Value(ID, V, Ctx) {}
+
+public:
   /// For isa/dyn_cast.
   static bool classof(const Value *From);
 #ifndef NDEBUG
@@ -187,9 +192,11 @@ class User : public Value {
 };
 
 class Constant : public sandboxir::User {
-public:
   Constant(llvm::Constant *C, sandboxir::Context &SBCtx)
       : sandboxir::User(ClassID::Constant, C, SBCtx) {}
+  friend class Context; // For constructor.
+
+public:
   /// For isa/dyn_cast.
   static bool classof(const sandboxir::Value *From) {
     return From->getSubclassID() == ClassID::Constant ||
@@ -263,11 +270,11 @@ class Instruction : public sandboxir::User {
 #include "llvm/SandboxIR/SandboxIRValues.def"
   };
 
+protected:
   Instruction(ClassID ID, Opcode Opc, llvm::Instruction *I,
               sandboxir::Context &SBCtx)
       : sandboxir::User(ID, I, SBCtx), Opc(Opc) {}
 
-protected:
   Opcode Opc;
 
 public:
@@ -297,11 +304,13 @@ class Instruction : public sandboxir::User {
 /// An LLLVM Instruction that has no SandboxIR equivalent class gets mapped to
 /// an OpaqueInstr.
 class OpaqueInst : public sandboxir::Instruction {
-public:
   OpaqueInst(llvm::Instruction *I, sandboxir::Context &Ctx)
       : sandboxir::Instruction(ClassID::Opaque, Opcode::Opaque, I, Ctx) {}
   OpaqueInst(ClassID SubclassID, llvm::Instruction *I, sandboxir::Context &Ctx)
       : sandboxir::Instruction(SubclassID, Opcode::Opaque, I, Ctx) {}
+  friend class Context; // For constructor.
+
+public:
   static bool classof(const sandboxir::Value *From) {
     return From->getSubclassID() == ClassID::Opaque;
   }
@@ -326,11 +335,12 @@ class BasicBlock : public Value {
   void buildBasicBlockFromLLVMIR(llvm::BasicBlock *LLVMBB);
   friend class Context; // For `buildBasicBlockFromIR`
 
-public:
   BasicBlock(llvm::BasicBlock *BB, Context &SBCtx)
       : Value(ClassID::Block, BB, SBCtx) {
     buildBasicBlockFromLLVMIR(BB);
   }
+
+public:
   ~BasicBlock() = default;
   /// For isa/dyn_cast.
   static bool classof(const Value *From) {
@@ -385,7 +395,7 @@ class Context {
     auto Pair = LLVMValueToValueMap.insert({LLVMArg, nullptr});
     auto It = Pair.first;
     if (Pair.second) {
-      It->second = std::make_unique<Argument>(LLVMArg, *this);
+      It->second = std::unique_ptr<Argument>(new Argument(LLVMArg, *this));
       return cast<Argument>(It->second.get());
     }
     return cast<Argument>(It->second.get());
@@ -422,10 +432,12 @@ class Function : public sandboxir::Value {
       return *cast<BasicBlock>(Ctx.getValue(&LLVMBB));
     }
   };
-
-public:
+  /// Use Context::createFunction() instead.
   Function(llvm::Function *F, sandboxir::Context &Ctx)
       : sandboxir::Value(ClassID::Function, F, Ctx) {}
+  friend class Context; // For constructor.
+
+public:
   /// For isa/dyn_cast.
   static bool classof(const sandboxir::Value *From) {
     return From->getSubclassID() == ClassID::Function;
diff --git a/llvm/include/llvm/TargetParser/RISCVISAInfo.h b/llvm/include/llvm/TargetParser/RISCVISAInfo.h
index ba2965600decd7..5d3f3e113e96d3 100644
--- a/llvm/include/llvm/TargetParser/RISCVISAInfo.h
+++ b/llvm/include/llvm/TargetParser/RISCVISAInfo.h
@@ -26,9 +26,6 @@ class RISCVISAInfo {
   RISCVISAInfo(const RISCVISAInfo &) = delete;
   RISCVISAInfo &operator=(const RISCVISAInfo &) = delete;
 
-  RISCVISAInfo(unsigned XLen, RISCVISAUtils::OrderedExtensionMap &Exts)
-      : XLen(XLen), FLen(0), MinVLen(0), MaxELen(0), MaxELenFp(0), Exts(Exts) {}
-
   /// Parse RISC-V ISA info from arch string.
   /// If IgnoreUnknown is set, any unrecognised extension names or
   /// extensions with unrecognised versions will be silently dropped, except
@@ -48,6 +45,10 @@ class RISCVISAInfo {
   static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
   parseFeatures(unsigned XLen, const std::vector<std::string> &Features);
 
+  static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+  createFromExtMap(unsigned XLen,
+                   const RISCVISAUtils::OrderedExtensionMap &Exts);
+
   /// Convert RISC-V ISA info to a feature vector.
   std::vector<std::string> toFeatures(bool AddAllExtensions = false,
                                       bool IgnoreUnknown = true) const;
@@ -72,8 +73,6 @@ class RISCVISAInfo {
   static bool isSupportedExtensionWithVersion(StringRef Ext);
   static bool isSupportedExtension(StringRef Ext, unsigned MajorVersion,
                                    unsigned MinorVersion);
-  static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
-  postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo);
   static std::string getTargetFeatureForExtension(StringRef Ext);
 
 private:
@@ -93,6 +92,9 @@ class RISCVISAInfo {
 
   /// Update FLen, MinVLen, MaxELen, and MaxELenFp.
   void updateImpliedLengths();
+
+  static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+  postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo);
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index e03d8f6eebfca3..2a9b38a98ec3c2 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -157,6 +157,12 @@ enum ArchFeatureKind : uint32_t {
   FEATURE_WGP = 1 << 9,
 };
 
+enum FeatureError : uint32_t {
+  NO_ERROR = 0,
+  INVALID_FEATURE_COMBINATION,
+  UNSUPPORTED_TARGET_FEATURE
+};
+
 StringRef getArchFamilyNameAMDGCN(GPUKind AK);
 
 StringRef getArchNameAMDGCN(GPUKind AK);
@@ -177,8 +183,9 @@ void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
                           StringMap<bool> &Features);
 
 /// Inserts wave size feature for given GPU into features map
-bool insertWaveSizeFeature(StringRef GPU, const Triple &T,
-                           StringMap<bool> &Features, std::string &ErrorMsg);
+std::pair<FeatureError, StringRef>
+insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                      StringMap<bool> &Features);
 
 } // namespace AMDGPU
 } // namespace llvm
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 1b72f151e3692a..0917a362eccf5d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1872,14 +1872,11 @@ static Value *simplifyAndOrOfFCmps(const SimplifyQuery &Q, FCmpInst *LHS,
   if ((PredL == FCmpInst::FCMP_ORD || PredL == FCmpInst::FCMP_UNO) &&
       ((FCmpInst::isOrdered(PredR) && IsAnd) ||
        (FCmpInst::isUnordered(PredR) && !IsAnd))) {
-    // (fcmp ord X, NNAN) & (fcmp o** X, Y) --> fcmp o** X, Y
-    // (fcmp uno X, NNAN) & (fcmp o** X, Y) --> false
-    // (fcmp uno X, NNAN) | (fcmp u** X, Y) --> fcmp u** X, Y
-    // (fcmp ord X, NNAN) | (fcmp u** X, Y) --> true
-    if (((LHS1 == RHS0 || LHS1 == RHS1) &&
-         isKnownNeverNaN(LHS0, /*Depth=*/0, Q)) ||
-        ((LHS0 == RHS0 || LHS0 == RHS1) &&
-         isKnownNeverNaN(LHS1, /*Depth=*/0, Q)))
+    // (fcmp ord X, 0) & (fcmp o** X, Y) --> fcmp o** X, Y
+    // (fcmp uno X, 0) & (fcmp o** X, Y) --> false
+    // (fcmp uno X, 0) | (fcmp u** X, Y) --> fcmp u** X, Y
+    // (fcmp ord X, 0) | (fcmp u** X, Y) --> true
+    if ((LHS0 == RHS0 || LHS0 == RHS1) && match(LHS1, m_PosZeroFP()))
       return FCmpInst::isOrdered(PredL) == FCmpInst::isOrdered(PredR)
                  ? static_cast<Value *>(RHS)
                  : ConstantInt::getBool(LHS->getType(), !IsAnd);
@@ -1888,14 +1885,11 @@ static Value *simplifyAndOrOfFCmps(const SimplifyQuery &Q, FCmpInst *LHS,
   if ((PredR == FCmpInst::FCMP_ORD || PredR == FCmpInst::FCMP_UNO) &&
       ((FCmpInst::isOrdered(PredL) && IsAnd) ||
        (FCmpInst::isUnordered(PredL) && !IsAnd))) {
-    // (fcmp o** X, Y) & (fcmp ord X, NNAN) --> fcmp o** X, Y
-    // (fcmp o** X, Y) & (fcmp uno X, NNAN) --> false
-    // (fcmp u** X, Y) | (fcmp uno X, NNAN) --> fcmp u** X, Y
-    // (fcmp u** X, Y) | (fcmp ord X, NNAN) --> true
-    if (((RHS1 == LHS0 || RHS1 == LHS1) &&
-         isKnownNeverNaN(RHS0, /*Depth=*/0, Q)) ||
-        ((RHS0 == LHS0 || RHS0 == LHS1) &&
-         isKnownNeverNaN(RHS1, /*Depth=*/0, Q)))
+    // (fcmp o** X, Y) & (fcmp ord X, 0) --> fcmp o** X, Y
+    // (fcmp o** X, Y) & (fcmp uno X, 0) --> false
+    // (fcmp u** X, Y) | (fcmp uno X, 0) --> fcmp u** X, Y
+    // (fcmp u** X, Y) | (fcmp ord X, 0) --> true
+    if ((RHS0 == LHS0 || RHS0 == LHS1) && match(RHS1, m_PosZeroFP()))
       return FCmpInst::isOrdered(PredL) == FCmpInst::isOrdered(PredR)
                  ? static_cast<Value *>(LHS)
                  : ConstantInt::getBool(LHS->getType(), !IsAnd);
@@ -4117,9 +4111,17 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // This catches the 2 variable input case, constants are handled below as a
   // class-like compare.
   if (Pred == FCmpInst::FCMP_ORD || Pred == FCmpInst::FCMP_UNO) {
-    if (FMF.noNaNs() || (isKnownNeverNaN(RHS, /*Depth=*/0, Q) &&
-                         isKnownNeverNaN(LHS, /*Depth=*/0, Q)))
+    KnownFPClass RHSClass =
+        computeKnownFPClass(RHS, fcAllFlags, /*Depth=*/0, Q);
+    KnownFPClass LHSClass =
+        computeKnownFPClass(LHS, fcAllFlags, /*Depth=*/0, Q);
+
+    if (FMF.noNaNs() ||
+        (RHSClass.isKnownNeverNaN() && LHSClass.isKnownNeverNaN()))
       return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
+
+    if (RHSClass.isKnownAlwaysNaN() || LHSClass.isKnownAlwaysNaN())
+      return ConstantInt::get(RetTy, Pred == CmpInst::FCMP_UNO);
   }
 
   const APFloat *C = nullptr;
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 8922fa5898133a..e741a0fc49fb3d 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -104,7 +104,8 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
   // for the Case Statement'" (1994), but builds the MinPartitions array in
   // reverse order to make it easier to reconstruct the partitions in ascending
   // order. In the choice between two optimal partitionings, it picks the one
-  // which yields more jump tables.
+  // which yields more jump tables. The algorithm is described in
+  // https://arxiv.org/pdf/1910.02351v2
 
   // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
   SmallVector<unsigned, 8> MinPartitions(N);
@@ -574,4 +575,4 @@ SwitchCG::SwitchLowering::computeSplitWorkItemInfo(
   assert(FirstRight <= W.LastCluster);
 
   return SplitWorkItemInfo{LastLeft, FirstRight, LeftProb, RightProb};
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index c51852987614c5..cc37d7371cce35 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -169,7 +169,7 @@ std::string GlobalValue::getGlobalIdentifier(StringRef Name,
     else
       GlobalName += FileName;
 
-    GlobalName += kGlobalIdentifierDelimiter;
+    GlobalName += GlobalIdentifierDelimiter;
   }
   GlobalName += Name;
   return GlobalName;
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 5e6d19b9bfa54b..fd55f974115a24 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2203,6 +2203,11 @@ Error Object::removeSections(
           if (auto ToRelSec = RelSec->getSection())
             return !ToRemove(*ToRelSec);
         }
+        // Remove group sections whose members are all being removed.
+        if (Sec->Type == ELF::SHT_GROUP) {
+          auto GroupSec = cast<GroupSection>(Sec.get());
+          return !llvm::all_of(GroupSec->members(), ToRemove);
+        }
         return true;
       });
   if (SymbolTable != nullptr && ToRemove(*SymbolTable))
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index e3c0e7abda16b9..6ccf85387131e4 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -941,6 +941,9 @@ class GroupSection : public SectionBase {
   SmallVector<SectionBase *, 3> GroupMembers;
 
 public:
+  template <class T>
+  using ConstRange = iterator_range<
+      pointee_iterator<typename llvm::SmallVector<T *, 3>::const_iterator>>;
   // TODO: Contents is present in several classes of the hierarchy.
   // This needs to be refactored to avoid duplication.
   ArrayRef<uint8_t> Contents;
@@ -964,6 +967,10 @@ class GroupSection : public SectionBase {
       const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
   void onRemove() override;
 
+  ConstRange<SectionBase> members() const {
+    return make_pointee_range(GroupMembers);
+  }
+
   static bool classof(const SectionBase *S) {
     return S->OriginalType == ELF::SHT_GROUP;
   }
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 913b74c110b364..34f12cf0111cfd 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -997,10 +997,11 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
   return std::string(Relative);
 }
 
-static Error
-writeArchiveToStream(raw_ostream &Out, ArrayRef<NewArchiveMember> NewMembers,
-                     SymtabWritingMode WriteSymtab, object::Archive::Kind Kind,
-                     bool Deterministic, bool Thin, std::optional<bool> IsEC) {
+Error writeArchiveToStream(raw_ostream &Out,
+                           ArrayRef<NewArchiveMember> NewMembers,
+                           SymtabWritingMode WriteSymtab,
+                           object::Archive::Kind Kind, bool Deterministic,
+                           bool Thin, std::optional<bool> IsEC) {
   assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
 
   SmallString<0> SymNamesBuf;
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 3c8bf1b9628605..93876e87f20b32 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -401,7 +401,7 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO) {
 
 // See getIRPGOObjectName() for a discription of the format.
 std::pair<StringRef, StringRef> getParsedIRPGOName(StringRef IRPGOName) {
-  auto [FileName, MangledName] = IRPGOName.split(kGlobalIdentifierDelimiter);
+  auto [FileName, MangledName] = IRPGOName.split(GlobalIdentifierDelimiter);
   if (MangledName.empty())
     return std::make_pair(StringRef(), IRPGOName);
   return std::make_pair(FileName, MangledName);
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index e18ce5d373d1cb..e7b843362e5b3f 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1049,7 +1049,7 @@ class llvm::InstrProfReaderItaniumRemapper
     // '_Z'; we'll assume that's the mangled name we want.
     std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
     while (true) {
-      Parts = Parts.second.split(kGlobalIdentifierDelimiter);
+      Parts = Parts.second.split(GlobalIdentifierDelimiter);
       if (Parts.first.starts_with("_Z"))
         return Parts.first;
       if (Parts.second.empty())
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 59fa71899ed47b..294f64636d989c 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -121,7 +121,7 @@ sampleprof_error SampleRecord::merge(const SampleRecord &Other,
   sampleprof_error Result;
   Result = addSamples(Other.getSamples(), Weight);
   for (const auto &I : Other.getCallTargets()) {
-    MergeResult(Result, addCalledTarget(I.first, I.second, Weight));
+    mergeSampleProfErrors(Result, addCalledTarget(I.first, I.second, Weight));
   }
   return Result;
 }
@@ -364,7 +364,7 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
       if (ColdContextFrameLength < MergedContext.size())
         MergedContext = MergedContext.take_back(ColdContextFrameLength);
       // Need to set MergedProfile's context here otherwise it will be lost.
-      FunctionSamples &MergedProfile = MergedProfileMap.Create(MergedContext);
+      FunctionSamples &MergedProfile = MergedProfileMap.create(MergedContext);
       MergedProfile.merge(*I.second);
     }
     ProfileMap.erase(I.first);
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index a4b2d0668a5a54..4752465fc072e0 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -355,9 +355,9 @@ std::error_code SampleProfileReaderText::readImpl() {
       SampleContext FContext(FName, CSNameTable);
       if (FContext.hasContext())
         ++CSProfileCount;
-      FunctionSamples &FProfile = Profiles.Create(FContext);
-      MergeResult(Result, FProfile.addTotalSamples(NumSamples));
-      MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
+      FunctionSamples &FProfile = Profiles.create(FContext);
+      mergeSampleProfErrors(Result, FProfile.addTotalSamples(NumSamples));
+      mergeSampleProfErrors(Result, FProfile.addHeadSamples(NumHeadSamples));
       InlineStack.clear();
       InlineStack.push_back(&FProfile);
     } else {
@@ -394,7 +394,7 @@ std::error_code SampleProfileReaderText::readImpl() {
         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
             LineLocation(LineOffset, Discriminator))[FunctionId(FName)];
         FSamples.setFunction(FunctionId(FName));
-        MergeResult(Result, FSamples.addTotalSamples(NumSamples));
+        mergeSampleProfErrors(Result, FSamples.addTotalSamples(NumSamples));
         InlineStack.push_back(&FSamples);
         DepthMetadata = 0;
         break;
@@ -405,13 +405,14 @@ std::error_code SampleProfileReaderText::readImpl() {
         }
         FunctionSamples &FProfile = *InlineStack.back();
         for (const auto &name_count : TargetCountMap) {
-          MergeResult(Result, FProfile.addCalledTargetSamples(
-                                  LineOffset, Discriminator,
-                                  FunctionId(name_count.first),
-                                  name_count.second));
+          mergeSampleProfErrors(Result, FProfile.addCalledTargetSamples(
+                                            LineOffset, Discriminator,
+                                            FunctionId(name_count.first),
+                                            name_count.second));
         }
-        MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
-                                                    NumSamples));
+        mergeSampleProfErrors(
+            Result,
+            FProfile.addBodySamples(LineOffset, Discriminator, NumSamples));
         break;
       }
       case LineType::Metadata: {
@@ -892,12 +893,12 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
         if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
             (!useMD5() && (FuncsToUse.count(FNameString) ||
                            (Remapper && Remapper->exist(FNameString))))) {
-          if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
+          if (!CommonContext || !CommonContext->isPrefixOf(FContext))
             CommonContext = &FContext;
         }
 
         if (CommonContext == &FContext ||
-            (CommonContext && CommonContext->IsPrefixOf(FContext))) {
+            (CommonContext && CommonContext->isPrefixOf(FContext))) {
           // Load profile for the current context which originated from
           // the common ancestor.
           const uint8_t *FuncProfileAddr = Start + NameOffset.second;
diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp
index bd615f0ee76543..f64b1145ebf43d 100644
--- a/llvm/lib/SandboxIR/SandboxIR.cpp
+++ b/llvm/lib/SandboxIR/SandboxIR.cpp
@@ -233,11 +233,11 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
   if (auto *C = dyn_cast<llvm::Constant>(LLVMV)) {
     for (llvm::Value *COp : C->operands())
       getOrCreateValueInternal(COp, C);
-    It->second = std::make_unique<Constant>(C, *this);
+    It->second = std::unique_ptr<Constant>(new Constant(C, *this));
     return It->second.get();
   }
   if (auto *Arg = dyn_cast<llvm::Argument>(LLVMV)) {
-    It->second = std::make_unique<Argument>(Arg, *this);
+    It->second = std::unique_ptr<Argument>(new Argument(Arg, *this));
     return It->second.get();
   }
   if (auto *BB = dyn_cast<llvm::BasicBlock>(LLVMV)) {
@@ -248,14 +248,14 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
     return nullptr;
   }
   assert(isa<llvm::Instruction>(LLVMV) && "Expected Instruction");
-  It->second =
-      std::make_unique<OpaqueInst>(cast<llvm::Instruction>(LLVMV), *this);
+  It->second = std::unique_ptr<OpaqueInst>(
+      new OpaqueInst(cast<llvm::Instruction>(LLVMV), *this));
   return It->second.get();
 }
 
 BasicBlock *Context::createBasicBlock(llvm::BasicBlock *LLVMBB) {
   assert(getValue(LLVMBB) == nullptr && "Already exists!");
-  auto NewBBPtr = std::make_unique<BasicBlock>(LLVMBB, *this);
+  auto NewBBPtr = std::unique_ptr<BasicBlock>(new BasicBlock(LLVMBB, *this));
   auto *BB = cast<BasicBlock>(registerValue(std::move(NewBBPtr)));
   // Create SandboxIR for BB's body.
   BB->buildBasicBlockFromLLVMIR(LLVMBB);
@@ -271,7 +271,7 @@ Value *Context::getValue(llvm::Value *V) const {
 
 Function *Context::createFunction(llvm::Function *F) {
   assert(getValue(F) == nullptr && "Already exists!");
-  auto NewFPtr = std::make_unique<Function>(F, *this);
+  auto NewFPtr = std::unique_ptr<Function>(new Function(F, *this));
   // Create arguments.
   for (auto &Arg : F->args())
     getOrCreateArgument(&Arg);
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index e2a8fb485850f0..72d01f1258013d 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -406,19 +406,19 @@ def FeatureStdExtZcf
 
 def FeatureStdExtZcmp
     : RISCVExtension<"zcmp", 1, 0,
-                     "'Zcmp' (sequenced instuctions for code-size reduction)",
+                     "'Zcmp' (sequenced instructions for code-size reduction)",
                      [FeatureStdExtZca]>;
 def HasStdExtZcmp : Predicate<"Subtarget->hasStdExtZcmp() && !Subtarget->hasStdExtC()">,
                     AssemblerPredicate<(all_of FeatureStdExtZcmp),
-                        "'Zcmp' (sequenced instuctions for code-size reduction)">;
+                        "'Zcmp' (sequenced instructions for code-size reduction)">;
 
 def FeatureStdExtZcmt
     : RISCVExtension<"zcmt", 1, 0,
-                     "'Zcmt' (table jump instuctions for code-size reduction)",
+                     "'Zcmt' (table jump instructions for code-size reduction)",
                      [FeatureStdExtZca, FeatureStdExtZicsr]>;
 def HasStdExtZcmt : Predicate<"Subtarget->hasStdExtZcmt()">,
                            AssemblerPredicate<(all_of FeatureStdExtZcmt),
-                           "'Zcmt' (table jump instuctions for code-size reduction)">;
+                           "'Zcmt' (table jump instructions for code-size reduction)">;
 
 def FeatureStdExtZce
     : RISCVExtension<"zce", 1, 0,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 42d6b03968d747..d72390b7c14b57 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3957,6 +3957,28 @@ class VPatUnaryNoMaskRoundingMode<string intrinsic_name,
                    (XLenVT timm:$round),
                    GPR:$vl, log2sew, TU_MU)>;
 
+class VPatUnaryNoMaskRTZ<string intrinsic_name,
+                         string inst,
+                         string kind,
+                         ValueType result_type,
+                         ValueType op2_type,
+                         int log2sew,
+                         LMULInfo vlmul,
+                         VReg result_reg_class,
+                         VReg op2_reg_class,
+                         bit isSEWAware = 0> :
+  Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+                   (result_type result_reg_class:$merge),
+                   (op2_type op2_reg_class:$rs2),
+                   (XLenVT 0b001), // Rounding-mode operand fixed to 0b001 (RTZ).
+                   VLOpFrag)),
+                   (!cast<Instruction>(
+                      !if(isSEWAware,
+                          inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+                          inst#"_"#kind#"_"#vlmul.MX))
+                   (result_type result_reg_class:$merge),
+                   (op2_type op2_reg_class:$rs2),
+                   GPR:$vl, log2sew, TU_MU)>; // No rounding-mode operand here.
 
 class VPatUnaryMask<string intrinsic_name,
                     string inst,
@@ -4009,6 +4031,31 @@ class VPatUnaryMaskRoundingMode<string intrinsic_name,
                    (XLenVT timm:$round),
                    GPR:$vl, log2sew, (XLenVT timm:$policy))>;
 
+class VPatUnaryMaskRTZ<string intrinsic_name,
+                       string inst,
+                       string kind,
+                       ValueType result_type,
+                       ValueType op2_type,
+                       ValueType mask_type,
+                       int log2sew,
+                       LMULInfo vlmul,
+                       VReg result_reg_class,
+                       VReg op2_reg_class,
+                       bit isSEWAware = 0> :
+  Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+                   (result_type result_reg_class:$merge),
+                   (op2_type op2_reg_class:$rs2),
+                   (mask_type V0),
+                   (XLenVT 0b001), // Rounding-mode operand fixed to 0b001 (RTZ).
+                   VLOpFrag, (XLenVT timm:$policy))),
+                   (!cast<Instruction>(
+                      !if(isSEWAware,
+                          inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+                          inst#"_"#kind#"_"#vlmul.MX#"_MASK"))
+                   (result_type result_reg_class:$merge),
+                   (op2_type op2_reg_class:$rs2),
+                   (mask_type V0),
+                   GPR:$vl, log2sew, (XLenVT timm:$policy))>;
 
 class VPatMaskUnaryNoMask<string intrinsic_name,
                           string inst,
@@ -4826,6 +4873,25 @@ multiclass VPatConversionRoundingMode<string intrinsic,
                                   op1_reg_class, isSEWAware>;
 }
 
+multiclass VPatConversionRTZ<string intrinsic,
+                             string inst,
+                             string kind,
+                             ValueType result_type,
+                             ValueType op1_type,
+                             ValueType mask_type,
+                             int log2sew,
+                             LMULInfo vlmul,
+                             VReg result_reg_class,
+                             VReg op1_reg_class,
+                             bit isSEWAware = 0> {
+  def : VPatUnaryNoMaskRTZ<intrinsic, inst, kind, result_type, op1_type,
+                           log2sew, vlmul, result_reg_class,
+                           op1_reg_class, isSEWAware>; // Unmasked pattern.
+  def : VPatUnaryMaskRTZ<intrinsic, inst, kind, result_type, op1_type,
+                         mask_type, log2sew, vlmul, result_reg_class,
+                         op1_reg_class, isSEWAware>; // Masked pattern.
+}
+
 multiclass VPatBinaryV_VV<string intrinsic, string instruction,
                           list<VTypeInfo> vtilist, bit isSEWAware = 0> {
   foreach vti = vtilist in
@@ -5776,6 +5842,18 @@ multiclass VPatConversionVI_VF_RM<string intrinsic,
   }
 }
 
+multiclass VPatConversionVI_VF_RTZ<string intrinsic,
+                                   string instruction> {
+  foreach fvti = AllFloatVectors in { // fvti: source; supplies SEW/LMUL.
+    defvar ivti = GetIntVTypeInfo<fvti>.Vti; // Conversion result.
+    let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+                                 GetVTypePredicates<ivti>.Predicates) in
+    defm : VPatConversionRTZ<intrinsic, instruction, "V",
+                             ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+                             fvti.LMul, ivti.RegClass, fvti.RegClass>;
+  }
+}
+
 multiclass VPatConversionVF_VI_RM<string intrinsic, string instruction,
                                   bit isSEWAware = 0> {
   foreach fvti = AllFloatVectors in {
@@ -5813,6 +5891,18 @@ multiclass VPatConversionWI_VF_RM<string intrinsic, string instruction> {
   }
 }
 
+multiclass VPatConversionWI_VF_RTZ<string intrinsic, string instruction> {
+  foreach fvtiToFWti = AllWidenableFloatVectors in {
+    defvar fvti = fvtiToFWti.Vti; // Conversion source; supplies SEW/LMUL.
+    defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti; // Conversion result.
+    let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+                                 GetVTypePredicates<iwti>.Predicates) in
+    defm : VPatConversionRTZ<intrinsic, instruction, "V",
+                             iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
+                             fvti.LMul, iwti.RegClass, fvti.RegClass>;
+  }
+}
+
 multiclass VPatConversionWF_VI<string intrinsic, string instruction,
                                bit isSEWAware = 0> {
   foreach vtiToWti = AllWidenableIntToFloatVectors in {
@@ -5879,6 +5969,18 @@ multiclass VPatConversionVI_WF_RM <string intrinsic, string instruction> {
   }
 }
 
+multiclass VPatConversionVI_WF_RTZ <string intrinsic, string instruction> {
+  foreach vtiToWti = AllWidenableIntToFloatVectors in {
+    defvar vti = vtiToWti.Vti; // Conversion result; supplies SEW/LMUL.
+    defvar fwti = vtiToWti.Wti; // Conversion source.
+    let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+                                 GetVTypePredicates<fwti>.Predicates) in
+    defm : VPatConversionRTZ<intrinsic, instruction, "W",
+                             vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+                             vti.LMul, vti.RegClass, fwti.RegClass>;
+  }
+}
+
 multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction,
                                    bit isSEWAware = 0> {
   foreach fvtiToFWti = AllWidenableFloatVectors in {
@@ -5921,6 +6023,20 @@ multiclass VPatConversionVF_WF_RM<string intrinsic, string instruction,
   }
 }
 
+multiclass VPatConversionVF_WF_RTZ<string intrinsic, string instruction,
+                                   list<VTypeInfoToWide> wlist = AllWidenableFloatVectors,
+                                   bit isSEWAware = 0> {
+  foreach fvtiToFWti = wlist in {
+    defvar fvti = fvtiToFWti.Vti; // Conversion result; supplies SEW/LMUL.
+    defvar fwti = fvtiToFWti.Wti; // Conversion source.
+    let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+                                 GetVTypePredicates<fwti>.Predicates) in
+    defm : VPatConversionRTZ<intrinsic, instruction, "W",
+                             fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+                             fvti.LMul, fvti.RegClass, fwti.RegClass, isSEWAware>;
+  }
+}
+
 multiclass VPatConversionVF_WF_BF_RM<string intrinsic, string instruction,
                                      bit isSEWAware = 0> {
   foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
@@ -7153,6 +7269,8 @@ foreach fvti = AllFloatVectors in {
 //===----------------------------------------------------------------------===//
 // 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
 //===----------------------------------------------------------------------===//
+defm : VPatConversionVI_VF_RTZ<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_RTZ_X_F">;
+defm : VPatConversionVI_VF_RTZ<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_RTZ_XU_F">;
 defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">;
 defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">;
 defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">;
@@ -7165,6 +7283,8 @@ defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU",
 //===----------------------------------------------------------------------===//
 // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
 //===----------------------------------------------------------------------===//
+defm : VPatConversionWI_VF_RTZ<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">;
+defm : VPatConversionWI_VF_RTZ<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_RTZ_X_F">;
 defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">;
 defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">;
 defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">;
@@ -7181,6 +7301,8 @@ defm : VPatConversionWF_VF_BF<"int_riscv_vfwcvtbf16_f_f_v",
 //===----------------------------------------------------------------------===//
 // 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
 //===----------------------------------------------------------------------===//
+defm : VPatConversionVI_WF_RTZ<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">;
+defm : VPatConversionVI_WF_RTZ<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
 defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">;
 defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">;
 defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index fdd7d5f1ee0e73..8cb003b838d06b 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1821,10 +1821,10 @@ def : ProcModel<"pantherlake", AlderlakePModel,
                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
 def : ProcModel<"clearwaterforest", AlderlakePModel,
                 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"emeraldrapids", SapphireRapidsModel,
+                ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 def : ProcModel<"graniterapids", SapphireRapidsModel,
                 ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
-def : ProcModel<"emeraldrapids", SapphireRapidsModel,
-                ProcessorFeatures.SPRFeatures, ProcessorFeatures.GNRTuning>;
 foreach P = ["graniterapids-d", "graniterapids_d"] in {
 def : ProcModel<P, SapphireRapidsModel,
                 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index 1d077326e4cf23..0229b5a140f91b 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -395,6 +395,17 @@ static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major,
   return getError(Error);
 }
 
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::createFromExtMap(unsigned XLen,
+                               const RISCVISAUtils::OrderedExtensionMap &Exts) {
+  assert(XLen == 32 || XLen == 64);
+  std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+  // Copy the caller-supplied extension map in unchanged.
+  ISAInfo->Exts = Exts;
+  // Hand ownership to postProcessAndChecking and return whatever it yields.
+  return RISCVISAInfo::postProcessAndChecking(std::move(ISAInfo));
+}
+
 llvm::Expected<std::unique_ptr<RISCVISAInfo>>
 RISCVISAInfo::parseFeatures(unsigned XLen,
                             const std::vector<std::string> &Features) {
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 00df92e0aadeda..cd48575f143a61 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -616,18 +616,19 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
   return IsWave32Capable;
 }
 
-bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
-                                   StringMap<bool> &Features,
-                                   std::string &ErrorMsg) {
+std::pair<FeatureError, StringRef>
+AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
+                              StringMap<bool> &Features) {
   bool IsWave32Capable = isWave32Capable(GPU, T);
   const bool IsNullGPU = GPU.empty();
-  // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
-  const bool HaveWave32 =
-      (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32");
+  const bool HaveWave32 = Features.count("wavefrontsize32");
   const bool HaveWave64 = Features.count("wavefrontsize64");
   if (HaveWave32 && HaveWave64) {
-    ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
-    return false;
+    return {AMDGPU::INVALID_FEATURE_COMBINATION,
+            "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"};
+  }
+  if (HaveWave32 && !IsNullGPU && !IsWave32Capable) {
+    return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"};
   }
   // Don't assume any wavesize with an unknown subtarget.
   if (!IsNullGPU) {
@@ -638,5 +639,5 @@ bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
       Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
     }
   }
-  return true;
+  return {NO_ERROR, StringRef()};
 }
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index f7a54d428f202f..f878e3e591a05a 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -624,7 +624,7 @@ void SampleContextTracker::createContextLessProfileMap(
     FunctionSamples *FProfile = Node->getFunctionSamples();
     // Profile's context can be empty, use ContextNode's func name.
     if (FProfile)
-      ContextLessProfiles.Create(Node->getFuncName()).merge(*FProfile);
+      ContextLessProfiles.create(Node->getFuncName()).merge(*FProfile);
   }
 }
 } // namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index d11b0b76b28c58..13c0e0d0abff0c 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1572,7 +1572,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
               FunctionId(FunctionSamples::getCanonicalFnName(Callee->getName()))];
         OutlineFS->merge(*FS, 1);
         // Set outlined profile to be synthetic to not bias the inliner.
-        OutlineFS->SetContextSynthetic();
+        OutlineFS->setContextSynthetic();
       }
     } else {
       auto pair =
@@ -1586,7 +1586,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
 static SmallVector<InstrProfValueData, 2>
 GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M) {
   SmallVector<InstrProfValueData, 2> R;
-  for (const auto &I : SampleRecord::SortCallTargets(M)) {
+  for (const auto &I : SampleRecord::sortCallTargets(M)) {
     R.emplace_back(
         InstrProfValueData{I.first.getHashCode(), I.second});
   }
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 91a5830fa8e9e0..3adbc37c4ddaa1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -231,6 +231,11 @@ static bool isValidElementType(Type *Ty) {
          !Ty->isPPC_FP128Ty();
 }
 
+/// \returns the FixedVectorType with \p VF elements of type \p ScalarTy.
+static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
+  return FixedVectorType::get(ScalarTy, VF);
+}
+
 /// \returns True if the value is a constant (but not globals/constant
 /// expressions).
 static bool isConstant(Value *V) {
@@ -1480,8 +1485,7 @@ class BoUpSLP {
           if (getUnderlyingObject(LI1->getPointerOperand()) ==
                   getUnderlyingObject(LI2->getPointerOperand()) &&
               R.TTI->isLegalMaskedGather(
-                  FixedVectorType::get(LI1->getType(), NumLanes),
-                  LI1->getAlign()))
+                  getWidenedType(LI1->getType(), NumLanes), LI1->getAlign()))
             return LookAheadHeuristics::ScoreMaskedGatherCandidate;
           return CheckSameEntryOrFail();
         }
@@ -4114,7 +4118,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
   int NumScalars = GatheredScalars.size();
   if (!isValidElementType(ScalarTy))
     return std::nullopt;
-  auto *VecTy = FixedVectorType::get(ScalarTy, NumScalars);
+  auto *VecTy = getWidenedType(ScalarTy, NumScalars);
   int NumParts = TTI->getNumberOfParts(VecTy);
   if (NumParts == 0 || NumParts >= NumScalars)
     NumParts = 1;
@@ -4458,7 +4462,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
   }
 
   Order.clear();
-  auto *VecTy = FixedVectorType::get(ScalarTy, Sz);
+  auto *VecTy = getWidenedType(ScalarTy, Sz);
   // Check the order of pointer operands or that all pointers are the same.
   bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
   // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
@@ -4577,7 +4581,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                   /*VariableMask=*/false, CommonAlignment, CostKind) +
               VectorGEPCost - ScalarGEPCost;
           InstructionCost VecLdCost = 0;
-          auto *SubVecTy = FixedVectorType::get(ScalarTy, VF);
+          auto *SubVecTy = getWidenedType(ScalarTy, VF);
           for (auto [I, LS] : enumerate(States)) {
             auto *LI0 = cast<LoadInst>(VL[I * VF]);
             switch (LS) {
@@ -4847,8 +4851,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
       }
     }
     if (Sz == 2 && TE.getVectorFactor() == 4 &&
-        TTI->getNumberOfParts(FixedVectorType::get(
-            TE.Scalars.front()->getType(), 2 * TE.getVectorFactor())) == 1)
+        TTI->getNumberOfParts(getWidenedType(TE.Scalars.front()->getType(),
+                                             2 * TE.getVectorFactor())) == 1)
       return std::nullopt;
     if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
                                                      Sz)) {
@@ -5020,7 +5024,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
           find_if(TE.Scalars, [](Value *V) { return !isConstant(V); });
       if (It == TE.Scalars.begin())
         return OrdersType();
-      auto *Ty = FixedVectorType::get(TE.Scalars.front()->getType(), Sz);
+      auto *Ty = getWidenedType(TE.Scalars.front()->getType(), Sz);
       if (It != TE.Scalars.end()) {
         OrdersType Order(Sz, Sz);
         unsigned Idx = std::distance(TE.Scalars.begin(), It);
@@ -5156,7 +5160,7 @@ void BoUpSLP::reorderTopToBottom() {
     // to take into account their order when looking for the most used order.
     if (TE->isAltShuffle()) {
       VectorType *VecTy =
-          FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
+          getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size());
       unsigned Opcode0 = TE->getOpcode();
       unsigned Opcode1 = TE->getAltOpcode();
       SmallBitVector OpcodeMask(getAltInstrMask(TE->Scalars, Opcode0, Opcode1));
@@ -6073,7 +6077,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
   unsigned Opcode1 = S.getAltOpcode();
   SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
   // If this pattern is supported by the target then consider it profitable.
-  if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()),
+  if (TTI->isLegalAltInstr(getWidenedType(S.MainOp->getType(), VL.size()),
                            Opcode0, Opcode1, OpcodeMask))
     return true;
   SmallVector<ValueList> Operands;
@@ -7380,7 +7384,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const {
 
   if (!isValidElementType(EltTy))
     return 0;
-  uint64_t VTSize = DL->getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
+  uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
   if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
       VTSize != DL->getTypeStoreSizeInBits(T))
     return 0;
@@ -7945,7 +7949,7 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
         Index + NumSrcElts <= static_cast<int>(Mask.size()))
       return TTI.getShuffleCost(
           TTI::SK_InsertSubvector,
-          FixedVectorType::get(Tp->getElementType(), Mask.size()), Mask,
+          getWidenedType(Tp->getElementType(), Mask.size()), Mask,
           TTI::TCK_RecipThroughput, Index, Tp);
   }
   return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
@@ -8044,7 +8048,7 @@ void BoUpSLP::transformNodes() {
       if (E.State != TreeEntry::Vectorize)
         break;
       Type *ScalarTy = E.getMainOp()->getType();
-      auto *VecTy = FixedVectorType::get(ScalarTy, E.Scalars.size());
+      auto *VecTy = getWidenedType(ScalarTy, E.Scalars.size());
       Align CommonAlignment = computeCommonAlignment<LoadInst>(E.Scalars);
       // Check if profitable to represent consecutive load + reverse as strided
       // load with stride -1.
@@ -8071,7 +8075,7 @@ void BoUpSLP::transformNodes() {
     case Instruction::Store: {
       Type *ScalarTy =
           cast<StoreInst>(E.getMainOp())->getValueOperand()->getType();
-      auto *VecTy = FixedVectorType::get(ScalarTy, E.Scalars.size());
+      auto *VecTy = getWidenedType(ScalarTy, E.Scalars.size());
       Align CommonAlignment = computeCommonAlignment<StoreInst>(E.Scalars);
       // Check if profitable to represent consecutive load + reverse as strided
       // load with stride -1.
@@ -8141,7 +8145,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
   InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
     if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
       return TTI::TCC_Free;
-    auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+    auto *VecTy = getWidenedType(ScalarTy, VL.size());
     InstructionCost GatherCost = 0;
     SmallVector<Value *> Gathers(VL.begin(), VL.end());
     // Improve gather cost for gather of loads, if we can group some of the
@@ -8239,7 +8243,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
                                   LI->getAlign(), LI->getPointerAddressSpace(),
                                   CostKind, TTI::OperandValueInfo(), LI);
         }
-        auto *LoadTy = FixedVectorType::get(VL.front()->getType(), VF);
+        auto *LoadTy = getWidenedType(VL.front()->getType(), VF);
         for (const std::pair<unsigned, LoadsState> &P : VectorizedStarts) {
           auto *LI = cast<LoadInst>(VL[P.first]);
           Align Alignment = LI->getAlign();
@@ -8277,7 +8281,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
             // TODO: improve checks if GEPs can be vectorized.
             Value *Ptr0 = PointerOps.front();
             Type *ScalarTy = Ptr0->getType();
-            auto *VecTy = FixedVectorType::get(ScalarTy, VF);
+            auto *VecTy = getWidenedType(ScalarTy, VF);
             auto [ScalarGEPCost, VectorGEPCost] =
                 getGEPCosts(TTI, PointerOps, Ptr0, Instruction::GetElementPtr,
                             CostKind, ScalarTy, VecTy);
@@ -8432,33 +8436,32 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         if (*ShuffleKinds[Part] != TTI::SK_PermuteSingleSrc ||
             !ShuffleVectorInst::isIdentityMask(
                 MaskSlice, std::max<unsigned>(NumElts, MaskSlice.size())))
-          Cost += ::getShuffleCost(TTI, *ShuffleKinds[Part],
-                                   FixedVectorType::get(ScalarTy, NumElts),
-                                   MaskSlice);
+          Cost +=
+              ::getShuffleCost(TTI, *ShuffleKinds[Part],
+                               getWidenedType(ScalarTy, NumElts), MaskSlice);
         continue;
       }
       if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
           !ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
-        Cost += ::getShuffleCost(TTI, *RegShuffleKind,
-                                 FixedVectorType::get(ScalarTy, EltsPerVector),
-                                 SubMask);
+        Cost +=
+            ::getShuffleCost(TTI, *RegShuffleKind,
+                             getWidenedType(ScalarTy, EltsPerVector), SubMask);
       }
       for (unsigned Idx : Indices) {
         assert((Idx + EltsPerVector) <= alignTo(NumElts, EltsPerVector) &&
                "SK_ExtractSubvector index out of range");
         Cost += ::getShuffleCost(
             TTI, TTI::SK_ExtractSubvector,
-            FixedVectorType::get(ScalarTy, alignTo(NumElts, EltsPerVector)),
+            getWidenedType(ScalarTy, alignTo(NumElts, EltsPerVector)),
             std::nullopt, CostKind, Idx,
-            FixedVectorType::get(ScalarTy, EltsPerVector));
+            getWidenedType(ScalarTy, EltsPerVector));
       }
       // Second attempt to check, if just a permute is better estimated than
       // subvector extract.
       SubMask.assign(NumElts, PoisonMaskElem);
       copy(MaskSlice, SubMask.begin());
-      InstructionCost OriginalCost =
-          ::getShuffleCost(TTI, *ShuffleKinds[Part],
-                           FixedVectorType::get(ScalarTy, NumElts), SubMask);
+      InstructionCost OriginalCost = ::getShuffleCost(
+          TTI, *ShuffleKinds[Part], getWidenedType(ScalarTy, NumElts), SubMask);
       if (OriginalCost < Cost)
         Cost = OriginalCost;
     }
@@ -8593,9 +8596,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
         if (DstSz > SrcSz)
           CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
-        return TTI.getCastInstrCost(CastOpcode,
-                                    FixedVectorType::get(ScalarTy, VF),
-                                    FixedVectorType::get(EScalarTy, VF),
+        return TTI.getCastInstrCost(CastOpcode, getWidenedType(ScalarTy, VF),
+                                    getWidenedType(EScalarTy, VF),
                                     TTI::CastContextHint::None, CostKind);
       }
       return TTI::TCC_Free;
@@ -8650,8 +8652,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         ExtraCost += GetNodeMinBWAffectedCost(*E, E->getVectorFactor()) +
                      GetNodeMinBWAffectedCost(*E2, E2->getVectorFactor());
       }
-      V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
-      V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+      V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
+      V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
     } else if (!V1 && P2.isNull()) {
       // Shuffle single entry node.
       const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8671,7 +8673,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         CommonVF = E->Scalars.size();
       }
       ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF);
-      V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+      V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
       // Not identity/broadcast? Try to see if the original vector is better.
       if (!E->ReorderIndices.empty() && CommonVF == E->ReorderIndices.size() &&
           CommonVF == CommonMask.size() &&
@@ -8716,10 +8718,10 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         CommonVF = VF;
       }
       ExtraCost += GetValueMinBWAffectedCost(V1);
-      V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+      V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
       ExtraCost += GetNodeMinBWAffectedCost(
           *E2, std::min(CommonVF, E2->getVectorFactor()));
-      V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+      V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
     } else if (!V1 && V2) {
       // Shuffle vector and tree node.
       unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
@@ -8745,9 +8747,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       }
       ExtraCost += GetNodeMinBWAffectedCost(
           *E1, std::min(CommonVF, E1->getVectorFactor()));
-      V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+      V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
       ExtraCost += GetValueMinBWAffectedCost(V2);
-      V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+      V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
     } else {
       assert(V1 && V2 && "Expected both vectors.");
       unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -8761,17 +8763,17 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       ExtraCost +=
           GetValueMinBWAffectedCost(V1) + GetValueMinBWAffectedCost(V2);
       if (V1->getType() != V2->getType()) {
-        V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
-        V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+        V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
+        V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
       } else {
         if (cast<VectorType>(V1->getType())->getElementType() != ScalarTy)
-          V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+          V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
         if (cast<VectorType>(V2->getType())->getElementType() != ScalarTy)
-          V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+          V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
       }
     }
-    InVectors.front() = Constant::getNullValue(
-        FixedVectorType::get(ScalarTy, CommonMask.size()));
+    InVectors.front() =
+        Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
     if (InVectors.size() == 2)
       InVectors.pop_back();
     return ExtraCost + BaseShuffleAnalysis::createShuffle<InstructionCost>(
@@ -8880,8 +8882,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
     SameNodesEstimated = false;
     if (NumParts != 1 && UniqueBases.size() != 1) {
       UseVecBaseAsInput = true;
-      VecBase = Constant::getNullValue(
-          FixedVectorType::get(ScalarTy, CommonMask.size()));
+      VecBase =
+          Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
     }
     return VecBase;
   }
@@ -8909,7 +8911,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       return;
     }
     assert(!CommonMask.empty() && "Expected non-empty common mask.");
-    auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
+    auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
     unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
     if (NumParts == 0 || NumParts >= Mask.size())
       NumParts = 1;
@@ -8926,7 +8928,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       return;
     }
     assert(!CommonMask.empty() && "Expected non-empty common mask.");
-    auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
+    auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
     unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
     if (NumParts == 0 || NumParts >= Mask.size())
       NumParts = 1;
@@ -9125,12 +9127,12 @@ static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
         continue;
       }
       if (MinBW > 0) {
-        ArgTys.push_back(FixedVectorType::get(
-            IntegerType::get(CI->getContext(), MinBW), VF));
+        ArgTys.push_back(
+            getWidenedType(IntegerType::get(CI->getContext(), MinBW), VF));
         continue;
       }
     }
-    ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
+    ArgTys.push_back(getWidenedType(Arg->getType(), VF));
   }
   return ArgTys;
 }
@@ -9159,9 +9161,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   Type *OrigScalarTy = ScalarTy;
   if (It != MinBWs.end())
     ScalarTy = IntegerType::get(F->getContext(), It->second.first);
-  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+  auto *VecTy = getWidenedType(ScalarTy, VL.size());
   unsigned EntryVF = E->getVectorFactor();
-  auto *FinalVecTy = FixedVectorType::get(ScalarTy, EntryVF);
+  auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
 
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
   if (E->State == TreeEntry::NeedToGather) {
@@ -9257,7 +9259,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
               unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy);
               unsigned VecOpcode;
               auto *UserVecTy =
-                  FixedVectorType::get(UserScalarTy, E->getVectorFactor());
+                  getWidenedType(UserScalarTy, E->getVectorFactor());
               if (BWSz > SrcBWSz)
                 VecOpcode = Instruction::Trunc;
               else
@@ -9329,7 +9331,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           NumElts = ATy->getNumElements();
         else
           NumElts = AggregateTy->getStructNumElements();
-        SrcVecTy = FixedVectorType::get(OrigScalarTy, NumElts);
+        SrcVecTy = getWidenedType(OrigScalarTy, NumElts);
       }
       if (I->hasOneUse()) {
         Instruction *Ext = I->user_back();
@@ -9423,7 +9425,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
     // need to shift the vector.
     // Do not calculate the cost if the actual size is the register size and
     // we can merge this shuffle with the following SK_Select.
-    auto *InsertVecTy = FixedVectorType::get(ScalarTy, InsertVecSz);
+    auto *InsertVecTy = getWidenedType(ScalarTy, InsertVecSz);
     if (!IsIdentity)
       Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                                   InsertVecTy, Mask);
@@ -9439,7 +9441,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                       buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask));
     if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
       if (InsertVecSz != VecSz) {
-        auto *ActualVecTy = FixedVectorType::get(ScalarTy, VecSz);
+        auto *ActualVecTy = getWidenedType(ScalarTy, VecSz);
         Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
                                     std::nullopt, CostKind, OffsetBeg - Offset,
                                     InsertVecTy);
@@ -9473,7 +9475,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   case Instruction::BitCast: {
     auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
     Type *SrcScalarTy = VL0->getOperand(0)->getType();
-    auto *SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
+    auto *SrcVecTy = getWidenedType(SrcScalarTy, VL.size());
     unsigned Opcode = ShuffleOrOp;
     unsigned VecOpcode = Opcode;
     if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
@@ -9483,7 +9485,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       if (SrcIt != MinBWs.end()) {
         SrcBWSz = SrcIt->second.first;
         SrcScalarTy = IntegerType::get(F->getContext(), SrcBWSz);
-        SrcVecTy = FixedVectorType::get(SrcScalarTy, VL.size());
+        SrcVecTy = getWidenedType(SrcScalarTy, VL.size());
       }
       unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
       if (BWSz == SrcBWSz) {
@@ -9551,7 +9553,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                                      VI);
     };
     auto GetVectorCost = [&](InstructionCost CommonCost) {
-      auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
+      auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
 
       InstructionCost VecCost = TTI->getCmpSelInstrCost(
           E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
@@ -9781,7 +9783,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         VecCost +=
             TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
       } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
-        auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
+        auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
         VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
                                             CI0->getPredicate(), CostKind, VL0);
         VecCost += TTIRef.getCmpSelInstrCost(
@@ -9790,7 +9792,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
             E->getAltOp());
       } else {
         Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType();
-        auto *SrcTy = FixedVectorType::get(SrcSclTy, VL.size());
+        auto *SrcTy = getWidenedType(SrcSclTy, VL.size());
         if (SrcSclTy->isIntegerTy() && ScalarTy->isIntegerTy()) {
           auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
           unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
@@ -9799,7 +9801,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
           if (SrcIt != MinBWs.end()) {
             SrcBWSz = SrcIt->second.first;
             SrcSclTy = IntegerType::get(SrcSclTy->getContext(), SrcBWSz);
-            SrcTy = FixedVectorType::get(SrcSclTy, VL.size());
+            SrcTy = getWidenedType(SrcSclTy, VL.size());
           }
           if (BWSz <= SrcBWSz) {
             if (BWSz < SrcBWSz)
@@ -10136,7 +10138,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
         auto *ScalarTy = II->getType();
         if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
           ScalarTy = VectorTy->getElementType();
-        V.push_back(FixedVectorType::get(ScalarTy, BundleWidth));
+        V.push_back(getWidenedType(ScalarTy, BundleWidth));
       }
       Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
     }
@@ -10424,9 +10426,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
               TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
               InstructionCost C = TTI->getCastInstrCost(
                   VecOpcode, FTy,
-                  FixedVectorType::get(
-                      IntegerType::get(FTy->getContext(), BWSz),
-                      FTy->getNumElements()),
+                  getWidenedType(IntegerType::get(FTy->getContext(), BWSz),
+                                 FTy->getNumElements()),
                   TTI::CastContextHint::None, CostKind);
               LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
                                 << " for extending externally used vector with "
@@ -10481,13 +10482,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
     // If we plan to rewrite the tree in a smaller type, we will need to sign
     // extend the extracted value back to the original type. Here, we account
     // for the extract and the added cost of the sign extend if needed.
-    auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
+    auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
     auto It = MinBWs.find(getTreeEntry(EU.Scalar));
     if (It != MinBWs.end()) {
       auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
       unsigned Extend =
           It->second.second ? Instruction::SExt : Instruction::ZExt;
-      VecTy = FixedVectorType::get(MinTy, BundleWidth);
+      VecTy = getWidenedType(MinTy, BundleWidth);
       ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
                                                    VecTy, EU.Lane);
     } else {
@@ -10529,9 +10530,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
       SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
       std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
                 OrigMask.begin());
-      C = TTI->getShuffleCost(
-          TTI::SK_PermuteSingleSrc,
-          FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask);
+      C = TTI->getShuffleCost(TTI::SK_PermuteSingleSrc,
+                              getWidenedType(TE->getMainOp()->getType(), VecVF),
+                              OrigMask);
       LLVM_DEBUG(
           dbgs() << "SLP: Adding cost " << C
                  << " for final shuffle of insertelement external users.\n";
@@ -10553,8 +10554,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
       if (TEs.size() == 1) {
         if (VF == 0)
           VF = TEs.front()->getVectorFactor();
-        auto *FTy =
-            FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
+        auto *FTy = getWidenedType(TEs.back()->Scalars.front()->getType(), VF);
         if (!ShuffleVectorInst::isIdentityMask(Mask, VF) &&
             !all_of(enumerate(Mask), [=](const auto &Data) {
               return Data.value() == PoisonMaskElem ||
@@ -10578,8 +10578,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
           else
             VF = Mask.size();
         }
-        auto *FTy =
-            FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
+        auto *FTy = getWidenedType(TEs.back()->Scalars.front()->getType(), VF);
         InstructionCost C =
             ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask);
         LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
@@ -10614,9 +10613,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
       if (SrcSize < DstSize)
         Opcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
       auto *SrcVecTy =
-          FixedVectorType::get(Builder.getIntNTy(SrcSize), E.getVectorFactor());
+          getWidenedType(Builder.getIntNTy(SrcSize), E.getVectorFactor());
       auto *DstVecTy =
-          FixedVectorType::get(Builder.getIntNTy(DstSize), E.getVectorFactor());
+          getWidenedType(Builder.getIntNTy(DstSize), E.getVectorFactor());
       TTI::CastContextHint CCH = getCastContextHint(E);
       InstructionCost CastCost;
       switch (E.getOpcode()) {
@@ -11215,7 +11214,7 @@ BoUpSLP::isGatherShuffledEntry(
 
 InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
                                        Type *ScalarTy) const {
-  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+  auto *VecTy = getWidenedType(ScalarTy, VL.size());
   bool DuplicateNonConst = false;
   // Find the cost of inserting/extracting values from the vector.
   // Check if the same elements are inserted several times and count them as
@@ -11523,7 +11522,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
     }
     return Vec;
   };
-  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+  auto *VecTy = getWidenedType(ScalarTy, VL.size());
   Value *Vec = Root ? Root : PoisonValue::get(VecTy);
   SmallVector<int> NonConsts;
   // Insert constant values at first.
@@ -11666,7 +11665,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
     }
     Value *createIdentity(Value *V) { return V; }
     Value *createPoison(Type *Ty, unsigned VF) {
-      return PoisonValue::get(FixedVectorType::get(Ty, VF));
+      return PoisonValue::get(getWidenedType(Ty, VF));
     }
     /// Resizes 2 input vector to match the sizes, if the they are not equal
     /// yet. The smallest vector is resized to the size of the larger vector.
@@ -11870,7 +11869,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
       return std::nullopt;
     // Postpone gather emission, will be emitted after the end of the
     // process to keep correct order.
-    auto *ResVecTy = FixedVectorType::get(ScalarTy, E->getVectorFactor());
+    auto *ResVecTy = getWidenedType(ScalarTy, E->getVectorFactor());
     return Builder.CreateAlignedLoad(
         ResVecTy,
         PoisonValue::get(PointerType::getUnqual(ScalarTy->getContext())),
@@ -12241,7 +12240,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
   SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
   SmallVector<SmallVector<const TreeEntry *>> Entries;
   Type *OrigScalarTy = GatheredScalars.front()->getType();
-  auto *VecTy = FixedVectorType::get(ScalarTy, GatheredScalars.size());
+  auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
   unsigned NumParts = TTI->getNumberOfParts(VecTy);
   if (NumParts == 0 || NumParts >= GatheredScalars.size())
     NumParts = 1;
@@ -12637,7 +12636,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
   auto It = MinBWs.find(E);
   if (It != MinBWs.end())
     ScalarTy = IntegerType::get(F->getContext(), It->second.first);
-  auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
+  auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
   if (E->State == TreeEntry::NeedToGather) {
     // Set insert point for non-reduction initial nodes.
     if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
@@ -12784,7 +12783,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         assert(Res.first > 0 && "Expected item in MinBWs.");
         V = Builder.CreateIntCast(
             V,
-            FixedVectorType::get(
+            getWidenedType(
                 ScalarTy,
                 cast<FixedVectorType>(V->getType())->getNumElements()),
             Res.second);
@@ -13368,8 +13367,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         if (cast<VectorType>(OpVec->getType())->getElementType() !=
                 ScalarArg->getType() &&
             It == MinBWs.end()) {
-          auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
-                                              VecTy->getNumElements());
+          auto *CastTy =
+              getWidenedType(ScalarArg->getType(), VecTy->getNumElements());
           OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
         } else if (It != MinBWs.end()) {
           OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
@@ -13868,7 +13867,7 @@ Value *BoUpSLP::vectorizeTree(
                 Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
               Vec = Builder.CreateIntCast(
                   Vec,
-                  FixedVectorType::get(
+                  getWidenedType(
                       ScalarTy,
                       cast<FixedVectorType>(Vec->getType())->getNumElements()),
                   BWIt->second.second);
@@ -14229,8 +14228,8 @@ void BoUpSLP::optimizeGatherSequence() {
     return SM1.size() - LastUndefsCnt > 1 &&
            TTI->getNumberOfParts(SI1->getType()) ==
                TTI->getNumberOfParts(
-                   FixedVectorType::get(SI1->getType()->getElementType(),
-                                        SM1.size() - LastUndefsCnt));
+                   getWidenedType(SI1->getType()->getElementType(),
+                                  SM1.size() - LastUndefsCnt));
   };
   // Perform O(N^2) search over the gather/shuffle sequences and merge identical
   // instructions. TODO: We can further optimize this scan if we split the
@@ -15033,8 +15032,8 @@ bool BoUpSLP::collectValuesToDemote(
       const unsigned VF = E.Scalars.size();
       Type *OrigScalarTy = E.Scalars.front()->getType();
       if (UniqueBases.size() <= 2 ||
-          TTI->getNumberOfParts(FixedVectorType::get(OrigScalarTy, VF)) ==
-              TTI->getNumberOfParts(FixedVectorType::get(
+          TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
+              TTI->getNumberOfParts(getWidenedType(
                   IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
         ToDemote.push_back(E.Idx);
     }
@@ -15292,8 +15291,7 @@ bool BoUpSLP::collectValuesToDemote(
       unsigned MinBW = PowerOf2Ceil(BitWidth);
       SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
       auto VecCallCosts = getVectorCallCosts(
-          IC,
-          FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF),
+          IC, getWidenedType(IntegerType::get(IC->getContext(), MinBW), VF),
           TTI, TLI, ArgTys);
       InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
       if (Cost < BestCost) {
@@ -15378,8 +15376,7 @@ void BoUpSLP::computeMinimumValueSizes() {
                [&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
       return 0u;
 
-    unsigned NumParts =
-        TTI->getNumberOfParts(FixedVectorType::get(TreeRootIT, VF));
+    unsigned NumParts = TTI->getNumberOfParts(getWidenedType(TreeRootIT, VF));
 
     // The maximum bit width required to represent all the values that can be
     // demoted without loss of precision. It would be safe to truncate the roots
@@ -15434,7 +15431,7 @@ void BoUpSLP::computeMinimumValueSizes() {
     // use - ignore it.
     if (NumParts > 1 &&
         NumParts ==
-            TTI->getNumberOfParts(FixedVectorType::get(
+            TTI->getNumberOfParts(getWidenedType(
                 IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
       return 0u;
 
@@ -16285,7 +16282,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
     // No actual vectorization should happen, if number of parts is the same as
     // provided vectorization factor (i.e. the scalar type is used for vector
     // code during codegen).
-    auto *VecTy = FixedVectorType::get(ScalarTy, VF);
+    auto *VecTy = getWidenedType(ScalarTy, VF);
     if (TTI->getNumberOfParts(VecTy) == VF)
       continue;
     for (unsigned I = NextInst; I < MaxInst; ++I) {
@@ -17569,7 +17566,7 @@ class HorizontalReduction {
                                    FastMathFlags FMF) {
     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     Type *ScalarTy = ReducedVals.front()->getType();
-    FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
+    FixedVectorType *VectorTy = getWidenedType(ScalarTy, ReduxWidth);
     InstructionCost VectorCost = 0, ScalarCost;
     // If all of the reduced values are constant, the vector cost is 0, since
     // the reduction value can be calculated at the compile time.
@@ -17725,7 +17722,7 @@ class HorizontalReduction {
     if (VTy->getElementType() != VL.front()->getType()) {
       VectorizedValue = Builder.CreateIntCast(
           VectorizedValue,
-          FixedVectorType::get(VL.front()->getType(), VTy->getNumElements()),
+          getWidenedType(VL.front()->getType(), VTy->getNumElements()),
           any_of(VL, [&](Value *R) {
             KnownBits Known = computeKnownBits(
                 R, cast<Instruction>(ReductionOps.front().front())
@@ -18576,6 +18573,11 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         },
         /*MaxVFOnly=*/true, R);
     Changed |= HaveVectorizedPhiNodes;
+    if (HaveVectorizedPhiNodes && any_of(PHIToOpcodes, [&](const auto &P) {
+          auto *PHI = dyn_cast<PHINode>(P.first);
+          return !PHI || R.isDeleted(PHI);
+        }))
+      PHIToOpcodes.clear();
     VisitedInstrs.insert(Incoming.begin(), Incoming.end());
   } while (HaveVectorizedPhiNodes);
 
@@ -18648,7 +18650,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       }
       // Try to vectorize the incoming values of the PHI, to catch reductions
       // that feed into PHIs.
-      for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) {
+      for (unsigned I : seq<unsigned>(P->getNumIncomingValues())) {
         // Skip if the incoming block is the current BB for now. Also, bypass
         // unreachable IR for efficiency and to avoid crashing.
         // TODO: Collect the skipped incoming values and try to vectorize them
@@ -18660,9 +18662,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         // Postponed instructions should not be vectorized here, delay their
         // vectorization.
         if (auto *PI = dyn_cast<Instruction>(P->getIncomingValue(I));
-            PI && !IsInPostProcessInstrs(PI))
-          Changed |= vectorizeRootInstruction(nullptr, PI,
+            PI && !IsInPostProcessInstrs(PI)) {
+          bool Res = vectorizeRootInstruction(nullptr, PI,
                                               P->getIncomingBlock(I), R, TTI);
+          Changed |= Res;
+          if (Res && R.isDeleted(P)) {
+            It = BB->begin();
+            E = BB->end();
+            break;
+          }
+        }
       }
       continue;
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll
index 68a85530ea2424..582c302dd2a156 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll
@@ -693,3 +693,19 @@ entry:
 
   ret <vscale x 8 x i64> %a
 }
+
+define <vscale x 8 x i64> @intrinsic_vfcvt_mask_x.f.v_rtz_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_rtz_nxv8i64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 1, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll
index 93716ba7f451c1..708b38b8ed1161 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll
@@ -693,3 +693,19 @@ entry:
 
   ret <vscale x 8 x i64> %a
 }
+
+define <vscale x 8 x i64> @intrinsic_vfcvt_mask_xu.f.v_rtz_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_rtz_nxv8i64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 1, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
index e4b39c655a102f..334d5eba030012 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
@@ -708,3 +708,20 @@ entry:
 
   ret <vscale x 8 x i32> %a
 }
+
+define <vscale x 8 x i32> @intrinsic_vfncvt_mask_x.f.w_rtz_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_rtz_nxv8i32_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64(
+    <vscale x 8 x i32> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 1, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i32> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
index fd922438d05b36..bea99a0e81a348 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
@@ -708,3 +708,19 @@ entry:
 
   ret <vscale x 8 x i32> %a
 }
+
+define <vscale x 8 x i32> @intrinsic_vfncvt_mask_xu.f.w_rtz_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_rtz_nxv8i32_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64(
+    <vscale x 8 x i32> %0,
+    <vscale x 8 x double> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 1, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i32> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
index 23b10250dfa486..9a80e02bbbbb44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
@@ -426,3 +426,19 @@ entry:
 
   ret <vscale x 8 x i64> %a
 }
+
+define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_x.f.v_rtz_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_rtz_nxv8i64_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 1, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
index f6779ec9ba5aa5..98caaf91ab3c01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
@@ -426,3 +426,19 @@ entry:
 
   ret <vscale x 8 x i64> %a
 }
+
+define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_xu.f.v_rtz_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_rtz_nxv8i64_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 1, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/MC/RISCV/rv32zcmt-valid.s b/llvm/test/MC/RISCV/rv32zcmt-valid.s
index bc58e189d0ffa1..a3829fed829f03 100644
--- a/llvm/test/MC/RISCV/rv32zcmt-valid.s
+++ b/llvm/test/MC/RISCV/rv32zcmt-valid.s
@@ -24,10 +24,10 @@
 
 # CHECK-ASM-AND-OBJ: cm.jt 1
 # CHECK-ASM: encoding: [0x06,0xa0]
-# CHECK-NO-EXT: error: instruction requires the following: 'Zcmt' (table jump instuctions for code-size reduction){{$}}
+# CHECK-NO-EXT: error: instruction requires the following: 'Zcmt' (table jump instructions for code-size reduction){{$}}
 cm.jt 1
 
 # CHECK-ASM-AND-OBJ: cm.jalt 32
 # CHECK-ASM: encoding: [0x82,0xa0]
-# CHECK-NO-EXT: error: instruction requires the following: 'Zcmt' (table jump instuctions for code-size reduction){{$}}
+# CHECK-NO-EXT: error: instruction requires the following: 'Zcmt' (table jump instructions for code-size reduction){{$}}
 cm.jalt 32
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll
index 70f0321039ea94..e9d5c353cbccfc 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll
@@ -672,6 +672,38 @@ define i1 @assume_nonnan_x2_ord(float %x, float %y) {
   ret i1 %cmp
 }
 
+define i1 @assume_nan_x2_uno(float %x, float %y) {
+; CHECK-LABEL: @assume_nan_x2_uno(
+; CHECK-NEXT:    [[UNO_X:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    call void @llvm.assume(i1 [[UNO_X]])
+; CHECK-NEXT:    [[UNO_Y:%.*]] = fcmp uno float [[Y:%.*]], 0.000000e+00
+; CHECK-NEXT:    call void @llvm.assume(i1 [[UNO_Y]])
+; CHECK-NEXT:    ret i1 true
+;
+  %uno.x = fcmp uno float %x, 0.0
+  call void @llvm.assume(i1 %uno.x)
+  %uno.y = fcmp uno float %y, 0.0
+  call void @llvm.assume(i1 %uno.y)
+  %cmp = fcmp uno float %x, %y
+  ret i1 %cmp
+}
+
+define i1 @assume_nan_x2_ord(float %x, float %y) {
+; CHECK-LABEL: @assume_nan_x2_ord(
+; CHECK-NEXT:    [[UNO_X:%.*]] = fcmp uno float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    call void @llvm.assume(i1 [[UNO_X]])
+; CHECK-NEXT:    [[UNO_Y:%.*]] = fcmp uno float [[Y:%.*]], 0.000000e+00
+; CHECK-NEXT:    call void @llvm.assume(i1 [[UNO_Y]])
+; CHECK-NEXT:    ret i1 false
+;
+  %uno.x = fcmp uno float %x, 0.0
+  call void @llvm.assume(i1 %uno.x)
+  %uno.y = fcmp uno float %y, 0.0
+  call void @llvm.assume(i1 %uno.y)
+  %cmp = fcmp ord float %x, %y
+  ret i1 %cmp
+}
+
 define i1 @assume_nonan_x2_uno(float %x, float %y) {
 ; CHECK-LABEL: @assume_nonan_x2_uno(
 ; CHECK-NEXT:    [[ORD_X:%.*]] = fcmp ord float [[X:%.*]], 0.000000e+00
diff --git a/llvm/test/Transforms/InstSimplify/known-never-nan.ll b/llvm/test/Transforms/InstSimplify/known-never-nan.ll
index 49a48ae42d0645..907eca0a856a81 100644
--- a/llvm/test/Transforms/InstSimplify/known-never-nan.ll
+++ b/llvm/test/Transforms/InstSimplify/known-never-nan.ll
@@ -512,12 +512,10 @@ define i1 @isKnownNeverNaN_nofpclass_callsite() {
 
 declare nofpclass(sub norm zero inf) double @only_nans()
 
-; TODO: Could simplify to false
 define i1 @isKnownNeverNaN_only_nans() {
 ; CHECK-LABEL: @isKnownNeverNaN_only_nans(
 ; CHECK-NEXT:    [[CALL:%.*]] = call double @only_nans()
-; CHECK-NEXT:    [[TMP:%.*]] = fcmp ord double [[CALL]], [[CALL]]
-; CHECK-NEXT:    ret i1 [[TMP]]
+; CHECK-NEXT:    ret i1 false
 ;
   %call = call double @only_nans()
   %tmp = fcmp ord double %call, %call
diff --git a/llvm/test/Transforms/InstSimplify/logic-of-fcmps.ll b/llvm/test/Transforms/InstSimplify/logic-of-fcmps.ll
index 4b2ff1b3d050cf..3a8bf53b32cab0 100644
--- a/llvm/test/Transforms/InstSimplify/logic-of-fcmps.ll
+++ b/llvm/test/Transforms/InstSimplify/logic-of-fcmps.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 ; Cycle through commuted variants where one operand of fcmp ord/uno is
 ; known not-a-NAN and the other is repeated in the logically-connected fcmp.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll
new file mode 100644
index 00000000000000..23f64b1e71662c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=i386-pc-windows-msvc19.34.0 -mcpu=pentium4 < %s | FileCheck %s
+
+define i32 @test(double %mul321.i) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: double [[MUL321_I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL321_I]], i32 0
+; CHECK-NEXT:    br label %[[DO_BODY220_I:.*]]
+; CHECK:       [[DO_BODY220_I]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x double> [ [[TMP6:%.*]], %[[DO_BODY221_I:.*]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[DO_BODY221_I]]
+; CHECK:       [[DO_BODY221_I]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    [[ADD318_I:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP6]] = insertelement <2 x double> [[TMP0]], double [[ADD318_I]], i32 1
+; CHECK-NEXT:    br label %[[DO_BODY220_I]]
+;
+entry:
+  br label %do.body220.i
+
+do.body220.i:
+  %c1.2.i = phi double [ %mul321.i, %do.body221.i ], [ 0.000000e+00, %entry ]
+  %s1.1.i = phi double [ %add318.i, %do.body221.i ], [ 0.000000e+00, %entry ]
+  br label %do.body221.i
+
+do.body221.i:                                     ; preds = %do.body220.i
+  %sub311.i1 = fadd double %c1.2.i, 0.000000e+00
+  %add315.i = fadd double %s1.1.i, 0.000000e+00
+  %mul316.i = fmul double %sub311.i1, 0.000000e+00
+  %mul317.i = fmul double %add315.i, 0.000000e+00
+  %add318.i = fadd double %mul316.i, %mul317.i
+  br label %do.body220.i
+}
diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test b/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test
index 9e683b9f68c939..20a836cae8bfe1 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/remove-section-in-group.test
@@ -1,6 +1,6 @@
 ## This checks that the group section is shrunk when its member is removed.
 
-# RUN: yaml2obj %s -o - \
+# RUN: yaml2obj --docnum=1 %s -o - \
 # RUN:   | llvm-objcopy -R .foo - - \
 # RUN:   | obj2yaml - \
 # RUN:   | FileCheck %s
@@ -35,3 +35,53 @@ Symbols:
   - Name:     foo_bar_grp
     Section:  .group
     Binding:  STB_GLOBAL
+
+## Check that removing the only member of a group leaves no section groups behind.
+# RUN: yaml2obj --docnum=2 %s -o %t
+# RUN: llvm-objcopy --remove-section=.debug_macro %t
+# RUN: llvm-readelf --section-groups %t | FileCheck %s --check-prefix=GROUP-REMOVED
+
+--- !ELF
+FileHeader:
+  Class:      ELFCLASS64
+  Data:       ELFDATA2LSB
+  Type:       ET_REL
+  Machine:    EM_X86_64
+Sections:
+  - Name:     .group
+    Type:     SHT_GROUP
+    Info:     foo_grp
+    Members:
+      - SectionOrType:  GRP_COMDAT
+      - SectionOrType:  .debug_macro
+  - Name:     .debug_macro
+    Type:     SHT_PROGBITS
+    Flags:    [ SHF_GROUP ]
+Symbols:
+  - Name:     foo_grp
+    Section:  .group
+
+# GROUP-REMOVED: There are no section groups in this file.
+
+## Check that a group section with no member sections can be removed explicitly.
+# RUN: yaml2obj --docnum=3 %s -o %t
+# RUN: llvm-objcopy --remove-section=.group %t
+# RUN: llvm-readelf --section-groups %t | FileCheck %s --check-prefix=EMPTY-GROUP-REMOVED
+
+--- !ELF
+FileHeader:
+  Class:      ELFCLASS64
+  Data:       ELFDATA2LSB
+  Type:       ET_REL
+  Machine:    EM_X86_64
+Sections:
+  - Name:     .group
+    Type:     SHT_GROUP
+    Info:     foo_grp
+    Members:
+      - SectionOrType:  GRP_COMDAT
+Symbols:
+  - Name:     foo_grp
+    Section:  .group
+
+# EMPTY-GROUP-REMOVED: There are no section groups in this file.
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 2ce0668601bc6a..1f6c4c604d57b5 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -1123,7 +1123,7 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
     std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
     size_t PrefixPos = StringRef::npos;
     for (auto &FilePrefix : FilePrefixes) {
-      std::string NamePrefix = FilePrefix + kGlobalIdentifierDelimiter;
+      std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
       PrefixPos = Name.find_insensitive(NamePrefix);
       if (PrefixPos == StringRef::npos)
         continue;
@@ -1421,7 +1421,8 @@ remapSamples(const sampleprof::FunctionSamples &Samples,
     for (const auto &Callsite : CallsiteSamples.second) {
       sampleprof::FunctionSamples Remapped =
           remapSamples(Callsite.second, Remapper, Error);
-      MergeResult(Error, Target[Remapped.getFunction()].merge(Remapped));
+      mergeSampleProfErrors(Error,
+                            Target[Remapped.getFunction()].merge(Remapped));
     }
   }
   return Result;
@@ -1542,7 +1543,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
                    : FunctionSamples();
       FunctionSamples &Samples = Remapper ? Remapped : I->second;
       SampleContext FContext = Samples.getContext();
-      MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
+      mergeSampleProfErrors(Result,
+                            ProfileMap[FContext].merge(Samples, Input.Weight));
       if (Result != sampleprof_error::success) {
         std::error_code EC = make_error_code(Result);
         handleMergeWriterError(errorCodeToError(EC), Input.Filename,
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 2118e954fe5436..53a25b279b432d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -489,7 +489,7 @@ bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
 FunctionSamples &
 ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
   SampleContext Context(FuncName);
-  return ProfileMap.Create(Context);
+  return ProfileMap.create(Context);
 }
 
 void ProfileGenerator::generateProfile() {
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index e523ae90966d79..161ee51432cd33 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -35,19 +35,7 @@ struct SandboxIRTest : public testing::Test {
   }
 };
 
-TEST_F(SandboxIRTest, UserInstantiation) {
-  parseIR(C, R"IR(
-define void @foo(i32 %v1) {
-  ret void
-}
-)IR");
-  Function &F = *M->getFunction("foo");
-  auto *Ret = F.begin()->getTerminator();
-  sandboxir::Context Ctx(C);
-  [[maybe_unused]] sandboxir::User U(sandboxir::Value::ClassID::User, Ret, Ctx);
-}
-
-TEST_F(SandboxIRTest, FunctionArgumentConstantAndOpaqueInstInstantiation) {
+TEST_F(SandboxIRTest, ClassID) {
   parseIR(C, R"IR(
 define void @foo(i32 %v1) {
   %add = add i32 %v1, 42
@@ -58,51 +46,66 @@ define void @foo(i32 %v1) {
   llvm::BasicBlock *LLVMBB = &*LLVMF->begin();
   llvm::Instruction *LLVMAdd = &*LLVMBB->begin();
   auto *LLVMC = cast<llvm::Constant>(LLVMAdd->getOperand(1));
-  auto *LLVMArg0 = LLVMF->getArg(0);
 
   sandboxir::Context Ctx(C);
-  sandboxir::Function F(LLVMF, Ctx);
-  sandboxir::Argument Arg0(LLVMArg0, Ctx);
-  sandboxir::Constant Const0(LLVMC, Ctx);
-  sandboxir::OpaqueInst OpaqueI(LLVMAdd, Ctx);
+  sandboxir::Function *F = Ctx.createFunction(LLVMF);
+  sandboxir::Argument *Arg0 = F->getArg(0);
+  sandboxir::BasicBlock *BB = &*F->begin();
+  sandboxir::Instruction *AddI = &*BB->begin();
+  sandboxir::OpaqueInst *OpaqueI = cast<sandboxir::OpaqueInst>(AddI);
+  sandboxir::Constant *Const0 = cast<sandboxir::Constant>(Ctx.getValue(LLVMC));
 
   EXPECT_TRUE(isa<sandboxir::Function>(F));
   EXPECT_FALSE(isa<sandboxir::Function>(Arg0));
+  EXPECT_FALSE(isa<sandboxir::Function>(BB));
+  EXPECT_FALSE(isa<sandboxir::Function>(AddI));
   EXPECT_FALSE(isa<sandboxir::Function>(Const0));
   EXPECT_FALSE(isa<sandboxir::Function>(OpaqueI));
 
   EXPECT_FALSE(isa<sandboxir::Argument>(F));
   EXPECT_TRUE(isa<sandboxir::Argument>(Arg0));
+  EXPECT_FALSE(isa<sandboxir::Argument>(BB));
+  EXPECT_FALSE(isa<sandboxir::Argument>(AddI));
   EXPECT_FALSE(isa<sandboxir::Argument>(Const0));
   EXPECT_FALSE(isa<sandboxir::Argument>(OpaqueI));
 
   EXPECT_TRUE(isa<sandboxir::Constant>(F));
   EXPECT_FALSE(isa<sandboxir::Constant>(Arg0));
+  EXPECT_FALSE(isa<sandboxir::Constant>(BB));
+  EXPECT_FALSE(isa<sandboxir::Constant>(AddI));
   EXPECT_TRUE(isa<sandboxir::Constant>(Const0));
   EXPECT_FALSE(isa<sandboxir::Constant>(OpaqueI));
 
   EXPECT_FALSE(isa<sandboxir::OpaqueInst>(F));
   EXPECT_FALSE(isa<sandboxir::OpaqueInst>(Arg0));
+  EXPECT_FALSE(isa<sandboxir::OpaqueInst>(BB));
+  EXPECT_TRUE(isa<sandboxir::OpaqueInst>(AddI));
   EXPECT_FALSE(isa<sandboxir::OpaqueInst>(Const0));
   EXPECT_TRUE(isa<sandboxir::OpaqueInst>(OpaqueI));
 
   EXPECT_FALSE(isa<sandboxir::Instruction>(F));
   EXPECT_FALSE(isa<sandboxir::Instruction>(Arg0));
+  EXPECT_FALSE(isa<sandboxir::Instruction>(BB));
+  EXPECT_TRUE(isa<sandboxir::Instruction>(AddI));
   EXPECT_FALSE(isa<sandboxir::Instruction>(Const0));
   EXPECT_TRUE(isa<sandboxir::Instruction>(OpaqueI));
 
   EXPECT_FALSE(isa<sandboxir::User>(F));
   EXPECT_FALSE(isa<sandboxir::User>(Arg0));
+  EXPECT_FALSE(isa<sandboxir::User>(BB));
+  EXPECT_TRUE(isa<sandboxir::User>(AddI));
   EXPECT_TRUE(isa<sandboxir::User>(Const0));
   EXPECT_TRUE(isa<sandboxir::User>(OpaqueI));
 
 #ifndef NDEBUG
-  // The dump() functions should be very forgiving and should not crash even if
-  // sandboxir has not been built properly.
-  F.dump();
-  Arg0.dump();
-  Const0.dump();
-  OpaqueI.dump();
+  std::string Buff;
+  raw_string_ostream BS(Buff);
+  F->dump(BS);
+  Arg0->dump(BS);
+  BB->dump(BS);
+  AddI->dump(BS);
+  Const0->dump(BS);
+  OpaqueI->dump(BS);
 #endif
 }
 
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
index b05366d2a635df..aefc02bce40fb0 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td
@@ -1077,4 +1077,13 @@ def LLVM_PoisonAttr : LLVM_Attr<"Poison", "poison">;
 /// Folded into from LLVM::ZeroOp.
 def LLVM_ZeroAttr : LLVM_Attr<"Zero", "zero">;
 
+//===----------------------------------------------------------------------===//
+// TailCallKindAttr
+//===----------------------------------------------------------------------===//
+
+def TailCallKindAttr : LLVM_Attr<"TailCallKind", "tailcallkind"> {
+  let parameters = (ins "TailCallKind":$tailCallKind);
+  let assemblyFormat = "`<` $tailCallKind `>`";
+}
+
 #endif // LLVMIR_ATTRDEFS
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h
index 3a93be21da3756..3ede8577332422 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrs.h
@@ -89,6 +89,7 @@ class TBAANodeAttr : public Attribute {
 // TODO: this shouldn't be needed after we unify the attribute generation, i.e.
 // --gen-attr-* and --gen-attrdef-*.
 using cconv::CConv;
+using tailcallkind::TailCallKind;
 using linkage::Linkage;
 } // namespace LLVM
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
index f8e85004d5f93c..f41a97f9ecc818 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
@@ -279,6 +279,35 @@ def CConv : DialectAttr<
           "::mlir::LLVM::CConvAttr::get($_builder.getContext(), $0)";
 }
 
+//===----------------------------------------------------------------------===//
+// TailCallKind
+//===----------------------------------------------------------------------===//
+
+def TailCallKindNone : LLVM_EnumAttrCase<"None", "none", "TCK_None", 0>;
+def TailCallKindTail : LLVM_EnumAttrCase<"Tail", "tail", "TCK_Tail", 1>;
+def TailCallKindMustTail : LLVM_EnumAttrCase<"MustTail", "musttail", "TCK_MustTail", 2>;
+def TailCallKindNoTailCall : LLVM_EnumAttrCase<"NoTail", "notail", "TCK_NoTail", 3>;
+
+def TailCallKindEnum : LLVM_EnumAttr<
+    "TailCallKind",
+    "::llvm::CallInst::TailCallKind",
+    "Tail Call Kind",
+    [TailCallKindNone, TailCallKindNoTailCall,
+    TailCallKindMustTail, TailCallKindTail]> {
+  let cppNamespace = "::mlir::LLVM::tailcallkind";
+}
+
+def TailCallKind : DialectAttr<
+    LLVM_Dialect,
+    CPred<"::llvm::isa<::mlir::LLVM::TailCallKindAttr>($_self)">,
+    "LLVM Tail Call Kind specification"> {
+  let storageType = "::mlir::LLVM::TailCallKindAttr";
+  let returnType = "::mlir::LLVM::tailcallkind::TailCallKind";
+  let convertFromStorage = "$_self.getTailCallKind()";
+  let constBuilderCall =
+          "::mlir::LLVM::TailCallKindAttr::get($_builder.getContext(), $0)";
+}
+
 //===----------------------------------------------------------------------===//
 // DIEmissionKind
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 54f38c93e50808..65dfcf93d70294 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -650,7 +650,8 @@ def LLVM_CallOp : LLVM_MemAccessOpBase<"call",
                   DefaultValuedAttr<LLVM_FastmathFlagsAttr,
                                    "{}">:$fastmathFlags,
                   OptionalAttr<DenseI32ArrayAttr>:$branch_weights,
-                  DefaultValuedAttr<CConv, "CConv::C">:$CConv);
+                  DefaultValuedAttr<CConv, "CConv::C">:$CConv,
+                  DefaultValuedAttr<TailCallKind, "TailCallKind::None">:$TailCallKind);
   // Append the aliasing related attributes defined in LLVM_MemAccessOpBase.
   let arguments = !con(args, aliasAttrs);
   let results = (outs Optional<LLVM_Type>:$result);
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index dc255e772841c2..148bed62aa8f2b 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1121,6 +1121,20 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel",
 
   let regions = (region AnyRegion:$region);
 
+  let builders = [
+    OpBuilder<(ins
+      CArg<"mlir::ValueRange", "{}">:$numGangs,
+      CArg<"mlir::ValueRange", "{}">:$numWorkers,
+      CArg<"mlir::ValueRange", "{}">:$vectorLength,
+      CArg<"mlir::ValueRange", "{}">:$asyncOperands,
+      CArg<"mlir::ValueRange", "{}">:$waitOperands,
+      CArg<"mlir::Value", "{}">:$ifCond,
+      CArg<"mlir::Value", "{}">:$selfCond,
+      CArg<"mlir::ValueRange", "{}">:$reductionOperands,
+      CArg<"mlir::ValueRange", "{}">:$gangPrivateOperands,
+      CArg<"mlir::ValueRange", "{}">:$gangFirstPrivateOperands,
+      CArg<"mlir::ValueRange", "{}">:$dataClauseOperands)>];
+
   let extraClassDeclaration = [{
     /// The number of data operands.
     unsigned getNumDataOperands();
diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
index 58764ad38e34f6..b3798a3f7624b0 100644
--- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
+++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
@@ -67,9 +67,6 @@ LogicalResult ExtFOnFloat8RewritePattern::match(arith::ExtFOp op) const {
   if (auto inVecType = dyn_cast<VectorType>(inType)) {
     if (inVecType.isScalable())
       return failure();
-    if (inVecType.getShape().size() > 1)
-      // Multi-dimensional vectors are currently unsupported.
-      return failure();
     inType = inVecType.getElementType();
   }
   return success(inType.isFloat8E5M2FNUZ() || inType.isFloat8E4M3FNUZ());
@@ -80,28 +77,38 @@ void ExtFOnFloat8RewritePattern::rewrite(arith::ExtFOp op,
   Location loc = op.getLoc();
   Value in = op.getIn();
   Type outElemType = getElementTypeOrSelf(op.getOut().getType());
-  if (!isa<VectorType>(in.getType())) {
+  auto inType = dyn_cast<VectorType>(in.getType());
+  if (!inType) {
     Value asFloat = rewriter.create<amdgpu::ExtPackedFp8Op>(
         loc, rewriter.getF32Type(), in, 0);
     Value result = castF32To(outElemType, asFloat, loc, rewriter);
     return rewriter.replaceOp(op, result);
   }
-  VectorType inType = cast<VectorType>(in.getType());
   int64_t numElements = inType.getNumElements();
   Value zero = rewriter.create<arith::ConstantOp>(
       loc, outElemType, rewriter.getFloatAttr(outElemType, 0.0));
-  Value result =
-      rewriter.createOrFold<vector::SplatOp>(loc, op.getOut().getType(), zero);
   if (inType.getShape().empty()) {
     Value scalarIn =
         rewriter.create<vector::ExtractOp>(loc, in, ArrayRef<int64_t>{});
     // Recurse to send the 0-D vector case to the 1-D vector case
     Value scalarExt =
         rewriter.create<arith::ExtFOp>(loc, outElemType, scalarIn);
-    result = rewriter.create<vector::InsertOp>(loc, scalarExt, zero,
-                                               ArrayRef<int64_t>{});
+    Value result = rewriter.create<vector::InsertOp>(loc, scalarExt, zero,
+                                                     ArrayRef<int64_t>{});
     return rewriter.replaceOp(op, result);
   }
+
+  VectorType outType = cast<VectorType>(op.getOut().getType());
+  VectorType flatTy = VectorType::get(SmallVector<int64_t>{numElements},
+                                      outType.getElementType());
+  Value result = rewriter.createOrFold<vector::SplatOp>(loc, flatTy, zero);
+
+  if (inType.getRank() > 1) {
+    inType = VectorType::get(SmallVector<int64_t>{numElements},
+                             inType.getElementType());
+    in = rewriter.create<vector::ShapeCastOp>(loc, inType, in);
+  }
+
   for (int64_t i = 0; i < numElements; i += 4) {
     int64_t elemsThisOp = std::min(numElements, i + 4) - i;
     Value inSlice = rewriter.create<vector::ExtractStridedSliceOp>(
@@ -113,6 +120,11 @@ void ExtFOnFloat8RewritePattern::rewrite(arith::ExtFOp op,
       result = rewriter.create<vector::InsertOp>(loc, asType, result, i + j);
     }
   }
+
+  // Cast the flat result back to the op's declared (multi-dimensional) type.
+  if (inType.getRank() != outType.getRank())
+    result = rewriter.create<vector::ShapeCastOp>(loc, outType, result);
+
   rewriter.replaceOp(op, result);
 }
 
@@ -181,9 +193,6 @@ LogicalResult TruncFToFloat8RewritePattern::match(arith::TruncFOp op) const {
   if (auto outVecType = dyn_cast<VectorType>(outType)) {
     if (outVecType.isScalable())
       return failure();
-    if (outVecType.getShape().size() > 1)
-      // Multi-dimensional vectors are currently unsupported.
-      return failure();
     outType = outVecType.getElementType();
   }
   auto inType = dyn_cast<FloatType>(getElementTypeOrSelf(op.getIn().getType()));
@@ -200,8 +209,9 @@ void TruncFToFloat8RewritePattern::rewrite(arith::TruncFOp op,
   Type outElemType = getElementTypeOrSelf(op.getOut().getType());
   if (saturateFP8)
     in = clampInput(rewriter, loc, outElemType, in);
+  auto inVectorTy = dyn_cast<VectorType>(in.getType());
   VectorType truncResType = VectorType::get(4, outElemType);
-  if (!isa<VectorType>(in.getType())) {
+  if (!inVectorTy) {
     Value asFloat = castToF32(in, loc, rewriter);
     Value asF8s = rewriter.create<amdgpu::PackedTrunc2xFp8Op>(
         loc, truncResType, asFloat, /*sourceB=*/nullptr, 0,
@@ -213,18 +223,27 @@ void TruncFToFloat8RewritePattern::rewrite(arith::TruncFOp op,
   int64_t numElements = outType.getNumElements();
   Value zero = rewriter.create<arith::ConstantOp>(
       loc, outElemType, rewriter.getFloatAttr(outElemType, 0.0));
-  Value result = rewriter.createOrFold<vector::SplatOp>(loc, outType, zero);
   if (outType.getShape().empty()) {
     Value scalarIn =
         rewriter.create<vector::ExtractOp>(loc, in, ArrayRef<int64_t>{});
     // Recurse to send the 0-D vector case to the 1-D vector case
     Value scalarTrunc =
         rewriter.create<arith::TruncFOp>(loc, outElemType, scalarIn);
-    result = rewriter.create<vector::InsertOp>(loc, scalarTrunc, zero,
-                                               ArrayRef<int64_t>{});
+    Value result = rewriter.create<vector::InsertOp>(loc, scalarTrunc, zero,
+                                                     ArrayRef<int64_t>{});
     return rewriter.replaceOp(op, result);
   }
 
+  VectorType flatTy = VectorType::get(SmallVector<int64_t>{numElements},
+                                      outType.getElementType());
+  Value result = rewriter.createOrFold<vector::SplatOp>(loc, flatTy, zero);
+
+  if (inVectorTy.getRank() > 1) {
+    inVectorTy = VectorType::get(SmallVector<int64_t>{numElements},
+                                 inVectorTy.getElementType());
+    in = rewriter.create<vector::ShapeCastOp>(loc, inVectorTy, in);
+  }
+
   for (int64_t i = 0; i < numElements; i += 4) {
     int64_t elemsThisOp = std::min(numElements, i + 4) - i;
     Value thisResult = nullptr;
@@ -245,6 +264,11 @@ void TruncFToFloat8RewritePattern::rewrite(arith::TruncFOp op,
     result = rewriter.create<vector::InsertStridedSliceOp>(loc, thisResult,
                                                            result, i, 1);
   }
+
+  // The result was built as a flat vector; cast it back to outType's shape.
+  if (inVectorTy.getRank() != outType.getRank())
+    result = rewriter.create<vector::ShapeCastOp>(loc, outType, result);
+
   rewriter.replaceOp(op, result);
 }
 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index a01c4ee4923eba..9372caf6e32a73 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -44,6 +44,7 @@ using namespace mlir;
 using namespace mlir::LLVM;
 using mlir::LLVM::cconv::getMaxEnumValForCConv;
 using mlir::LLVM::linkage::getMaxEnumValForLinkage;
+using mlir::LLVM::tailcallkind::getMaxEnumValForTailCallKind;
 
 #include "mlir/Dialect/LLVMIR/LLVMOpsDialect.cpp.inc"
 
@@ -197,6 +198,7 @@ struct EnumTraits {};
 REGISTER_ENUM_TYPE(Linkage);
 REGISTER_ENUM_TYPE(UnnamedAddr);
 REGISTER_ENUM_TYPE(CConv);
+REGISTER_ENUM_TYPE(TailCallKind);
 REGISTER_ENUM_TYPE(Visibility);
 } // namespace
 
@@ -974,7 +976,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, TypeRange results,
   build(builder, state, results,
         TypeAttr::get(getLLVMFuncType(builder.getContext(), results, args)),
         callee, args, /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr,
-        /*CConv=*/nullptr,
+        /*CConv=*/nullptr, /*TailCallKind=*/nullptr,
         /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
         /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
 }
@@ -997,7 +999,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state,
   build(builder, state, getCallOpResultTypes(calleeType),
         TypeAttr::get(calleeType), callee, args, /*fastmathFlags=*/nullptr,
         /*branch_weights=*/nullptr, /*CConv=*/nullptr,
-        /*access_groups=*/nullptr,
+        /*TailCallKind=*/nullptr, /*access_groups=*/nullptr,
         /*alias_scopes=*/nullptr, /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
 }
 
@@ -1006,7 +1008,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state,
   build(builder, state, getCallOpResultTypes(calleeType),
         TypeAttr::get(calleeType), /*callee=*/nullptr, args,
         /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr,
-        /*CConv=*/nullptr,
+        /*CConv=*/nullptr, /*TailCallKind=*/nullptr,
         /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
         /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
 }
@@ -1017,7 +1019,7 @@ void CallOp::build(OpBuilder &builder, OperationState &state, LLVMFuncOp func,
   build(builder, state, getCallOpResultTypes(calleeType),
         TypeAttr::get(calleeType), SymbolRefAttr::get(func), args,
         /*fastmathFlags=*/nullptr, /*branch_weights=*/nullptr,
-        /*CConv=*/nullptr,
+        /*CConv=*/nullptr, /*TailCallKind=*/nullptr,
         /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
         /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
 }
@@ -1180,6 +1182,9 @@ void CallOp::print(OpAsmPrinter &p) {
   if (getCConv() != LLVM::CConv::C)
     p << stringifyCConv(getCConv()) << ' ';
 
+  if (getTailCallKind() != LLVM::TailCallKind::None)
+    p << tailcallkind::stringifyTailCallKind(getTailCallKind()) << ' ';
+
   // Print the direct callee if present as a function attribute, or an indirect
   // callee (first operand) otherwise.
   if (isDirect)
@@ -1194,7 +1199,8 @@ void CallOp::print(OpAsmPrinter &p) {
     p << " vararg(" << calleeType << ")";
 
   p.printOptionalAttrDict(processFMFAttr((*this)->getAttrs()),
-                          {getCConvAttrName(), "callee", "callee_type"});
+                          {getCConvAttrName(), "callee", "callee_type",
+                           getTailCallKindAttrName()});
 
   p << " : ";
   if (!isDirect)
@@ -1262,7 +1268,7 @@ static ParseResult parseOptionalCallFuncPtr(
   return success();
 }
 
-// <operation> ::= `llvm.call` (cconv)? (function-id | ssa-use)
+// <operation> ::= `llvm.call` (cconv)? (tailcallkind)? (function-id | ssa-use)
 //                             `(` ssa-use-list `)`
 //                             ( `vararg(` var-arg-func-type `)` )?
 //                             attribute-dict? `:` (type `,`)? function-type
@@ -1277,6 +1283,12 @@ ParseResult CallOp::parse(OpAsmParser &parser, OperationState &result) {
       CConvAttr::get(parser.getContext(), parseOptionalLLVMKeyword<CConv>(
                                               parser, result, LLVM::CConv::C)));
 
+  result.addAttribute(
+      getTailCallKindAttrName(result.name),
+      TailCallKindAttr::get(parser.getContext(),
+                            parseOptionalLLVMKeyword<TailCallKind>(
+                                parser, result, LLVM::TailCallKind::None)));
+
   // Parse a function pointer for indirect calls.
   if (parseOptionalCallFuncPtr(parser, operands))
     return failure();
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 01305898f252d5..c3c6dffd5ae49a 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -905,6 +905,31 @@ mlir::Value ParallelOp::getWaitDevnum(mlir::acc::DeviceType deviceType) {
                             deviceType);
 }
 
+void ParallelOp::build(mlir::OpBuilder &odsBuilder,
+                       mlir::OperationState &odsState,
+                       mlir::ValueRange numGangs, mlir::ValueRange numWorkers,
+                       mlir::ValueRange vectorLength,
+                       mlir::ValueRange asyncOperands,
+                       mlir::ValueRange waitOperands, mlir::Value ifCond,
+                       mlir::Value selfCond, mlir::ValueRange reductionOperands,
+                       mlir::ValueRange gangPrivateOperands,
+                       mlir::ValueRange gangFirstPrivateOperands,
+                       mlir::ValueRange dataClauseOperands) {
+  // Delegate to the full builder; arguments not taken here are passed as null.
+  ParallelOp::build(
+      odsBuilder, odsState, asyncOperands, /*asyncOperandsDeviceType=*/nullptr,
+      /*asyncOnly=*/nullptr, waitOperands, /*waitOperandsSegments=*/nullptr,
+      /*waitOperandsDeviceType=*/nullptr, /*hasWaitDevnum=*/nullptr,
+      /*waitOnly=*/nullptr, numGangs, /*numGangsSegments=*/nullptr,
+      /*numGangsDeviceType=*/nullptr, numWorkers,
+      /*numWorkersDeviceType=*/nullptr, vectorLength,
+      /*vectorLengthDeviceType=*/nullptr, ifCond, selfCond,
+      /*selfAttr=*/nullptr, reductionOperands, /*reductionRecipes=*/nullptr,
+      gangPrivateOperands, /*privatizations=*/nullptr, gangFirstPrivateOperands,
+      /*firstprivatizations=*/nullptr, dataClauseOperands,
+      /*defaultAttr=*/nullptr, /*combined=*/nullptr);
+}
+
 static ParseResult parseNumGangs(
     mlir::OpAsmParser &parser,
     llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &operands,
@@ -2085,8 +2110,8 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
     llvm::interleaveComma(regionArgs, p,
                           [&p](Value v) { p << v << " : " << v.getType(); });
     p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
-      << upperbound << " : " << upperboundType << ") "
-      << " step (" << steps << " : " << stepType << ") ";
+      << upperbound << " : " << upperboundType << ") " << " step (" << steps
+      << " : " << stepType << ") ";
   }
   p.printRegion(region, /*printEntryBlockArgs=*/false);
 }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
index f144c7158d6796..3d6dd1247b4136 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
@@ -218,6 +218,7 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
                                 operandsRef.drop_front());
     }
     call->setCallingConv(convertCConvToLLVM(callOp.getCConv()));
+    call->setTailCallKind(convertTailCallKindToLLVM(callOp.getTailCallKind()));
     moduleTranslation.setAccessGroupsMetadata(callOp, call);
     moduleTranslation.setAliasScopeMetadata(callOp, call);
     moduleTranslation.setTBAAMetadata(callOp, call);
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 0c8b3296f44a7d..9915576bbc458b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -1468,6 +1468,8 @@ LogicalResult ModuleImport::convertInstruction(llvm::Instruction *inst) {
       callOp = builder.create<CallOp>(loc, funcTy, operands);
     }
     callOp.setCConv(convertCConvFromLLVM(callInst->getCallingConv()));
+    callOp.setTailCallKind(
+        convertTailCallKindFromLLVM(callInst->getTailCallKind()));
     setFastmathFlagsAttr(inst, callOp);
     if (!callInst->getType()->isVoidTy())
       mapValue(inst, callOp.getResult());
diff --git a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir
index 159a2f02f0560e..26a222a4a788e5 100644
--- a/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir
+++ b/mlir/test/Conversion/ArithToAMDGPU/8-bit-floats.mlir
@@ -115,3 +115,61 @@ func.func @vector_trunc_long(%v: vector<9xf32>) -> vector<9xf8E4M3FNUZ> {
   %w = arith.truncf %v : vector<9xf32> to vector<9xf8E4M3FNUZ>
   return %w : vector<9xf8E4M3FNUZ>
 }
+
+// -----
+
+// CHECK-LABEL: func.func @vector_trunc_long_2d
+// CHECK-SAME: ([[V:%.+]]: vector<1x9xf32>)
+// CHECK: [[ZEROES:%.+]] = arith.constant dense<0.000000e+00> : vector<9xf8E4M3FNUZ>
+// CHECK: [[T0:%.+]] = amdgpu.packed_trunc_2xfp8 %{{.+}}, %{{.+}} into undef[word 0]
+// CHECK: [[T1:%.+]] = amdgpu.packed_trunc_2xfp8 %{{.+}}, %{{.+}} into [[T0]][word 1]
+// CHECK: [[W0:%.+]] = vector.insert_strided_slice [[T1]], [[ZEROES]] {offsets = [0], strides = [1]}
+
+// CHECK: [[T2:%.+]] = amdgpu.packed_trunc_2xfp8 %{{.+}}, %{{.+}} into undef[word 0]
+// CHECK: [[T3:%.+]] = amdgpu.packed_trunc_2xfp8 %{{.+}}, %{{.+}} into [[T2]][word 1]
+// CHECK: [[W1:%.+]] = vector.insert_strided_slice [[T3]], [[W0]] {offsets = [4], strides = [1]}
+
+// CHECK: [[T4:%.+]] = amdgpu.packed_trunc_2xfp8 %{{.+}}, undef into undef[word 0]
+// CHECK: [[T4_SHORT:%.+]] = vector.extract_strided_slice [[T4]] {offsets = [0], sizes = [1], strides = [1]}
+// CHECK: [[W:%.+]] = vector.insert_strided_slice [[T4_SHORT]], [[W1]] {offsets = [8], strides = [1]}
+// CHECK: [[RE:%.+]] = vector.shape_cast [[W]] : vector<9xf8E4M3FNUZ> to vector<1x9xf8E4M3FNUZ>
+// CHECK: return [[RE]]
+func.func @vector_trunc_long_2d(%v: vector<1x9xf32>) -> vector<1x9xf8E4M3FNUZ> {
+  %w = arith.truncf %v : vector<1x9xf32> to vector<1x9xf8E4M3FNUZ>
+  return %w : vector<1x9xf8E4M3FNUZ>
+}
+
+// -----
+
+// CHECK-LABEL: func.func @vector_ext_long_2d
+// CHECK-SAME: ([[V:%.+]]: vector<1x9xf8E4M3FNUZ>)
+// CHECK: [[CAST:%.+]] = vector.shape_cast [[V]] : vector<1x9xf8E4M3FNUZ> to vector<9xf8E4M3FNUZ>
+// CHECK: [[V0:%.+]] = vector.extract_strided_slice [[CAST]] {offsets = [0], sizes = [4], strides = [1]}
+// CHECK: [[F0:%.+]] = amdgpu.ext_packed_fp8 [[V0]][0]
+// CHECK: [[W0:%.+]] = vector.insert [[F0]]
+// CHECK: [[F1:%.+]] = amdgpu.ext_packed_fp8 [[V0]][1]
+// CHECK: [[W1:%.+]] = vector.insert [[F1]], [[W0]]
+// CHECK: [[F2:%.+]] = amdgpu.ext_packed_fp8 [[V0]][2]
+// CHECK: [[W2:%.+]] = vector.insert [[F2]], [[W1]]
+// CHECK: [[F3:%.+]] = amdgpu.ext_packed_fp8 [[V0]][3]
+// CHECK: [[W3:%.+]] = vector.insert [[F3]], [[W2]]
+
+// CHECK: [[V1:%.+]] = vector.extract_strided_slice [[CAST]] {offsets = [4], sizes = [4], strides = [1]} : vector<9xf8E4M3FNUZ> to vector<4xf8E4M3FNUZ>
+// CHECK: [[F4:%.+]] = amdgpu.ext_packed_fp8 [[V1]][0]
+// CHECK: [[W4:%.+]] = vector.insert [[F4]], [[W3]]
+// CHECK: [[F5:%.+]] = amdgpu.ext_packed_fp8 [[V1]][1]
+// CHECK: [[W5:%.+]] = vector.insert [[F5]], [[W4]]
+// CHECK: [[F6:%.+]] = amdgpu.ext_packed_fp8 [[V1]][2]
+// CHECK: [[W6:%.+]] = vector.insert [[F6]], [[W5]]
+// CHECK: [[F7:%.+]] = amdgpu.ext_packed_fp8 [[V1]][3]
+// CHECK: [[W7:%.+]] = vector.insert [[F7]], [[W6]]
+
+// CHECK: [[V2:%.+]] = vector.extract_strided_slice [[CAST]] {offsets = [8], sizes = [1], strides = [1]} : vector<9xf8E4M3FNUZ> to vector<1xf8E4M3FNUZ>
+// CHECK: [[F8:%.+]] = amdgpu.ext_packed_fp8 [[V2]][0]
+// CHECK: [[W8:%.+]] = vector.insert [[F8]], [[W7]]
+// CHECK: [[CAST:%.+]] = vector.shape_cast [[W8]] : vector<9xf32> to vector<1x9xf32>
+// CHECK: return [[CAST]]
+func.func @vector_ext_long_2d(%v: vector<1x9xf8E4M3FNUZ>) -> vector<1x9xf32> {
+  %w = arith.extf %v : vector<1x9xf8E4M3FNUZ> to vector<1x9xf32>
+  return %w : vector<1x9xf32>
+}
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index 2386dde19301e3..ca9748a2b8b7bc 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -673,3 +673,41 @@ llvm.func @experimental_constrained_fptrunc(%in: f64) {
   %4 = llvm.intr.experimental.constrained.fptrunc %in tonearestaway ignore : f64 to f32
   llvm.return
 }
+
+// CHECK: llvm.func @tail_call_target() -> i32
+llvm.func @tail_call_target() -> i32
+
+// CHECK-LABEL: @test_none
+llvm.func @test_none() -> i32 {
+  // CHECK-NEXT: llvm.call @tail_call_target() : () -> i32
+  %0 = llvm.call none @tail_call_target() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_default
+llvm.func @test_default() -> i32 {
+  // CHECK-NEXT: llvm.call @tail_call_target() : () -> i32
+  %0 = llvm.call @tail_call_target() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_musttail
+llvm.func @test_musttail() -> i32 {
+  // CHECK-NEXT: llvm.call musttail @tail_call_target() : () -> i32
+  %0 = llvm.call musttail @tail_call_target() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_tail
+llvm.func @test_tail() -> i32 {
+  // CHECK-NEXT: llvm.call tail @tail_call_target() : () -> i32
+  %0 = llvm.call tail @tail_call_target() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_notail
+llvm.func @test_notail() -> i32 {
+  // CHECK-NEXT: llvm.call notail @tail_call_target() : () -> i32
+  %0 = llvm.call notail @tail_call_target() : () -> i32
+  llvm.return %0 : i32
+}
diff --git a/mlir/test/Dialect/LLVMIR/tail-call-kinds.mlir b/mlir/test/Dialect/LLVMIR/tail-call-kinds.mlir
new file mode 100644
index 00000000000000..73a6aa2f91cbaa
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/tail-call-kinds.mlir
@@ -0,0 +1,39 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// CHECK: declare i32 @foo()
+llvm.func @foo() -> i32
+
+// CHECK-LABEL: @test_none
+llvm.func @test_none() -> i32 {
+  // CHECK-NEXT: call i32 @foo()
+  %0 = llvm.call none @foo() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_default
+llvm.func @test_default() -> i32 {
+  // CHECK-NEXT: call i32 @foo()
+  %0 = llvm.call @foo() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_musttail
+llvm.func @test_musttail() -> i32 {
+  // CHECK-NEXT: musttail call i32 @foo()
+  %0 = llvm.call musttail @foo() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_tail
+llvm.func @test_tail() -> i32 {
+  // CHECK-NEXT: tail call i32 @foo()
+  %0 = llvm.call tail @foo() : () -> i32
+  llvm.return %0 : i32
+}
+
+// CHECK-LABEL: @test_notail
+llvm.func @test_notail() -> i32 {
+  // CHECK-NEXT: notail call i32 @foo()
+  %0 = llvm.call notail @foo() : () -> i32
+  llvm.return %0 : i32
+}
diff --git a/mlir/test/Target/LLVMIR/Import/tail-kind.ll b/mlir/test/Target/LLVMIR/Import/tail-kind.ll
new file mode 100644
index 00000000000000..608ae4043b6719
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/Import/tail-kind.ll
@@ -0,0 +1,35 @@
+; RUN: mlir-translate -import-llvm -split-input-file %s | FileCheck %s
+
+; CHECK: llvm.func @tailkind()
+declare void @tailkind()
+
+; CHECK-LABEL: @call_tailkind
+define void @call_tailkind() {
+  ; CHECK: llvm.call musttail @tailkind()
+  musttail call void @tailkind()
+  ret void
+}
+
+; // -----
+
+; CHECK: llvm.func @tailkind()
+declare void @tailkind()
+
+; CHECK-LABEL: @call_tailkind
+define void @call_tailkind() {
+  ; CHECK: llvm.call tail @tailkind()
+  tail call void @tailkind()
+  ret void
+}
+
+; // -----
+
+; CHECK: llvm.func @tailkind()
+declare void @tailkind()
+
+; CHECK-LABEL: @call_tailkind
+define void @call_tailkind() {
+  ; CHECK: llvm.call notail @tailkind()
+  notail call void @tailkind()
+  ret void
+}
diff --git a/revert_patches.txt b/revert_patches.txt
index 593d3d643df07c..cf6816a7c04f1a 100644
--- a/revert_patches.txt
+++ b/revert_patches.txt
@@ -103,3 +103,7 @@ f8b1ca4992a2 [MCParser] .altmacro: Support argument expansion not preceded by
 Reverts: breaks hipBlender* tests in compilation step
 385118644cca [SLP]Remove operands upon marking instruction for deletion.
 ---
+Revert: more Sema:
+9e1f1cfa59c4 [Clang][Sema] Handle class member access expressions with valid nested-name-specifiers that become invalid after lookup (#98167)
+RonL
+==