Skip to content

Commit

Permalink
Improve memory access
Browse the repository files at this point in the history
- Detect more stack-allocated memory addresses, see doe300/VC4CL#106
- Reduce usage of unaligned VPM access
  • Loading branch information
doe300 committed Feb 2, 2022
1 parent 1a6e4fd commit 90d16ae
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 10 deletions.
9 changes: 9 additions & 0 deletions src/intermediate/Operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,11 @@ qpu_asm::DecoratedInstruction Operation::convertToAsm(const FastMap<const Local*
"Can't perform operation with two distinguish literals", to_string());
regA = REG_NOP;
}
else if(input0 != input1 &&
((input0.second && input1.first.file == RegisterFile::PHYSICAL_B) ||
(input1.second && input0.first.file == RegisterFile::PHYSICAL_B)))
throw CompilationError(CompilationStep::CODE_GENERATION,
"Can't perform operation with two distinct inputs using the same mux", to_string());
else if(input0.second && input1.first.file == RegisterFile::ACCUMULATOR)
// don't read register overlapped by accumulator, e.g. UNIFORM
regA = REG_NOP;
Expand Down Expand Up @@ -942,6 +947,10 @@ qpu_asm::DecoratedInstruction CombinedOperation::convertToAsm(const FastMap<cons
throw CompilationError(CompilationStep::CODE_GENERATION,
"Can't map combined instruction with two distinct immediate arguments", to_string());

if(addOp->getOutput() && addOp->getOutput() == mulOp->getOutput() &&
addInstr.getAddOutput() != mulInstr.getMulOutput())
throw CompilationError(CompilationStep::CODE_GENERATION,
"Shared output for combined instructions is wrongly mapped to different output registers", to_string());
if(addInstr.getAddOutput().file == mulInstr.getMulOutput().file && addInstr.getAddOutput().isGeneralPurpose() &&
mulInstr.getMulOutput().isGeneralPurpose())
// This also does not work for output to the same register (on a physical register-file), since we cannot
Expand Down
20 changes: 14 additions & 6 deletions src/normalization/MemoryMapChecks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,15 @@ static Optional<FastSet<const IntermediateInstruction*>> checkAllWritersArePhiNo
if(excludedInstructions.find(writer) != excludedInstructions.end())
// skip the (possible write) we are currently processing
return;
if(auto memoryAccess = dynamic_cast<const MemoryInstruction*>(writer))
{
if(memoryAccess->op != MemoryOperation::READ)
// the contents are written (or read), not the address itself
return;
if(!memoryAccess->getDestination().hasLocal(local))
// the contents of the address is read, not the address written
return;
}
if(writer->hasDecoration(InstructionDecorations::PHI_NODE))
{
if(writer->assertArgument(0).checkLocal())
Expand Down Expand Up @@ -402,7 +411,6 @@ static void addConditionalLocalMapping(const Local* loc, const ConditionalMemory

static const Local* determineSingleMemoryAreaMapping(MemoryAccessMap& mapping,
TypedInstructionWalker<intermediate::MemoryInstruction> it, const Local* local,
const intermediate::MemoryInstruction* memInstr,
FastMap<TypedInstructionWalker<intermediate::MemoryInstruction>,
std::pair<const Local*, FastSet<const ConditionalMemoryAccess*>>>& conditionalWrittenMemoryAccesses,
FastSet<ConditionalMemoryAccess>& conditionalAddressWrites)
Expand Down Expand Up @@ -449,7 +457,7 @@ static const Local* determineSingleMemoryAreaMapping(MemoryAccessMap& mapping,
if(isMemoryOnlyRead(local))
{
// global buffer
if(getConstantElementValue(memInstr->getSource()) && memInstr->getNumEntries().hasLiteral(1_lit))
if(getConstantElementValue(it->getSource()) && it->getNumEntries().hasLiteral(1_lit))
{
CPPLOG_LAZY(logging::Level::DEBUG,
log << "Constant element of constant buffer '" << local->to_string()
Expand Down Expand Up @@ -493,7 +501,7 @@ static const Local* determineSingleMemoryAreaMapping(MemoryAccessMap& mapping,
mapping[local].fallback = MemoryAccessType::RAM_READ_WRITE_VPM;
}
}
else if(auto conditonalWrites = checkAllWritersArePhiNodesOrSelections(local, {it.get(), memInstr}))
else if(auto conditonalWrites = checkAllWritersArePhiNodesOrSelections(local, {it.get()}))
{
// If the local is a PHI result from different memory addresses, we can still map it, if the
// source addresses have the same access type. Since the "original" access types of the sources
Expand Down Expand Up @@ -613,7 +621,7 @@ static const Local* determineSingleMemoryAreaMapping(MemoryAccessMap& mapping,
log << "Failed to determine underlying memory area for '" << local->to_string()
<< "', trying again for its single source: " << sourceLocal->to_string() << logging::endl);
auto memoryLocal = determineSingleMemoryAreaMapping(
mapping, it, sourceLocal, memInstr, conditionalWrittenMemoryAccesses, conditionalAddressWrites);
mapping, it, sourceLocal, conditionalWrittenMemoryAccesses, conditionalAddressWrites);
// if we returned here, then we managed to (conditionally) map the local to a memory area. So edit the
// current local to reference the inner most local
if(local->type.getPointerType() && !local->get<LocalData>() && memoryLocal &&
Expand All @@ -629,7 +637,7 @@ static const Local* determineSingleMemoryAreaMapping(MemoryAccessMap& mapping,
innerMappingIt->second.accessInstructions.emplace(it, local);
const_cast<Local*>(local)->set(ReferenceData(*memoryLocal, hasOffset ? ANY_ELEMENT : 0));
auto memoryParameter = memoryLocal->as<Parameter>();
if(memoryParameter && memInstr->op != MemoryOperation::READ &&
if(memoryParameter && it->op != MemoryOperation::READ &&
!has_flag(memoryParameter->decorations, ParameterDecorations::READ_ONLY))
{
// Mark the original memory area as being written to.
Expand Down Expand Up @@ -1007,7 +1015,7 @@ MemoryAccessInfo normalization::determineMemoryAccess(Method& method)
}
}
for(const auto local : memInstr->getMemoryAreas())
determineSingleMemoryAreaMapping(mapping, typeSafe(it, *memInstr), local, memInstr,
determineSingleMemoryAreaMapping(mapping, typeSafe(it, *memInstr), local,
conditionalWrittenMemoryAccesses, conditionalAddressWrites);
if(it.has())
allWalkers.emplace(it);
Expand Down
27 changes: 23 additions & 4 deletions src/periphery/VPM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "VPM.h"

#include "../Expression.h"
#include "../Profiler.h"
#include "../intermediate/Helper.h"
#include "../intermediate/VectorHelper.h"
Expand Down Expand Up @@ -587,8 +588,28 @@ static NODISCARD InstructionWalker calculateElementOffsetInVPM(Method& method, I
*/
/*
 * Checks whether the given VPM access offset can NOT be proven to be aligned to the
 * in-memory width of the accessed element type.
 *
 * Scalar accesses are always treated as aligned. For vector accesses, the offset is
 * considered aligned if it is a literal multiple of the element type's in-memory width,
 * or if its single writing expression guarantees such a multiple:
 * - a left-shift by at least log2(width) bits, or
 * - an unsigned multiplication where one literal factor is a multiple of the width.
 * In every other case we conservatively report the access as unaligned.
 */
static bool isUnalignedMemoryVPMAccess(const Value& offset, DataType elementType)
{
    if(elementType.getVectorWidth() == 1)
        // single-element accesses are never considered unaligned
        return false;
    if(auto litOffset = offset.getLiteralValue())
        // constant offset - check the alignment directly
        return litOffset->unsignedInt() % elementType.getInMemoryWidth() != 0;
    if(auto writer = offset.getSingleWriter())
    {
        if(auto expression = Expression::createRecursiveExpression(*writer, 3))
        {
            // offset = x << const: aligned if the shift guarantees a multiple of the type width.
            // NOTE(review): the "% 32" presumably models the hardware truncating shift offsets to 5 bits,
            // and the log2 comparison assumes the in-memory width is a power of two - confirm for
            // 3-element vector types whose memory width may not be a power of two
            if(expression->code == OP_SHL && expression->arg1.getLiteralValue() &&
                (expression->arg1.getLiteralValue()->unsignedInt() % 32) >=
                    static_cast<unsigned>(std::log2(elementType.getInMemoryWidth())))
                return false;
            // offset = x * const (either operand literal): aligned if the constant factor is a
            // multiple of the type width
            if(expression->code == Expression::FAKEOP_UMUL &&
                ((expression->arg0.getLiteralValue() &&
                     expression->arg0.getLiteralValue()->unsignedInt() % elementType.getInMemoryWidth() == 0) ||
                    (expression->arg1.getLiteralValue() &&
                        expression->arg1.getLiteralValue()->unsignedInt() % elementType.getInMemoryWidth() == 0)))
                return false;
        }
    }
    // unless we can prove otherwise, assume unaligned
    return true;
}

InstructionWalker VPM::insertCopyRAM(Method& method, InstructionWalker it, const Value& destAddress,
Expand Down Expand Up @@ -1358,7 +1379,6 @@ NODISCARD static InstructionWalker lowerReadVPM(
(load(Literal(genericSetup.value)), InstructionDecorations::VPM_READ_CONFIGURATION);
else if(isUnalignedMemoryVPMAccess(internalOffset, dataType))
{
// TODO make sure this block is only used where really really required!
// TODO if inAreaOffset guaranteed to lie within one row, skip loading of second?!
// TODO 64-bit version
/*
Expand Down Expand Up @@ -1451,7 +1471,6 @@ NODISCARD static InstructionWalker lowerWriteVPM(
(load(Literal(genericSetup.value)), InstructionDecorations::VPM_WRITE_CONFIGURATION);
else if(isUnalignedMemoryVPMAccess(internalOffset, dataType))
{
// TODO make sure this block is only used where really really required!
// TODO if inAreaOffset guaranteed to lie within one row, skip loading of second?!
// TODO 64-bit version
/*
Expand Down

0 comments on commit 90d16ae

Please sign in to comment.