From 817922e86e4a92a20c7e92f099d1c58a7bb8d5d8 Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Mon, 6 Jan 2025 23:07:54 +0000 Subject: [PATCH] deploy: f89f79bb7fcf40332bd98c1f80cfcc2d81feea4f --- 404.html | 2 +- categories/analyzing/index.html | 6 +++--- categories/conditioning/index.html | 6 +++--- categories/design/index.html | 6 +++--- categories/domain-flow/index.html | 6 +++--- categories/dsp/index.html | 6 +++--- categories/filtering/index.html | 6 +++--- categories/identification/index.html | 6 +++--- categories/index.html | 6 +++--- categories/introduction/index.html | 6 +++--- categories/matrix-math/index.html | 6 +++--- categories/schedule/index.html | 6 +++--- categories/spacetime/index.html | 6 +++--- categories/transforming/index.html | 6 +++--- ch1/computational-spacetime/index.html | 8 ++++---- ch1/derivation/index.html | 8 ++++---- ch1/domain-flow/index.html | 8 ++++---- ch1/example/index.html | 8 ++++---- ch1/freeschedule/index.html | 8 ++++---- ch1/index.html | 8 ++++---- ch1/linearschedule/index.html | 8 ++++---- ch1/nextsteps/index.html | 8 ++++---- ch1/parallel-programming/index.html | 8 ++++---- ch1/spacetime/index.html | 8 ++++---- ch1/wavefront/index.html | 8 ++++---- ch2-moc/dfa/index.html | 8 ++++---- ch2-moc/dfm/index.html | 8 ++++---- ch2-moc/index.html | 8 ++++---- ch2-moc/nextsteps/index.html | 8 ++++---- ch2-moc/spm/index.html | 8 ++++---- ch3-design/currentstate/index.html | 8 ++++---- ch3-design/elements/index.html | 8 ++++---- ch3-design/energy/index.html | 8 ++++---- ch3-design/index.html | 8 ++++---- ch3-design/nextsteps/index.html | 8 ++++---- ch3-design/space/index.html | 8 ++++---- ch3-design/switching-energy/index.html | 8 ++++---- ch3-design/time/index.html | 8 ++++---- ch4/index.html | 8 ++++---- ch4/level1/index.html | 8 ++++---- ch4/level2/index.html | 8 ++++---- ch4/level3/index.html | 8 ++++---- ch5/factorization/index.html | 6 +++--- ch5/index.html | 8 ++++---- ch6/index.html | 8 ++++---- ch6/matrixkernels/index.html | 8 ++++---- ch7/index.html | 6 +++--- ch7/lu/index.html | 8 ++++---- ch7/solvers/index.html | 6 +++--- ch8/conditioning/index.html | 6 +++--- ch8/filters/index.html | 6 +++--- ch8/identification/index.html | 8 ++++---- ch8/index.html | 8 ++++---- ch8/spectral/index.html | 6 +++--- ch8/transforms/index.html | 6 +++--- contentdev/index.html | 8 ++++---- contentdev/prototype/index.html | 8 ++++---- index.html | 2 +- search/index.html | 8 ++++---- tags/algorithm/index.html | 6 +++--- tags/computational-spacetime/index.html | 6 +++--- tags/conditioning/index.html | 6 +++--- tags/derivation/index.html | 6 +++--- tags/domain-flow/index.html | 6 +++--- tags/dsp/index.html | 6 +++--- tags/filtering/index.html | 6 +++--- tags/free-schedule/index.html | 6 +++--- tags/identification/index.html | 6 +++--- tags/index-space/index.html | 6 +++--- tags/index.html | 6 +++--- tags/lattice/index.html | 6 +++--- tags/linear-schedule/index.html | 6 +++--- tags/matrix-multiply/index.html | 6 +++--- tags/spectral-analysis/index.html | 6 +++--- tags/transform/index.html | 6 +++--- 75 files changed, 258 insertions(+), 258 deletions(-) diff --git a/404.html b/404.html index 67e21b4..35e60a4 100644 --- a/404.html +++ b/404.html @@ -1,2 +1,2 @@ 404 Page not found - Domain Flow Architecture -

404

Not found

Whoops. Looks like this page doesn't exist ¯\_(ツ)_/¯.

Go to homepage

\ No newline at end of file +

404

Not found

Whoops. Looks like this page doesn't exist ¯\_(ツ)_/¯.

Go to homepage

\ No newline at end of file diff --git a/categories/analyzing/index.html b/categories/analyzing/index.html index 13a14fe..41502b1 100644 --- a/categories/analyzing/index.html +++ b/categories/analyzing/index.html @@ -1,10 +1,10 @@ Analyzing - Category - Domain Flow Architecture -

Category - Analyzing

S

\ No newline at end of file diff --git a/categories/conditioning/index.html b/categories/conditioning/index.html index 5652ac8..f5cc251 100644 --- a/categories/conditioning/index.html +++ b/categories/conditioning/index.html @@ -1,10 +1,10 @@ Conditioning - Category - Domain Flow Architecture -

Category - Conditioning

S

\ No newline at end of file diff --git a/categories/design/index.html b/categories/design/index.html index b3b52fa..b4c9d69 100644 --- a/categories/design/index.html +++ b/categories/design/index.html @@ -1,10 +1,10 @@ Design - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/domain-flow/index.html b/categories/domain-flow/index.html index e67db67..3982c1b 100644 --- a/categories/domain-flow/index.html +++ b/categories/domain-flow/index.html @@ -1,10 +1,10 @@ Domain-Flow - Category - Domain Flow Architecture -

Category - Domain-Flow

A

  • An Example

C

D

F

L

P

\ No newline at end of file diff --git a/categories/dsp/index.html b/categories/dsp/index.html index 99fcd19..8d19521 100644 --- a/categories/dsp/index.html +++ b/categories/dsp/index.html @@ -1,10 +1,10 @@ Dsp - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/filtering/index.html b/categories/filtering/index.html index 103aa30..2b01b9c 100644 --- a/categories/filtering/index.html +++ b/categories/filtering/index.html @@ -1,10 +1,10 @@ Filtering - Category - Domain Flow Architecture -

Category - Filtering

D

\ No newline at end of file diff --git a/categories/identification/index.html b/categories/identification/index.html index 4bb3b89..1584aa9 100644 --- a/categories/identification/index.html +++ b/categories/identification/index.html @@ -1,10 +1,10 @@ Identification - Category - Domain Flow Architecture -

Category - Identification

I

\ No newline at end of file diff --git a/categories/index.html b/categories/index.html index ac876f1..97f5311 100644 --- a/categories/index.html +++ b/categories/index.html @@ -1,10 +1,10 @@ Categories - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/introduction/index.html b/categories/introduction/index.html index bc95b3c..e5ceaa9 100644 --- a/categories/introduction/index.html +++ b/categories/introduction/index.html @@ -1,10 +1,10 @@ Introduction - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/matrix-math/index.html b/categories/matrix-math/index.html index 83691ea..8c6e298 100644 --- a/categories/matrix-math/index.html +++ b/categories/matrix-math/index.html @@ -1,10 +1,10 @@ Matrix-Math - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/schedule/index.html b/categories/schedule/index.html index a846151..91d36f2 100644 --- a/categories/schedule/index.html +++ b/categories/schedule/index.html @@ -1,10 +1,10 @@ Schedule - Category - Domain Flow Architecture -

Category - Schedule

F

L

\ No newline at end of file diff --git a/categories/spacetime/index.html b/categories/spacetime/index.html index 3f54922..1011fcc 100644 --- a/categories/spacetime/index.html +++ b/categories/spacetime/index.html @@ -1,10 +1,10 @@ Spacetime - Category - Domain Flow Architecture -
\ No newline at end of file diff --git a/categories/transforming/index.html b/categories/transforming/index.html index 930a52f..506068d 100644 --- a/categories/transforming/index.html +++ b/categories/transforming/index.html @@ -1,10 +1,10 @@ Transforming - Category - Domain Flow Architecture -

Category - Transforming

T

  • Transforms
\ No newline at end of file diff --git a/ch1/computational-spacetime/index.html b/ch1/computational-spacetime/index.html index 06ce25f..2e58a0f 100644 --- a/ch1/computational-spacetime/index.html +++ b/ch1/computational-spacetime/index.html @@ -1,5 +1,5 @@ Computational Spacetime - Domain Flow Architecture -

Computational Spacetime

Computational Spacetime

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/derivation/index.html b/ch1/derivation/index.html index 4786d34..d92a2e3 100644 --- a/ch1/derivation/index.html +++ b/ch1/derivation/index.html @@ -3,7 +3,7 @@ The Linear Algebra universe is particularly rich in partial orders, something that has been exploited for centuries 1. Matrix Computations2 by Golub, and van Loan provide a comprehensive review. What follows may be a bit technical, but keep in mind the visualizations of the previous pages as you try to visualize what the math implies.">Derivation of the matrix multiply domain flow program - Domain Flow Architecture -

Derivation of the matrix multiply domain flow program

The concepts of partial and total orders are essential for finding optimal domain flow algorithms. +

Derivation of the matrix multiply domain flow program

The concepts of partial and total orders are essential for finding optimal domain flow algorithms.
Partial orders, or posets, are the source of high-performance, low-power execution patterns.

The Linear Algebra universe is particularly rich in partial orders, something that has
been exploited for centuries 1. Matrix Computations2 by Golub and van Loan provide
@@ -83,12 +83,12 @@
 b: b[i-1,j,k]
 c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k]
 }
-
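
To make the partial order tangible, here is a minimal sequential sketch (plain C++, written for this page rather than taken from it; the boundary conditions a[i][0][k] = A(i,k), b[0][j][k] = B(k,j), c[i][j][0] = 0 are assumptions) that evaluates the three recurrences over the index space. Any evaluation order that respects the (j-1, i-1, k-1) dependencies yields the same result: the partial order, not the loop nest, defines the computation.

// Sequential evaluation of the matmul recurrences over { (i,j,k) | 1 <= i,j,k <= N }.
#include <iostream>
#include <vector>

int main() {
    constexpr int N = 3;
    auto idx = [](int i, int j, int k) { return (i*(N+1) + j)*(N+1) + k; };
    std::vector<double> a((N+1)*(N+1)*(N+1)), b(a.size()), c(a.size(), 0.0);

    // boundary conditions: inject A along the j=0 face, B along the i=0 face
    for (int i = 1; i <= N; ++i)
        for (int k = 1; k <= N; ++k) a[idx(i,0,k)] = i + 0.1*k;   // A(i,k)
    for (int j = 1; j <= N; ++j)
        for (int k = 1; k <= N; ++k) b[idx(0,j,k)] = k + 0.1*j;   // B(k,j)

    // one legal schedule among many: lexicographic (i,j,k); the free
    // schedule would execute all points with equal i+j+k concurrently
    for (int i = 1; i <= N; ++i)
        for (int j = 1; j <= N; ++j)
            for (int k = 1; k <= N; ++k) {
                a[idx(i,j,k)] = a[idx(i,j-1,k)];                   // propagate A along j
                b[idx(i,j,k)] = b[idx(i-1,j,k)];                   // propagate B along i
                c[idx(i,j,k)] = c[idx(i,j,k-1)]
                              + a[idx(i,j-1,k)] * b[idx(i-1,j,k)]; // accumulate C along k
            }

    // C(i,j) emerges at the k=N face
    for (int i = 1; i <= N; ++i, std::cout << '\n')
        for (int j = 1; j <= N; ++j) std::cout << c[idx(i,j,N)] << ' ';
}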

1: History of Matrices and Determinants

2: Matrix Computations, Gene Golub and Charles van Loan

\ No newline at end of file diff --git a/ch1/domain-flow/index.html b/ch1/domain-flow/index.html index b763a0b..d5bfc8e 100644 --- a/ch1/domain-flow/index.html +++ b/ch1/domain-flow/index.html @@ -7,7 +7,7 @@ Implementation technology will impact these phases differently, and we are seeking a programming model that is invariant to the difference. A thought experiment will shed light on the desired properties of such a model.">Domain Flow - Domain Flow Architecture -

Domain Flow

Domain Flow

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/example/index.html b/ch1/example/index.html index 2cecc17..2e012eb 100644 --- a/ch1/example/index.html +++ b/ch1/example/index.html @@ -3,7 +3,7 @@ compute ( (i,j,k) | 1 <= i,j,k <= N ) { a: a[i,j-1,k] b: b[i-1,j,k] c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k] } The underlying algorithm requires a domain of computation governed by a set of constraints, and a set of computational dependencies that implicitly define a partial order across all the operations in the computation. The partial order is readily visible in the need to have computed the result for $c[i,j,k-1]$ before the computation of $c[i,j,k]$ can commence. In contrast, the $a$ and $b$ recurrences are independent of each other.">An Example - Domain Flow Architecture -

An Example

Let’s look at a simple, but frequently used operator in Deep Learning inference: +

An Example

Let’s look at a simple, but frequently used operator in Deep Learning inference: dense matrix multiplication. A Domain Flow program 1 for this operator is shown below:

compute ( (i,j,k) | 1 <= i,j,k <= N ) {
     a: a[i,j-1,k]
     b: b[i-1,j,k]
     c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k]
}
@@ -38,12 +38,12 @@
 where the variable $a$ is defined.

A thorough understanding of the partial and total orders inherent in the parallel computation is essential for finding optimal domain flow algorithms.

High-performance, low-power execution patterns frequently involve a partial order that enables timely reuse of computational results, or creates flexibility to organize just-in-time arrival -of input operands to avoid memory elements.

In the next segment, let’s explore these execution patterns.

1: Derivation of Domain Flow Matmul

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/freeschedule/index.html b/ch1/freeschedule/index.html index b6d8354..9fbff11 100644 --- a/ch1/freeschedule/index.html +++ b/ch1/freeschedule/index.html @@ -1,5 +1,5 @@ Free Schedule - Domain Flow Architecture -

Free Schedule

Free Schedule

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/index.html b/ch1/index.html index b2e5c42..70c0711 100644 --- a/ch1/index.html +++ b/ch1/index.html @@ -3,16 +3,16 @@ High-performance, low-latency, energy-efficient computation is particularly important for the emerging application class of autonomous intelligent systems.">Domain Flow Algorithms - Domain Flow Architecture -

Domain Flow Algorithms

Domain Flow algorithms are parallel algorithms that incorporate the constraints of space and time. +

Domain Flow Algorithms

Domain Flow algorithms are parallel algorithms that incorporate the constraints of space and time.
By honoring the delay inherent in exchanging information between two spatially
separate computation or storage sites, domain flow algorithms can improve performance
and energy efficiency compared to sequential programming models that depend
on (globally addressable) random access memory.

High-performance, low-latency, energy-efficient computation is particularly important for the -emerging application class of autonomous intelligent systems.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/linearschedule/index.html b/ch1/linearschedule/index.html index 4e5d6f6..e28e48f 100644 --- a/ch1/linearschedule/index.html +++ b/ch1/linearschedule/index.html @@ -7,7 +7,7 @@ Let’s go through the thought experiment what the free schedule demands from a physical system. In the free schedule animation, the propagation recurrences distributing the $A$ and $B$ matrix elements throughout the 3D lattice run ‘ahead’ of the actual computational recurrence calculating the $C$ matrix elements.">Linear Schedules - Domain Flow Architecture -

Linear Schedules

Linear Schedules

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/nextsteps/index.html b/ch1/nextsteps/index.html index 45eb50a..6327b62 100644 --- a/ch1/nextsteps/index.html +++ b/ch1/nextsteps/index.html @@ -1,12 +1,12 @@ Next Steps - Domain Flow Architecture -

Next Steps

We have gone through a quick introduction to the basic concepts of parallel algorithm design. +

Next Steps

We have gone through a quick introduction to the basic concepts of parallel algorithm design.
Before we dive into what makes good parallel algorithms, we first must take a quick detour
and discuss the physical hardware organization of computing engines.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/parallel-programming/index.html b/ch1/parallel-programming/index.html index f829048..1f455d9 100644 --- a/ch1/parallel-programming/index.html +++ b/ch1/parallel-programming/index.html @@ -1,5 +1,5 @@ Parallel Programming - Domain Flow Architecture -

Parallel Programming

To appreciate the domain flow programming model and what it enables, you need to think about the physical +

Parallel Programming

To appreciate the domain flow programming model and what it enables, you need to think about the physical
form a ‘program evaluator’ could take. In the days when a processor occupied the volume of a small room,
any physical computational machine was limited to a single computational element. This implied that the
execution of any algorithm had to be specified as a complete order in time.
@@ -19,12 +19,12 @@
 machines mentioned above. Furthermore, the optimal algorithm even changes when the same machine architecture
introduces a new, typically faster, implementation. And we are not just talking about simple algorithmic changes,
such as loop order or blocking; sometimes even the underlying mathematics needs to change.

Given the complexity of writing parallel algorithms, this one-off nature of parallel algorithm design raised
the question: is there a parallel programming model that is invariant to the implementation technology of the machine?

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/spacetime/index.html b/ch1/spacetime/index.html index 1a2d1bb..fede1c4 100644 --- a/ch1/spacetime/index.html +++ b/ch1/spacetime/index.html @@ -1,5 +1,5 @@ Constraints of Spacetime - Domain Flow Architecture -

Constraints of Spacetime

If you visualize the ‘world’ from the perspective of an operand flowing through a machine, +

Constraints of Spacetime

If you visualize the ‘world’ from the perspective of an operand flowing through a machine,
you realize that a physical machine creates a specific spatial constraint for the movement of data.
Processing nodes are fixed in space, and information is exchanged between nodes to accomplish
some transformation. Nodes consume and generate information, and communication links move information
(program and data) between nodes.
@@ -22,12 +22,12 @@
 the propagation of information. A computational event has to be able to ‘see’ its operands
before it can commence. Otherwise stated, the computational event needs to lie in the future light cone
of each of its operands.

These temporal constraints are further complicated by the fact that man-made structures today do not
communicate through free space yet, and the physical communication structure adds additional constraints
on the shape and extent of the future cone.

These man-made computational structures are dubbed computational spacetimes.
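
To make the constraint concrete, consider a small sketch (the mesh topology and the unit hop delay are assumptions for the example): an event can consume a value only if enough time has passed for that value to physically reach it.

// Hypothetical illustration: on a 2D mesh where each hop costs one clock,
// an event at node q scheduled at time t(q) can consume a value produced
// at node p at time t(p) only if t(q) >= t(p) + manhattan(p, q).
// This is the discrete analogue of "lying in the future light cone".
#include <cstdlib>
#include <iostream>

struct Event { int x, y, t; };

bool inFutureCone(const Event& producer, const Event& consumer) {
    int hops = std::abs(consumer.x - producer.x) + std::abs(consumer.y - producer.y);
    return consumer.t >= producer.t + hops;
}

int main() {
    Event src{0, 0, 0};
    std::cout << inFutureCone(src, {2, 1, 3})    // reachable: 3 hops, 3 ticks
              << inFutureCone(src, {2, 1, 2});   // violates propagation delay
}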

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch1/wavefront/index.html b/ch1/wavefront/index.html index cbb4aae..7327d1c 100644 --- a/ch1/wavefront/index.html +++ b/ch1/wavefront/index.html @@ -1,5 +1,5 @@ Wavefronts of Computation - Domain Flow Architecture -

Wavefronts of Computation

Wavefronts of Computation

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/dfa/index.html b/ch2-moc/dfa/index.html index 108f670..f90dce0 100644 --- a/ch2-moc/dfa/index.html +++ b/ch2-moc/dfa/index.html @@ -1,5 +1,5 @@ Domain Flow Architecture -

Domain Flow Architecture

Domain Flow Architecture (DFA) machines are the class of machines that execute +

Domain Flow Architecture

Domain Flow Architecture (DFA) machines are the class of machines that execute using the domain flow execution model. The fundamental problem limiting the energy efficiency of the data flow machine is the size of the CAM and fabric. As they are managed as two separate clusters of resources, @@ -15,12 +15,12 @@ exhibit partial orders that are regular and are separated in space. That is a mouthful, but we can make this more tangible when we discuss in more detail the temporal behavior of a domain flow program in the -next section about time.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/dfm/index.html b/ch2-moc/dfm/index.html index 4456e28..d479169 100644 --- a/ch2-moc/dfm/index.html +++ b/ch2-moc/dfm/index.html @@ -3,7 +3,7 @@ write an operand into an appropriate operand slot in an instruction token stored in a Content Addressable Memory (CAM) by an instruction tag check if all operands are present to start the execution cycle of the instruction if an instruction is ready then extract it from the CAM and inject it into a fabric of computational elements deliver the instruction to an available execution unit execute the instruction, and finally write the result back into an operand slot in target instruction token stored in the CAM The strength of the resource contention management of the Data Flow Machine is that the machine can execute along the free schedule, that is, the inherent parallelism of the algorithm. Any physical implementation, however, is constrained by the energy-efficiency of the CAM and the network that connects the CAM to the fabric of computational elements. As concurrency demands grow the efficiency of both the CAM and the fabric decreases making large data flow machines unattractive. However, small data flow machines don’t have this problem and are able to deliver energy-efficient, low-latency resource management. Today, all high-performance microprocessors have a data flow machine at their core.">Data Flow Machine - Domain Flow Architecture -

Data Flow Machine

In the late 60’s and 70’s when computer scientists were exploring parallel +

Data Flow Machine

In the late ’60s and ’70s, when computer scientists were exploring parallel
computation by building the first parallel machines and developing the parallel
algorithm complexity theory, people realized that this over-constrained
specification was a real problem for concurrency.
@@ -18,12 +18,12 @@
 decreases, making large data flow machines unattractive.
However, small data flow machines don’t have this problem and are able to
deliver energy-efficient, low-latency resource management.
Today, all high-performance microprocessors
-have a data flow machine at their core.
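
The firing rule at the heart of this machine, namely that an instruction token fires the moment its last operand arrives, can be sketched in a few lines. This is a toy model with two-operand tokens and a std::map standing in for the CAM; it is an illustration of the rule, not a description of any real machine.

// Toy data flow firing rule: an instruction token held in a CAM-like store
// fires as soon as both of its operand slots are filled.
#include <iostream>
#include <map>
#include <optional>
#include <string>

struct Token {                       // instruction token in the CAM
    std::optional<double> lhs, rhs;  // operand slots
    bool ready() const { return lhs && rhs; }
};

int main() {
    std::map<std::string, Token> cam;            // tag -> token
    auto deliver = [&](const std::string& tag, int slot, double v) {
        Token& t = cam[tag];                     // tag check / allocate
        (slot == 0 ? t.lhs : t.rhs) = v;         // write the operand slot
        if (t.ready()) {                         // extract and execute
            std::cout << tag << " fires: " << *t.lhs + *t.rhs << '\n';
            cam.erase(tag);
        }
    };
    deliver("add1", 0, 1.5);   // token waits: only one operand present
    deliver("add1", 1, 2.5);   // second operand arrives -> instruction fires
}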

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/index.html b/ch2-moc/index.html index 5fc87e6..3335af4 100644 --- a/ch2-moc/index.html +++ b/ch2-moc/index.html @@ -7,7 +7,7 @@ Finite State Machines (FSM) Pushdown automata Turing machines Decision Tree Models Random Access Machine And parallel models of computation:">Resource Contention Management - Domain Flow Architecture -

Resource Contention Management

A model of computation is a model which describes how an +

Resource Contention Management

A model of computation describes how the
output of a mathematical function is computed given an input.
These models describe how units of computation, memories, and
information (data) exchanges are organized.
@@ -15,12 +15,12 @@
 measure of the computational complexity of an algorithm independent
of any specific physical implementation.

There are sequential models of computation:

  1. Finite State Machines (FSM)
  2. Pushdown automata
  3. Turing machines
  4. Decision Tree Models
  5. Random Access Machine

And parallel models of computation:

  1. Cellular Automata (CA)
  2. Kahn Process Networks
  3. Petri Nets
  4. Synchronous Data Flow

In this chapter, we’ll summarize the Stored Program Machine, an implementation of the
Random Access Machine model of computation, and the Data Flow Machine, an implementation
of the Synchronous Data Flow model of computation.

And we’ll introduce the new Domain Flow Architecture (DFA), which solves the problem of diminishing returns when trying to scale -up the Data Flow Machine.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/nextsteps/index.html b/ch2-moc/nextsteps/index.html index 36adde4..b034344 100644 --- a/ch2-moc/nextsteps/index.html +++ b/ch2-moc/nextsteps/index.html @@ -1,13 +1,13 @@ Next Steps - Domain Flow Architecture -

Next Steps

We have quickly introduced computer hardware organization to deliver +

Next Steps

We have quickly introduced computer hardware organization to deliver
resource contention management.
Our next step is to look at specific algorithms and explore
their optimal parallel execution dynamics.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch2-moc/spm/index.html b/ch2-moc/spm/index.html index 8de0bf3..ba00dd4 100644 --- a/ch2-moc/spm/index.html +++ b/ch2-moc/spm/index.html @@ -3,19 +3,19 @@ fetch an instruction from the address pointed to by the IP register, and update IP to point to the next instruction decode instruction dispatch to appropriate execution units 4a. if execute unit is the load/store unit then request data from a memory location or provide data to store at memory location 4b. if execute unit is branch unit then load IP with new address 4c. else execute address/branch/arithmetic/logic/function operation store result of execute unit in register file This cycle is repeated till a halt instruction is executed, or an interrupt is issued.">Stored Program Machine - Domain Flow Architecture -

Stored Program Machine

After loading a program into the main memory of the Stored Program Machine, +

Stored Program Machine

After loading a program into the main memory of the Stored Program Machine,
the Operating System writes the address of the entry point of the program
into the Instruction Pointer (IP) register of the processor. After that
initialization, a Stored Program Machine uses the following resource contention
management mechanism to unambiguously execute a program:

  1. fetch an instruction from the address pointed to by the IP register, and update IP to point to the next instruction
  2. decode instruction
  3. dispatch to the appropriate execution unit
  4. execute:
    4a. if the execute unit is the load/store unit, then request data from a memory location or provide data to store at a memory location
    4b. if the execute unit is the branch unit, then load IP with the new address
    4c. else execute the address/branch/arithmetic/logic/function operation
  5. store the result of the execute unit in the register file

This cycle is repeated until a halt instruction is executed or an interrupt is issued.
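
To make the cycle concrete, here is a minimal sketch of a toy accumulator machine; the opcodes and the encoding are invented for this illustration, not taken from the text.

// Minimal stored-program cycle for a toy accumulator machine.
// Hypothetical opcodes: 0=halt, 1=load addr, 2=add addr, 3=store addr.
#include <array>
#include <iostream>

int main() {
    std::array<int, 16> mem = {1, 10, 2, 11, 3, 12, 0};   // program at 0..6
    mem[10] = 5; mem[11] = 7;                             // data
    int ip = 0, acc = 0;
    while (true) {
        int opcode = mem[ip++];              // 1. fetch, update IP
        switch (opcode) {                    // 2. decode; 3./4. dispatch + execute
            case 1: acc = mem[mem[ip++]]; break;           // load
            case 2: acc += mem[mem[ip++]]; break;          // add
            case 3: mem[mem[ip++]] = acc; break;           // store (5. writeback)
            case 0: std::cout << "mem[12]=" << mem[12] << '\n'; return 0;  // halt
        }
    }
}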

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/currentstate/index.html b/ch3-design/currentstate/index.html index 11ed9fe..8d0b2bf 100644 --- a/ch3-design/currentstate/index.html +++ b/ch3-design/currentstate/index.html @@ -1,5 +1,5 @@ Computational Dynamics - Domain Flow Architecture -

Computational Dynamics

A memory access in a physical machine can be very complex. For example, +

Computational Dynamics

A memory access in a physical machine can be very complex. For example,
when a program accesses an operand located at an address that is not in
physical memory, the processor registers a page miss.
The performance difference between an access from the local L1 cache
versus a page miss
@@ -33,12 +33,12 @@
 modulation due to power constraints, causes the collective to wait
for the slowest process. As the number of processors grows, so does
variability. And unfortunately, when variability rises, processor
-utilization drops and algorithmic performance suffers.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/elements/index.html b/ch3-design/elements/index.html index 0654ab7..fd585e3 100644 --- a/ch3-design/elements/index.html +++ b/ch3-design/elements/index.html @@ -15,7 +15,7 @@ Item #2 is well-known among high-performance algorithm designers. Item #3 is well-known among hardware designers and computer engineers. When designing domain flow algorithms, we are looking for an energy efficient embedding of a computational graph in space, and it is thus to be expected that we need to combine all three attributes of minimizing operator count, operand movement, and resource contention. The complexity of minimizing resource contention is what makes hardware design so much more complex. But the complexity of operator contention can be mitigated by clever resource contention management.">Elements of Design - Domain Flow Architecture -

Elements of Design

We can summarize the attributes of good parallel algorithm design as

  1. low operation count, where operation count is defined as the sum of operators and operand accesses
  2. minimal operand movement
  3. minimal resource contention

Item #1 is well-known by theoretical computer scientists.

Item #2 is well-known among high-performance algorithm designers.

Item #3 is well-known among hardware designers and computer engineers.

When designing domain flow algorithms, we are looking for an energy +

Elements of Design

We can summarize the attributes of good parallel algorithm design as

  1. low operation count, where operation count is defined as the sum of operators and operand accesses
  2. minimal operand movement
  3. minimal resource contention

Item #1 is well-known by theoretical computer scientists.

Item #2 is well-known among high-performance algorithm designers.

Item #3 is well-known among hardware designers and computer engineers.

When designing domain flow algorithms, we are looking for an energy-efficient
embedding of a computational graph in space, and it is thus
to be expected that we need to combine all three attributes of minimizing
operator count, operand movement, and resource contention.
@@ -31,12 +31,12 @@
 it forces a total order on the computation graph.
This task of creating the total order falls on the algorithm designer.

For parallel execution, we need a more efficient resource contention management mechanism. And this is where our
-computational spacetime will come in handy.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/energy/index.html b/ch3-design/energy/index.html index a570916..cbc13ab 100644 --- a/ch3-design/energy/index.html +++ b/ch3-design/energy/index.html @@ -3,7 +3,7 @@ Fundamentally, the SPM relies on a request/reply protocol to get information from a memory. Otherwise stated, the resource contention mechanism deployed by a SPM uses a random access memory to store inputs, intermediate, and output values. And all this memory management uses this request/reply cycle. Which we now know is becoming less and less energy efficient compared to the actual computational event the algorithm requires. The sequential processing model is becoming less and less energy efficient.">Energy: the how - Domain Flow Architecture -

Energy: the how

Table 1 shows switching energy estimates of key computational events by process node. +

Energy: the how

Table 1 shows switching energy estimates of key computational events by process node. Data movement operations (reads and writes) have started to dominate energy consumption in modern processors. This makes a Stored Program Machine (SPM) less and less efficient. To counter this, all CPUs, GPUs, and DSPs have started to add instructions that amortize @@ -29,12 +29,12 @@ an unnecessary penalty. But the DFM does provide a hint of how to maintain fine-grain parallelism: its pipeline is a ring, which is an infinite, but bounded structure.

The Domain Flow Architecture (DFA) builds upon this observation and maintains
a local fine-grain spatial structure while offering an infinite computational
-fabric with finite resources. DFA is to DFM as PIM is to SPM.

Values in picojoules (pJ) per operation

Operation Type | 28/22nm | 16/14/12nm | 7/6/5nm | 3nm | 2nm
32-bit Register Read | 0.040 | 0.025 | 0.012 | 0.008 | 0.006
32-bit Register Write | 0.045 | 0.028 | 0.014 | 0.009 | 0.007
32-bit ALU Operation | 0.100 | 0.060 | 0.030 | 0.020 | 0.015
32-bit FPU Add | 0.400 | 0.250 | 0.120 | 0.080 | 0.060
32-bit FPU Multiply | 0.800 | 0.500 | 0.250 | 0.170 | 0.130
32-bit FPU FMA | 1.000 | 0.600 | 0.300 | 0.200 | 0.150
32-bit Word Read L1 | 0.625 | 0.375 | 0.1875 | 0.125 | 0.09375
32-bit Word Read L2 | 1.875 | 1.125 | 0.5625 | 0.375 | 0.28125
32-bit Word Read DDR5 | 6.25 | 5.000 | 3.750 | 3.125 | 2.8125
64-byte L1 Cache Read | 10.000 | 6.000 | 3.000 | 2.000 | 1.500
64-byte L2 Cache Read | 30.000 | 18.000 | 9.000 | 6.000 | 4.500
64-byte DDR5 Memory Read | 100.000 | 80.000 | 60.000 | 50.000 | 45.000

Table 1: Switching Energy Estimate by Process Node

Notes:

  1. 32-bit cache and memory operations are derived from 64-byte read energy
  2. Smaller process nodes generally reduce switching energy by roughly 40-50% per major node transition
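
A quick back-of-the-envelope calculation with the 7/6/5nm column of Table 1 shows why data movement dominates. This is a sketch; the figures are the table's estimates, not measurements.

// Using Table 1's 7/6/5nm estimates (pJ): one 32-bit FPU FMA costs 0.3 pJ,
// while a 32-bit word from DDR5 costs 3.75 pJ, an order of magnitude more.
#include <iostream>

int main() {
    const double fma_pJ      = 0.300;   // 32-bit FPU FMA
    const double l1_word_pJ  = 0.1875;  // 32-bit word read, L1
    const double ddr_word_pJ = 3.750;   // 32-bit word read, DDR5

    // an FMA consumes two new operands; compare operand delivery to compute
    std::cout << "L1-fed FMA:  " << (2*l1_word_pJ)  / fma_pJ << "x compute energy\n";
    std::cout << "DDR-fed FMA: " << (2*ddr_word_pJ) / fma_pJ << "x compute energy\n";
}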
\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/index.html b/ch3-design/index.html index ae5d513..6fef2da 100644 --- a/ch3-design/index.html +++ b/ch3-design/index.html @@ -1,16 +1,16 @@ Elements of Good Design - Domain Flow Architecture -

Elements of Good Design

The best algorithms for sequential execution are those that minimize the number +

Elements of Good Design

The best algorithms for sequential execution are those that minimize the number of operations to yield results. Computational complexity theory has aided this quest, but any performance-minded algorithm designer knows that the best theoretical algorithms are not necessarily the fastest when executed on real hardware. The difference is typically caused by the trade-off sequential algorithms have to make between computation and accessing memory. The constraints of data movement are even more pronounced -in parallel algorithms as demonstrated in the previous section.

This chapter explores the elements of good design for parallel algorithms and their execution on real hardware.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/nextsteps/index.html b/ch3-design/nextsteps/index.html index c0d419b..f77936c 100644 --- a/ch3-design/nextsteps/index.html +++ b/ch3-design/nextsteps/index.html @@ -3,19 +3,19 @@ Once we get a good collection of fast, and energy efficient algorithms together, we can start to explore how best to engineer combinations of these operators. We will discover that sometimes, the cost of an information exchange makes a whole class of algorithms unattractive for parallel executions. With that insight comes the need to create new algorithms and sometimes completely new mathematical approaches to properly leverage the available resources.">Next Steps - Domain Flow Architecture -

Next Steps

In this short introduction to parallel algorithms in general and domain flow +

Next Steps

In this short introduction to parallel algorithms in general and domain flow
in particular, our next step is to look at specific algorithms and explore
their optimal parallel execution dynamics.

Once we get a good collection of fast and energy-efficient algorithms
together, we can start to explore how best to engineer combinations
of these operators. We will discover that sometimes the cost of
an information exchange makes a whole class of algorithms unattractive
for parallel execution. With that insight comes the need to create
new algorithms and sometimes completely new
-mathematical approaches to properly leverage the available resources.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/space/index.html b/ch3-design/space/index.html index d095bce..9ac34ce 100644 --- a/ch3-design/space/index.html +++ b/ch3-design/space/index.html @@ -1,5 +1,5 @@ Space: the where - Domain Flow Architecture -

Space: the where

Space is a scarce resource, with a direct cost associated to it. A computational engine, +

Space: the where

Space is a scarce resource, with a direct cost associated with it. A computational engine,
such as a Stored Program Machine, needs to allocate area for ALUs and
register files, and to make these work well, even more space is
required to surround these resources with cache hierarchies and memory
@@ -43,12 +43,12 @@
 real-time systems tend to favor fine-grain parallelism.
Fine-grain parallel systems offer lower latencies and, as an increasingly
important benefit, energy efficiency.
In the next chapter, we’ll discuss the techniques used to design spatial mappings
-for fine-grained parallel machines.

Footnotes

1: Flynn, Michael J. (December 1966), Very high-speed computing systems

2: Flynn’s taxonomy Wikipedia

3: The Landscape of Parallel Computing Research: A View from Berkeley The Seven Dwarfs

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch3-design/switching-energy/index.html b/ch3-design/switching-energy/index.html index 28e4589..382ef9f 100644 --- a/ch3-design/switching-energy/index.html +++ b/ch3-design/switching-energy/index.html @@ -7,13 +7,13 @@ Register 28/22nm (fJ) 16/14/12nm (fJ) 7/6/5nm (fJ) 3nm (fJ) 2nm (fJ) Read bit 2.5 - 3.5 1.8 - 2.3 0.9 - 1.2 0.6 - 0.8 0.4 - 0.6 Write bit 3.0 - 4.0 2.0 - 2.8 1.1 - 1.5 0.7 - 1.0 0.5 - 0.8 Notes:">Switching Energy Estimates - Domain Flow Architecture -

Switching Energy Estimates

This page contains background information regarding the switching energy estimates so -important to designing energy-efficient data paths.

Register Read/Write Energy Estimates by Process Node

Note: Values are approximate and may vary by foundry and implementation

Register | 28/22nm (fJ) | 16/14/12nm (fJ) | 7/6/5nm (fJ) | 3nm (fJ) | 2nm (fJ)
Read bit | 2.5 - 3.5 | 1.8 - 2.3 | 0.9 - 1.2 | 0.6 - 0.8 | 0.4 - 0.6
Write bit | 3.0 - 4.0 | 2.0 - 2.8 | 1.1 - 1.5 | 0.7 - 1.0 | 0.5 - 0.8

Notes:

  • Values assume typical operating conditions (TT corner, nominal voltage, 25°C)
  • Energy includes both dynamic and short-circuit power
  • Leakage power not included
  • Values are for basic register operations without additional clock tree or routing overhead
  • Advanced nodes (3nm, 2nm) are based on early estimates and projections

Register file energy estimates

All values in femtojoules per bit (fJ/bit)

Operation | Size | 28/22nm | 16/14/12nm | 7/6/5nm | 3nm | 2nm
Read | 32-entry | 8.5 - 10.5 | 6.00 - 7.50 | 3.20 - 4.00 | 2.25 - 2.80 | 1.57 - 1.95
Read | 64-entry | 12.0 - 14.0 | 8.50 - 10.00 | 4.50 - 5.50 | 3.15 - 3.85 | 2.21 - 2.70
Read | 128-entry | 16.0 - 18.0 | 11.00 - 13.00 | 6.00 - 7.00 | 4.20 - 4.90 | 2.95 - 3.45
Write | 32-entry | 10.0 - 12.0 | 7.00 - 8.50 | 3.80 - 4.60 | 2.65 - 3.25 | 1.85 - 2.28
Write | 64-entry | 14.0 - 16.0 | 10.00 - 11.50 | 5.20 - 6.20 | 3.65 - 4.35 | 2.55 - 3.05
Write | 128-entry | 18.0 - 20.0 | 13.00 - 15.0 | 7.00 - 8.00 | 4.90 - 5.60 | 3.45 - 3.95

Notes:

  • All values in femtojoules per bit (fJ/bit)
  • Assumes typical operating conditions (TT corner, nominal voltage, 25°C)
  • Includes decoder, wordline, and bitline energy
  • Includes local clock distribution
  • Includes both dynamic and short-circuit power
  • Values represent single read port, single write port configuration

Integer Arithmetic and Logic Unit Switching Energy Estimates

Unit Type | Bit Size | 28/22nm (pJ) | 16/14/12nm (pJ) | 7/6/5nm (pJ) | 3nm (pJ) | 2nm (pJ)
CPU ALU | 8-bit | 0.45 - 0.65 | 0.30 - 0.43 | 0.20 - 0.29 | 0.13 - 0.19 | 0.09 - 0.13
CPU ALU | 16-bit | 0.90 - 1.30 | 0.60 - 0.86 | 0.40 - 0.58 | 0.26 - 0.38 | 0.18 - 0.26
CPU ALU | 24-bit | 1.35 - 1.95 | 0.90 - 1.30 | 0.60 - 0.87 | 0.39 - 0.57 | 0.27 - 0.40
CPU ALU | 32-bit | 1.80 - 2.60 | 1.20 - 1.73 | 0.80 - 1.16 | 0.52 - 0.76 | 0.36 - 0.53
CPU ALU | 40-bit | 2.25 - 3.25 | 1.50 - 2.16 | 1.00 - 1.45 | 0.65 - 0.95 | 0.45 - 0.66
CPU ALU | 48-bit | 2.70 - 3.90 | 1.80 - 2.60 | 1.20 - 1.74 | 0.78 - 1.14 | 0.54 - 0.79
CPU ALU | 56-bit | 3.15 - 4.55 | 2.10 - 3.03 | 1.40 - 2.03 | 0.91 - 1.33 | 0.63 - 0.92
CPU ALU | 64-bit | 3.60 - 5.20 | 2.40 - 3.47 | 1.60 - 2.32 | 1.04 - 1.52 | 0.72 - 1.05
GPU ALU | 8-bit | 0.60 - 0.85 | 0.40 - 0.57 | 0.27 - 0.38 | 0.17 - 0.25 | 0.12 - 0.17
GPU ALU | 16-bit | 1.20 - 1.70 | 0.80 - 1.14 | 0.53 - 0.76 | 0.35 - 0.50 | 0.24 - 0.35
GPU ALU | 24-bit | 1.80 - 2.55 | 1.20 - 1.71 | 0.80 - 1.14 | 0.52 - 0.75 | 0.36 - 0.52
GPU ALU | 32-bit | 2.40 - 3.40 | 1.60 - 2.28 | 1.07 - 1.52 | 0.69 - 1.00 | 0.48 - 0.70
GPU ALU | 40-bit | 3.00 - 4.25 | 2.00 - 2.85 | 1.33 - 1.90 | 0.86 - 1.25 | 0.60 - 0.87
GPU ALU | 48-bit | 3.60 - 5.10 | 2.40 - 3.42 | 1.60 - 2.28 | 1.04 - 1.50 | 0.72 - 1.04
GPU ALU | 56-bit | 4.20 - 5.95 | 2.80 - 3.99 | 1.87 - 2.66 | 1.21 - 1.75 | 0.84 - 1.21
GPU ALU | 64-bit | 4.80 - 6.80 | 3.20 - 4.56 | 2.13 - 3.04 | 1.38 - 2.00 | 0.96 - 1.38
DSP ALU | 8-bit | 0.55 - 0.75 | 0.37 - 0.53 | 0.25 - 0.35 | 0.16 - 0.23 | 0.11 - 0.16
DSP ALU | 16-bit | 1.10 - 1.50 | 0.73 - 1.00 | 0.49 - 0.70 | 0.32 - 0.46 | 0.22 - 0.32
DSP ALU | 24-bit | 1.65 - 2.25 | 1.10 - 1.50 | 0.73 - 1.05 | 0.48 - 0.69 | 0.33 - 0.48
DSP ALU | 32-bit | 2.20 - 3.00 | 1.47 - 2.00 | 0.98 - 1.40 | 0.63 - 0.92 | 0.44 - 0.64
DSP ALU | 40-bit | 2.75 - 3.75 | 1.83 - 2.50 | 1.22 - 1.75 | 0.79 - 1.15 | 0.55 - 0.80
DSP ALU | 48-bit | 3.30 - 4.50 | 2.20 - 3.00 | 1.47 - 2.10 | 0.95 - 1.38 | 0.66 - 0.96
DSP ALU | 56-bit | 3.85 - 5.25 | 2.57 - 3.50 | 1.71 - 2.45 | 1.11 - 1.61 | 0.77 - 1.12
DSP ALU | 64-bit | 4.40 - 6.00 | 2.93 - 4.00 | 1.96 - 2.80 | 1.27 - 1.84 | 0.88 - 1.28

Notes:

  • Values are approximate switching energy in picojoules (pJ)
  • Represents typical dynamic switching energy per operation
  • Accounts for:
    • Arithmetic data path logic operations
    • Typical instruction mix for each design point

Floating-Point Unit Switching Energy Estimates

Unit Type | Bit Size | 28/22nm (pJ) | 16/14/12nm (pJ) | 7/6/5nm (pJ) | 3nm (pJ) | 2nm (pJ)
CPU FPU | 8-bit | 1.20 - 1.70 | 0.80 - 1.14 | 0.53 - 0.76 | 0.35 - 0.50 | 0.24 - 0.35
CPU FPU | 16-bit | 1.80 - 2.60 | 1.20 - 1.73 | 0.80 - 1.16 | 0.52 - 0.76 | 0.36 - 0.53
CPU FPU | 32-bit | 3.60 - 5.20 | 2.40 - 3.47 | 1.60 - 2.32 | 1.04 - 1.52 | 0.72 - 1.05
CPU FPU | 64-bit | 7.20 - 10.40 | 4.80 - 6.93 | 3.20 - 4.64 | 2.08 - 3.04 | 1.44 - 2.10
GPU FPU | 8-bit | 1.60 - 2.30 | 1.07 - 1.53 | 0.71 - 1.02 | 0.46 - 0.66 | 0.32 - 0.46
GPU FPU | 16-bit | 2.40 - 3.40 | 1.60 - 2.28 | 1.07 - 1.52 | 0.69 - 1.00 | 0.48 - 0.70
GPU FPU | 32-bit | 4.80 - 6.80 | 3.20 - 4.56 | 2.13 - 3.04 | 1.38 - 2.00 | 0.96 - 1.38
GPU FPU | 64-bit | 9.60 - 13.60 | 6.40 - 9.13 | 4.27 - 6.08 | 2.76 - 4.00 | 1.92 - 2.76
DSP FPU | 8-bit | 1.40 - 2.00 | 0.93 - 1.33 | 0.62 - 0.89 | 0.40 - 0.58 | 0.28 - 0.40
DSP FPU | 16-bit | 2.20 - 3.00 | 1.47 - 2.00 | 0.98 - 1.40 | 0.63 - 0.92 | 0.44 - 0.64
DSP FPU | 32-bit | 4.40 - 6.00 | 2.93 - 4.00 | 1.96 - 2.80 | 1.27 - 1.84 | 0.88 - 1.28
DSP FPU | 64-bit | 8.80 - 12.00 | 5.87 - 8.00 | 3.91 - 5.60 | 2.54 - 3.68 | 1.76 - 2.56

Notes:

  • Values are approximate switching energy in picojoules (pJ)
  • 8-bit FPU estimates based on IEEE fp8 standard
  • Represents typical dynamic switching energy per operation
  • Accounts for:
    • Arithmetic logic operations
    • Floating-point operations (for FPU)
    • Typical instruction mix for each design point
\ No newline at end of file diff --git a/ch3-design/time/index.html b/ch3-design/time/index.html index 76e5c7c..0e2cd95 100644 --- a/ch3-design/time/index.html +++ b/ch3-design/time/index.html @@ -3,7 +3,7 @@ Let x be a computation that uses y as input, then the free schedule is defined as: \begin{equation} T(x) =\begin{cases} 1, & \text{if y is an external input}\\ 1 + max(T(y)) & \text{otherwise} \end{cases} \end{equation} The free schedule is defined at the level of the individual operations. It does not provide any information about the global data movement or the global structure of the interactions between data and operation. Moreover, the above equation describes a logical sequencing of operations, it does not specify a physical evolution.">Time: the when - Domain Flow Architecture -

Time: the when

The free schedule represents the inherent concurrency of the parallel algorithm, and, under the assumption +

Time: the when

The free schedule represents the inherent concurrency of the parallel algorithm, and, under the assumption of infinite resources, it is the fastest schedule possible.

Let x be a computation that uses y as input, then the free schedule is defined as:
\begin{equation}
T(x) =\begin{cases}
1, & \text{if } y \text{ is an external input}\\
1 + \max(T(y)), & \text{otherwise}
\end{cases}
\end{equation}
@@ -66,12 +66,12 @@
 recurrence equations has practical application
for the design of optimal computational data paths.
The Domain Flow model uses the Karp, Miller, and Winograd piecewise linear scheduling
-construction to sequence activity wavefronts.
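
As an illustration of this definition, here is a small sketch that memoizes $T$ over a dependence DAG; the four-node graph is invented for the example, not taken from the text.

// Free schedule T(x): 1 for external inputs, else 1 + max over its inputs.
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <vector>

std::map<std::string, std::vector<std::string>> deps = {
    {"a", {}}, {"b", {}},          // external inputs
    {"c", {"a", "b"}},             // c consumes a and b
    {"d", {"c", "a"}}              // d consumes c and a
};

int T(const std::string& x) {
    static std::map<std::string, int> memo;
    if (auto it = memo.find(x); it != memo.end()) return it->second;
    int t = 1;                                     // external input case
    for (const auto& y : deps[x])                  // else 1 + max(T(y))
        t = std::max(t, 1 + T(y));
    return memo[x] = t;
}

int main() {
    for (const auto& [node, inputs] : deps)
        std::cout << "T(" << node << ") = " << T(node) << '\n';  // a,b:1 c:2 d:3
}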

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch4/index.html b/ch4/index.html index 2f92d53..d508822 100644 --- a/ch4/index.html +++ b/ch4/index.html @@ -7,16 +7,16 @@ components in computational methods, the investment can pay high dividends.">Basic Linear Algebra - Domain Flow Architecture -

Basic Linear Algebra

Basic Linear Algebra Subroutines are an historically significant set of +

Basic Linear Algebra

Basic Linear Algebra Subroutines are a historically significant set of
functions that encapsulate the basic building blocks of a large collection
of linear algebra algorithms and implementations.

The BLAS library has proven to be a very productive mechanism to create and disseminate highly optimized numerical libraries to a plethora of computer architectures and machines. Writing high-performance linear -algebra algorithms turns out to be a tenacious problem, but since linear algebra operations are essential
components in computational methods, the investment can pay high dividends.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch4/level1/index.html b/ch4/level1/index.html index 4cdd0fd..60dd70b 100644 --- a/ch4/level1/index.html +++ b/ch4/level1/index.html @@ -7,7 +7,7 @@ vector scale: scalar-vector multiplication: $z = \alpha x \implies (z_i = \alpha x_i)$ vector element addition: $z = x + y \implies (z_i = x_i + y_i)$ vector element multiply: $z = x * y \implies (z_i = x_i * y_i)$ vector dot product: $c = x^Ty \implies ( c = \sum_{i = 1}^n x_i y_i ) $, aka inner-product saxpy, or scalar alpha x plus y, $z = \alpha x + y \implies z_i = \alpha x_i + y_i $ The fifth operator, while technically redundant, makes the expressions of linear algebra algorithms more concise.">BLAS Level 1 - Domain Flow Architecture -

BLAS Level 1

BLAS Level 1 are $\mathcal{O}(N)$ class operators. This makes these operators operand access limited +

BLAS Level 1

BLAS Level 1 are $\mathcal{O}(N)$ class operators. This makes these operators operand-access limited,
and they thus require careful distribution in a parallel environment.

There are four basic vector operations, and a fifth convenience operator. Let $\alpha \in \Bbb{R}, x \in \Bbb{R^n}, y \in \Bbb{R^n}, z \in \Bbb{R^n}$, then:

  1. vector scale: scalar-vector multiplication: $z = \alpha x \implies (z_i = \alpha x_i)$
  2. vector element addition: $z = x + y \implies (z_i = x_i + y_i)$
  3. vector element multiply: $z = x * y \implies (z_i = x_i * y_i)$
  4. vector dot product: $c = x^Ty \implies ( c = \sum_{i = 1}^n x_i y_i ) $, aka inner-product
  5. saxpy, or scalar alpha x plus y, $z = \alpha x + y \implies z_i = \alpha x_i + y_i $

The fifth operator, while technically redundant, makes the expressions of linear algebra algorithms more concise.

One class of domain flow programs for these operators assumes a linear distribution of the vectors, @@ -51,12 +51,12 @@ z: alpha[i-1,j,k] * x[i,j-1,k] + y[i,j,k-1] }

The final operator, scalar alpha x plus y, or saxpy, combines the vector scale
and vector element addition operators, and shows the same constraint as the vector scale
-operator due to the required propagation broadcast of the scaling factor.
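
For reference, the sequential form of saxpy is one line per element; this is a minimal sketch in plain C++, not the domain flow notation. The broadcast of $\alpha$ that the domain flow version must organize is hidden in the repeated read of alpha.

// Sequential saxpy reference: z = alpha*x + y, element-wise.
#include <iostream>
#include <vector>

std::vector<double> saxpy(double alpha, const std::vector<double>& x,
                          const std::vector<double>& y) {
    std::vector<double> z(x.size());
    for (size_t i = 0; i < x.size(); ++i) z[i] = alpha * x[i] + y[i];
    return z;
}

int main() {
    for (double zi : saxpy(2.0, {1, 2, 3}, {10, 20, 30}))
        std::cout << zi << ' ';   // 12 24 36
}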

\ No newline at end of file diff --git a/ch4/level2/index.html b/ch4/level2/index.html index 2155416..0a612a1 100644 --- a/ch4/level2/index.html +++ b/ch4/level2/index.html @@ -3,7 +3,7 @@ Let $A \in \Bbb{R^{mxn}}$, the matrix-vector product is defined as: $$z = Ax, \space where \space x \in \Bbb{R^n}$$">BLAS Level 2 - Domain Flow Architecture -

BLAS Level 2

BLAS Level 2 are $\mathcal{O}(N^2)$ class operators, still operand access +

BLAS Level 2

BLAS Level 2 are $\mathcal{O}(N^2)$ class operators, still operand-access
limited as we need to fetch multiple operands per operation without any reuse.
The core operator is the matrix-vector multiplication in all its different forms
specialized for matrix shape — triangular, banded, symmetric — and matrix type — integer, real, complex,
@@ -15,12 +15,12 @@
 x: x[i,j-1,k]
z: a[i,j,k-1] * x[i,j-1,k]
}

Banded, symmetric, and triangular versions simply alter the constraint set of the domains of -computation: the fundamental dependencies do not change.
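
A sequential reference sketch (plain C++, not the domain flow notation) makes the remark about constraint sets concrete: the specialized variants only restrict which (i,j) pairs are visited.

// Sequential matrix-vector reference: z = A*x. Banded, symmetric, and
// triangular variants shrink the set of (i,j) index points visited, i.e.,
// the domain of computation, without changing the dependencies.
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<double>>;

std::vector<double> matvec(const Matrix& A, const std::vector<double>& x) {
    std::vector<double> z(A.size(), 0.0);
    for (size_t i = 0; i < A.size(); ++i)
        for (size_t j = 0; j < x.size(); ++j)
            z[i] += A[i][j] * x[j];
    return z;
}

int main() {
    Matrix A = {{1, 2}, {3, 4}};
    for (double zi : matvec(A, {1, 1})) std::cout << zi << ' ';  // 3 7
}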

\ No newline at end of file diff --git a/ch4/level3/index.html b/ch4/level3/index.html index f2d255b..5da2fda 100644 --- a/ch4/level3/index.html +++ b/ch4/level3/index.html @@ -11,7 +11,7 @@ In addition to matrix-matrix multiply there are the Rank-k update operators, which are outer-products and matrix additions. Here is a Hermitian Rank-k update: $$ C = \alpha A A^T + \beta C, \space where \space C \space is \space Hermitian. $$ A Hermitian matrix is defined as a matrix that is equal to its Hermitian conjugate. In other words, the matrix $C$ is Hermitian if and only if $C = C^H$. Obviously a Hermitian matrix must be square. Hermitian matrices can be understood as the complex extension of real symmetric matrices.">BLAS Level 3 - Domain Flow Architecture -

BLAS Level 3

BLAS Level 3 are $\mathcal{O}(N^3)$ operators, and finally compute bound +

BLAS Level 3

BLAS Level 3 are $\mathcal{O}(N^3)$ operators, and finally compute-bound,
creating many opportunities to optimize operand reuse.

In addition to matrix-matrix multiply there are the Rank-k update operators, which are outer-products and matrix additions.

Here is a Hermitian Rank-k update:

$$ C = \alpha A A^H + \beta C, \quad \text{where } C \text{ is Hermitian.} $$

A Hermitian matrix is defined as a matrix that is equal to its Hermitian conjugate. In other words, the matrix $C$ is Hermitian if and only if $C = C^H$. Obviously a Hermitian @@ -32,12 +32,12 @@ c: c[i,j,k-1] + a[i,j-1,k] * b[i-1,j,k] } } -

Here we introduce a conditional constraint that impacts the domain of computation for a set of equations.
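
A sequential sketch of the real symmetric case (plain C++; for real matrices $A A^H = A A^T$) shows the conditional constraint as a guard on the inner loop bounds.

// Symmetric rank-k update, C = alpha*A*A^T + beta*C, computing only the
// lower triangle j <= i: the conditional constraint shrinks the (i,j,k)
// domain of computation to roughly half its size.
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<double>>;

void syrk(double alpha, const Matrix& A, double beta, Matrix& C) {
    size_t n = A.size(), k = A[0].size();
    for (size_t i = 0; i < n; ++i)
        for (size_t j = 0; j <= i; ++j) {          // conditional constraint
            double s = 0.0;
            for (size_t p = 0; p < k; ++p) s += A[i][p] * A[j][p];
            C[i][j] = alpha * s + beta * C[i][j];
        }
}

int main() {
    Matrix A = {{1, 2}, {3, 4}}, C = {{0, 0}, {0, 0}};
    syrk(1.0, A, 0.0, C);
    std::cout << C[0][0] << ' ' << C[1][0] << ' ' << C[1][1] << '\n';  // 5 11 25
}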

\ No newline at end of file diff --git a/ch5/factorization/index.html b/ch5/factorization/index.html index a2d440d..7f1c91e 100644 --- a/ch5/factorization/index.html +++ b/ch5/factorization/index.html @@ -3,12 +3,12 @@ $$ x = {-b \pm \sqrt{b^2-4ac} \over 2a} $$">Matrix Factorizations - Domain Flow Architecture -

Matrix Factorizations

This is the quadratic equation:

$$ x = {-b \pm \sqrt{b^2-4ac} \over 2a} $$
\ No newline at end of file diff --git a/ch5/index.html b/ch5/index.html index dc6a782..944ab99 100644 --- a/ch5/index.html +++ b/ch5/index.html @@ -1,12 +1,12 @@ Matrix Factorization - Domain Flow Architecture -

Matrix Factorization

Matrix factorizations are the work horse of linear algebra applications. +

Matrix Factorization

Matrix factorizations are the work horse of linear algebra applications. Factorizations create equivalences that improve the usability or robustness -of an algorithm.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch6/index.html b/ch6/index.html index 250b0b8..009c399 100644 --- a/ch6/index.html +++ b/ch6/index.html @@ -1,12 +1,12 @@ Matrix Kernels - Domain Flow Architecture -

Matrix Kernels

Matrix Kernels are important to characterize and classify the underlying system of equations. +

Matrix Kernels

Matrix Kernels are important to characterize and classify the underlying system of equations.
Identifying singularity and quantifying the null-space of a matrix are key operations
before we can try to solve systems of equations.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch6/matrixkernels/index.html b/ch6/matrixkernels/index.html index c094536..55c10cb 100644 --- a/ch6/matrixkernels/index.html +++ b/ch6/matrixkernels/index.html @@ -7,16 +7,16 @@ $L$ is the vector space of all elements $v$ of $V$ such that $L(v) = 0$, where 0 denotes the zero vector in $W, or more symbolically:">Matrix Kernels - Domain Flow Architecture -

Matrix Kernels

In mathematics, the kernel of a linear map, also known as the null space or nullspace, is the linear subspace +

Matrix Kernels

In mathematics, the kernel of a linear map, also known as the null space or nullspace, is the linear subspace of the domain of the map which is mapped to the zero vector. That is, given a linear map

$$L : V \rightarrow W$$ between two vector spaces $V$ and $W$, the kernel of

$L$ is the vector space of all elements $v$ of $V$ such that $L(v) = 0$,
-where $0$ denotes the zero vector in $W$, or more symbolically:

$$ker(L) = \{ v \in V \hspace1ex | \hspace1ex L(v) = 0\} = L^{-1}(0)$$.
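
A worked example, chosen here for illustration: take the rank-1 map

$$A = \begin{pmatrix} 1 & 2 & 3 \\ 2 & 4 & 6 \end{pmatrix} : \Bbb{R^3} \rightarrow \Bbb{R^2}$$

Both rows impose the same condition $v_1 + 2v_2 + 3v_3 = 0$, so

$$ker(A) = span\{(-2, 1, 0)^T, (-3, 0, 1)^T\}, \qquad \dim ker(A) = 3 - rank(A) = 2$$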

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch7/index.html b/ch7/index.html index a73c05c..1556156 100644 --- a/ch7/index.html +++ b/ch7/index.html @@ -1,10 +1,10 @@ Linear Solvers - Domain Flow Architecture -

Linear Solvers

Solving systems of equations is the impetus for the class of algorithms called linear solvers.

\ No newline at end of file diff --git a/ch7/lu/index.html b/ch7/lu/index.html index 4141193..cd47e08 100644 --- a/ch7/lu/index.html +++ b/ch7/lu/index.html @@ -3,15 +3,15 @@ $$A = L \otimes U$$.">Gaussian Elimination - Domain Flow Architecture -

Gaussian Elimination

Gaussian Elimination, also known as $LU$ decomposition, decomposes a linear transformation +

Gaussian Elimination

Gaussian Elimination, also known as $LU$ decomposition, decomposes a linear transformation
defined by the matrix $A$ into a lower-triangular matrix $L$ and an upper-triangular matrix $U$
-such that

$$A = LU$$
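
A worked $2 \times 2$ example, chosen for illustration:

$$\begin{pmatrix} 4 & 3 \\ 6 & 3 \end{pmatrix} = \begin{pmatrix} 1 & 0 \\ 1.5 & 1 \end{pmatrix} \begin{pmatrix} 4 & 3 \\ 0 & -1.5 \end{pmatrix}$$

where the multiplier $1.5 = 6/4$ is the single elimination factor that zeroes out the subdiagonal entry.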

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch7/solvers/index.html b/ch7/solvers/index.html index 0de2073..6e6933e 100644 --- a/ch7/solvers/index.html +++ b/ch7/solvers/index.html @@ -1,10 +1,10 @@ Linear Solvers - Domain Flow Architecture -

Linear Solvers

Linear solvers are algorithms designed to solve a linear system of equations.
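
Factorizations and solvers meet in practice: once $A = LU$ is available, solving $Ax = b$ reduces to two triangular solves. Here is a minimal sketch in plain C++, assuming a unit-diagonal $L$ for brevity.

// Solve Ax = b given A = LU: forward-substitute Ly = b (unit diagonal L),
// then back-substitute Ux = y.
#include <iostream>
#include <vector>

using Matrix = std::vector<std::vector<double>>;

std::vector<double> luSolve(const Matrix& L, const Matrix& U,
                            std::vector<double> b) {
    int n = (int)b.size();
    for (int i = 0; i < n; ++i)                 // forward: y overwrites b
        for (int j = 0; j < i; ++j) b[i] -= L[i][j] * b[j];
    for (int i = n - 1; i >= 0; --i) {          // backward: x overwrites y
        for (int j = i + 1; j < n; ++j) b[i] -= U[i][j] * b[j];
        b[i] /= U[i][i];
    }
    return b;
}

int main() {
    Matrix L = {{1, 0}, {1.5, 1}}, U = {{4, 3}, {0, -1.5}};
    for (double xi : luSolve(L, U, {7, 9}))     // A = {{4,3},{6,3}}, b = {7,9}
        std::cout << xi << ' ';                 // expect 1 1
}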

\ No newline at end of file diff --git a/ch8/conditioning/index.html b/ch8/conditioning/index.html index 860fef0..5c1c83f 100644 --- a/ch8/conditioning/index.html +++ b/ch8/conditioning/index.html @@ -1,10 +1,10 @@ Signal Conditioning - Domain Flow Architecture -
\ No newline at end of file diff --git a/ch8/filters/index.html b/ch8/filters/index.html index 313ad1c..4a05766 100644 --- a/ch8/filters/index.html +++ b/ch8/filters/index.html @@ -1,10 +1,10 @@ Digital Filtering - Domain Flow Architecture -
\ No newline at end of file diff --git a/ch8/identification/index.html b/ch8/identification/index.html index 95c7cd0..f015091 100644 --- a/ch8/identification/index.html +++ b/ch8/identification/index.html @@ -3,7 +3,7 @@ When there are signals and noises, physicists try to identify signals by modeling them, whereas statisticians oppositely try to model noise to identify signals. In this study, we applied the statisticians’ concept of signal detection of physics data with small-size samples and high dimensions without modeling the signals. Most of the data in nature, whether noises or signals, are assumed to be generated by dynamical systems; thus, there is essentially no distinction between these generating processes. We propose that the correlation length of a dynamical system and the number of samples are crucial for the practical definition of noise variables among the signal variables generated by such a system. Since variables with short-term correlations reach normal distributions faster as the number of samples decreases, they are regarded to be noise-like variables, whereas variables with opposite properties are signal-like variables. Normality tests are not effective for data of small-size samples with high dimensions. Therefore, we modeled noises on the basis of the property of a noise variable, that is, the uniformity of the histogram of the probability that a variable is a noise. We devised a method of detecting signal variables from the structural change of the histogram according to the decrease in the number of samples. We applied our method to the data generated by globally coupled map, which can produce time series data with different correlation lengths, and also applied to gene expression data, which are typical static data of small-size samples with high dimensions, and we successfully detected signal variables from them.">Identification - Domain Flow Architecture -

Identification

Identification is the act of recognizing the signal in the presence of noise.

When there are signals and noises, physicists try to identify signals by modeling them, +

Identification

Identification is the act of recognizing the signal in the presence of noise.

When there are signals and noises, physicists try to identify signals by modeling them, whereas statisticians oppositely try to model noise to identify signals. In this study, we applied the statisticians’ concept of signal detection of physics data with small-size samples and high dimensions without modeling the signals. Most of the data in nature, @@ -22,12 +22,12 @@ to the data generated by globally coupled map, which can produce time series data with different correlation lengths, and also applied to gene expression data, which are typical static data of small-size samples with high dimensions, and we successfully -detected signal variables from them.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch8/index.html b/ch8/index.html index 4b38f9c..1206b57 100644 --- a/ch8/index.html +++ b/ch8/index.html @@ -1,13 +1,13 @@ Digital Signal Processing - Domain Flow Architecture -

Digital Signal Processing

Digital Signal Processing is the discrete realization of Analog Signal Processing +

Digital Signal Processing

Digital Signal Processing is the discrete realization of Analog Signal Processing operations used to condition, amplify, characterize, and transform. Digital Signal Processing is essential when interfacing a digital computer -to a physical process to enable reproducible and high-fidelity applications.

\ No newline at end of file + 
\ No newline at end of file diff --git a/ch8/spectral/index.html b/ch8/spectral/index.html index 30ec306..df089a0 100644 --- a/ch8/spectral/index.html +++ b/ch8/spectral/index.html @@ -1,10 +1,10 @@ Spectral Analysis - Domain Flow Architecture -
\ No newline at end of file diff --git a/ch8/transforms/index.html b/ch8/transforms/index.html index f0dadc3..0dc86d9 100644 --- a/ch8/transforms/index.html +++ b/ch8/transforms/index.html @@ -1,10 +1,10 @@ Transforms - Domain Flow Architecture -
\ No newline at end of file diff --git a/contentdev/index.html b/contentdev/index.html index 5230fa7..893cca4 100644 --- a/contentdev/index.html +++ b/contentdev/index.html @@ -1,11 +1,11 @@ Content Development - Domain Flow Architecture -

Content Development

The following pages are examples for content developers to quickly add interactive -content that aids in understanding parallel algorithm design and optimization.

\ No newline at end of file diff --git a/contentdev/prototype/index.html b/contentdev/prototype/index.html index c3157db..2f46c45 100644 --- a/contentdev/prototype/index.html +++ b/contentdev/prototype/index.html @@ -3,7 +3,7 @@ All you need is a tag with an id and some CSS styling and a call into an animation program that fills that canvas.">Prototype - Domain Flow Architecture -

Prototype

Prototype

This is a basic skeleton of a Hugo Markdown page that includes a three.js animation.

All you need is a <canvas> tag with an id and some CSS styling and a call into -an animation program that fills that canvas.

\ No newline at end of file + 
\ No newline at end of file diff --git a/index.html b/index.html index cd1e84d..20099ae 100644 --- a/index.html +++ b/index.html @@ -1,5 +1,5 @@ Domain Flow Architectures -

Search

-

\ No newline at end of file diff --git a/tags/algorithm/index.html b/tags/algorithm/index.html index 3ec5adf..e102683 100644 --- a/tags/algorithm/index.html +++ b/tags/algorithm/index.html @@ -1,10 +1,10 @@ Algorithm - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/computational-spacetime/index.html b/tags/computational-spacetime/index.html index 58ac85d..83fc4a6 100644 --- a/tags/computational-spacetime/index.html +++ b/tags/computational-spacetime/index.html @@ -1,10 +1,10 @@ Computational-Spacetime - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/conditioning/index.html b/tags/conditioning/index.html index 5f129b1..c10de70 100644 --- a/tags/conditioning/index.html +++ b/tags/conditioning/index.html @@ -1,10 +1,10 @@ Conditioning - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/derivation/index.html b/tags/derivation/index.html index da848de..46487b2 100644 --- a/tags/derivation/index.html +++ b/tags/derivation/index.html @@ -1,10 +1,10 @@ Derivation - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/domain-flow/index.html b/tags/domain-flow/index.html index b709dfe..bf024d3 100644 --- a/tags/domain-flow/index.html +++ b/tags/domain-flow/index.html @@ -1,10 +1,10 @@ Domain-Flow - Tag - Domain Flow Architecture -

Tag - Domain-Flow

A

  • An Example

C

D

F

L

P

\ No newline at end of file diff --git a/tags/dsp/index.html b/tags/dsp/index.html index 5331c13..f50129a 100644 --- a/tags/dsp/index.html +++ b/tags/dsp/index.html @@ -1,10 +1,10 @@ Dsp - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/filtering/index.html b/tags/filtering/index.html index 238fa42..b039b7b 100644 --- a/tags/filtering/index.html +++ b/tags/filtering/index.html @@ -1,10 +1,10 @@ Filtering - Tag - Domain Flow Architecture -

Tag - Filtering

D

\ No newline at end of file diff --git a/tags/free-schedule/index.html b/tags/free-schedule/index.html index 24934cd..f91b9e9 100644 --- a/tags/free-schedule/index.html +++ b/tags/free-schedule/index.html @@ -1,10 +1,10 @@ Free-Schedule - Tag - Domain Flow Architecture -

Tag - Free-Schedule

F

\ No newline at end of file diff --git a/tags/identification/index.html b/tags/identification/index.html index 24afdff..a45b341 100644 --- a/tags/identification/index.html +++ b/tags/identification/index.html @@ -1,10 +1,10 @@ Identification - Tag - Domain Flow Architecture -

Tag - Identification

I

\ No newline at end of file diff --git a/tags/index-space/index.html b/tags/index-space/index.html index 95841cd..d593523 100644 --- a/tags/index-space/index.html +++ b/tags/index-space/index.html @@ -1,10 +1,10 @@ Index-Space - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/index.html b/tags/index.html index 46e2633..82e4500 100644 --- a/tags/index.html +++ b/tags/index.html @@ -1,10 +1,10 @@ Tags - Domain Flow Architecture - \ No newline at end of file diff --git a/tags/lattice/index.html b/tags/lattice/index.html index 0438d23..7534666 100644 --- a/tags/lattice/index.html +++ b/tags/lattice/index.html @@ -1,10 +1,10 @@ Lattice - Tag - Domain Flow Architecture -
\ No newline at end of file diff --git a/tags/linear-schedule/index.html b/tags/linear-schedule/index.html index 28bcf6c..642a042 100644 --- a/tags/linear-schedule/index.html +++ b/tags/linear-schedule/index.html @@ -1,10 +1,10 @@ Linear-Schedule - Tag - Domain Flow Architecture -

Tag - Linear-Schedule

L

\ No newline at end of file diff --git a/tags/matrix-multiply/index.html b/tags/matrix-multiply/index.html index 16bff13..8c97049 100644 --- a/tags/matrix-multiply/index.html +++ b/tags/matrix-multiply/index.html @@ -1,10 +1,10 @@ Matrix-Multiply - Tag - Domain Flow Architecture -

Tag - Matrix-Multiply

A

  • An Example

C

D

F

L

P

\ No newline at end of file diff --git a/tags/spectral-analysis/index.html b/tags/spectral-analysis/index.html index a35c0c6..87c63b2 100644 --- a/tags/spectral-analysis/index.html +++ b/tags/spectral-analysis/index.html @@ -1,10 +1,10 @@ Spectral-Analysis - Tag - Domain Flow Architecture -

Tag - Spectral-Analysis

S

\ No newline at end of file diff --git a/tags/transform/index.html b/tags/transform/index.html index c4bd636..7e75ece 100644 --- a/tags/transform/index.html +++ b/tags/transform/index.html @@ -1,10 +1,10 @@ Transform - Tag - Domain Flow Architecture -

Tag - Transform

T

  • Transforms
\ No newline at end of file