Skip to content

Commit

Permalink
CUDA11
Browse files Browse the repository at this point in the history
* CUDA11 initial work. First, we generate the new enums

* Added generateEnums, which generates the Go version of the CUresult type

* Updated tests such that they no longer fail.
Added a Signal() method to BatchedContext, to force the BatchedContext to DoWork

* Updated benchmarking of batched vs no batched context. It would appear that for now Batching no longer confers a benefit

* Attempt #4 at getting CUDA11. Previous attempts were working based off a faulty copy of `cuda.h`

- Updated Device to support UUID
- Updated README
- Updated genlib to do more things more carefully

* More work on CUDA11
- Added more mappings into mappings.go to generate stufff
- Changed the definition of Context, by adding one additional method to clear L2Cache
- Added stubs for LaunchCooperativeKernel
- Added Graph types.

TODO next: add all the basic Graph data structure and then autogenerate all the things!

* Fixed mappings to also include @egonelbre's change in 2e25e65
Fixed a bug where Fix() wasn't called, leading to weird generations

* Added some graph stuff, fixed some mappings stuff for genAPI. It seems that the graph functions will have to be manually written for now

* Updated graph.go from ages ago

* Updated more of CUDA11 Graph API into the library.
Slowly getting there.

* Added the body of CopyParams

* Added AddMemsetNode method for Graph.

* Fixed a bunch of things

* Switched to modernc.org/cc instead of using the older github.com/cznic/cc

* cuDNN updated their website. So parse.py also has to change.
As a result moredecls.go also changed

* Sorted the data in mappings.go. This will allow for better diffing

* Updated the generatethis pipeline

* Initial mappings generation.

* Mapped the old commented out mappings to new commented out mappings (see mappings.ods)

* Generated enums.

* Updated enums and enum strings

* Added more generated data structures

* Added methods

* Generated stubs. 7 TODOs

* Added more incompletes report

* Manually fixed the TODO of SpatialTransformer

* Manually fixed generated_rnndata.go

* Manually fixed generated_seqdata.go

* Manually fixed generated_backend.go

* Manually fixed generated_tensortransform.go

* Fixed the missing getters

* fixed all the .C()s of the generated types

* Generated a new API

* Fixed random C int issues. Now to handle the rest

* Updated INCOMPLETES_REPORTS

* fixed variable collition in _BackendAttributeTypeNames

* gencudnn enum generation syntax fixes added

* Updated INCOMPLETES

* variable renaming added as per the review

* AlgorithmDescriptor syntax fixes added

* AlgorithmPerformance syntax fixes added

* Activation cudnnActivationDescriptor_t return method name change added

* syntax fixes added on FusedOpVariantParams

* FusedOpConsts syntax fixes added

* C type retrieve function added for cudnnStatus

* tensor file syntax fixes added
tensor file unreachable code removed

* method receiver renaming added

* optensor syntax fixes added

* generated_api syntax fixes added

* code review changes added

* go modules updated
algorithmdescriptor Algorithm type changes added

* review changes added
GetRNNLinLayerBiasParams & GetRNNLinLayerMatrixParams methods moved to manually written API.go file

* Fixed a bug in parse.py where when parsing the documentation for CUDA11, the function names have `()`

* Removed deprecated functions from being generated

* More deprecated stuff no longer generated

* Fixed up algorithmdescriptor.go

* fixed some auto generated issues

* Manually fixed the fused ops generation

* Fixed even more autogenerated errors

* Fixed up more of the auto generated issues

* Renamed API to todo, because eh, I'll figure it out later

Co-authored-by: Aruna Prabhashwara <[email protected]>
  • Loading branch information
chewxy and wgarunap authored Aug 2, 2021
1 parent 5b83640 commit a41082c
Show file tree
Hide file tree
Showing 74 changed files with 5,742 additions and 2,345 deletions.
3 changes: 3 additions & 0 deletions batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ func (ctx *BatchedContext) enqueue(c call) (retVal DevicePtr, err error) {
// WorkAvailable returns the chan where work availability is broadcasted on.
func (ctx *BatchedContext) WorkAvailable() <-chan struct{} { return ctx.workAvailable }

// Signal is used to tell the context that work is available
func (ctx *BatchedContext) Signal() { ctx.workAvailable <- struct{}{} }

// DoWork waits for work to come in from the queue. If it's blocking, the entire queue will be processed immediately.
// Otherwise it will be added to the batch queue.
func (ctx *BatchedContext) DoWork() {
Expand Down
57 changes: 44 additions & 13 deletions batch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package cu
import (
"log"
"runtime"
"sync/atomic"
"testing"
"unsafe"

_ "net/http/pprof"
)

func TestBatchContext(t *testing.T) {
Expand Down Expand Up @@ -123,8 +126,8 @@ func TestLargeBatch(t *testing.T) {

dev.TotalMem()

beforeFree, _, _ := MemInfo()
ctx := newContext(cuctx)
beforeFree, _, _ := MemInfo()
bctx := NewBatchedContext(ctx, dev)

runtime.LockOSThread()
Expand All @@ -140,7 +143,8 @@ func TestLargeBatch(t *testing.T) {
}
size := int64(len(a) * 4)

go func() {
var freeCount uint32
go func(fc *uint32) {
var memA, memB DevicePtr
var frees []DevicePtr

Expand Down Expand Up @@ -175,13 +179,13 @@ func TestLargeBatch(t *testing.T) {

bctx.MemcpyDtoH(unsafe.Pointer(&a[0]), memA, size)
bctx.MemcpyDtoH(unsafe.Pointer(&b[0]), memB, size)
log.Printf("Number of frees %v", len(frees))
for _, free := range frees {
bctx.MemFree(free)
}
atomic.AddUint32(fc, uint32(len(frees)))
bctx.workAvailable <- struct{}{}
doneChan <- struct{}{}
}()
}(&freeCount)

loop:
for {
Expand All @@ -205,14 +209,18 @@ loop:
break
}
}

mod.Unload()
afterFree, _, _ := MemInfo()
cuctx.Destroy()
runtime.GC()

if freeCount != 16114 {
t.Errorf("Expected 16114 frees. Got %d instead", freeCount)
}
if afterFree != beforeFree {
t.Errorf("Before: Freemem: %v. After %v | Diff %v", beforeFree, afterFree, (beforeFree-afterFree)/1024)
t.Logf("Before: Freemem: %v. After %v | Diff %v", beforeFree, afterFree, (beforeFree-afterFree)/1024)
}
mod.Unload()
cuctx.Destroy()

}

func BenchmarkNoBatching(bench *testing.B) {
Expand Down Expand Up @@ -285,6 +293,10 @@ func BenchmarkNoBatching(bench *testing.B) {
bench.Fatalf("Failed to copy memory to b: %v", err)
}
}
// useful for checking results
// if i == 0 {
// bench.Logf("%v", a[:10])
// }
}
MemFree(memA)
MemFree(memB)
Expand Down Expand Up @@ -347,20 +359,39 @@ func BenchmarkBatching(bench *testing.B) {
workAvailable := bctx.WorkAvailable()
for i := 0; i < bench.N; i++ {
for j := 0; j < 100; j++ {
select {
case <-workAvailable:
bctx.DoWork()
default:
done := make(chan struct{}, 1)
go func(done chan struct{}) {
bctx.MemcpyHtoD(memA, unsafe.Pointer(&a[0]), size)
bctx.MemcpyHtoD(memB, unsafe.Pointer(&b[0]), size)
bctx.LaunchKernel(fn, 100, 10, 1, 1000, 1, 1, 0, Stream{}, args)
bctx.Synchronize()
bctx.MemcpyDtoH(unsafe.Pointer(&a[0]), memA, size)
bctx.MemcpyDtoH(unsafe.Pointer(&b[0]), memB, size)
bctx.Signal()
done <- struct{}{}
}(done)

work:
for {
select {
case <-workAvailable:
bctx.DoWork()
case <-done:
break work
}
}

}

if err := bctx.Errors(); err != nil {
bench.Fatalf("Failed with errors in benchmark %d. Error: %v", i, err)
}
}

// useful for checking results
// if i == 0 {
// bench.Logf("%v", a[:10])
// }
}
MemFree(memA)
MemFree(memB)
mod.Unload()
Expand Down
2 changes: 1 addition & 1 deletion blas/cgoflags.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ package cublas

// #cgo CFLAGS: -I/usr/local/cuda-9.0/targets/x86_64-linux/include -I/usr/local/cuda/include
// #cgo LDFLAGS: -lcublas
// #cgo LDFLAGS: -L/usr/local/cuda-9.0/targets/x86_64-linux/lib -L/usr/local/cuda/lib64
// #cgo LDFLAGS: -L/usr/local/cuda-9.0/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/lib/x86_64-linux-gnu
import "C"
7 changes: 3 additions & 4 deletions blas/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"github.com/pkg/errors"
"gonum.org/v1/gonum/blas"
"gorgonia.org/cu"
"gorgonia.org/cu/blas"
cublas "gorgonia.org/cu/blas"
"gorgonia.org/tensor"
)

Expand Down Expand Up @@ -77,9 +77,8 @@ func (e *Engine) ContextErr() error { return e.ctx.Error() }

type foomem []float64

func (m foomem) Uintptr() uintptr { return uintptr(unsafe.Pointer(&m[0])) }
func (m foomem) Pointer() unsafe.Pointer { return unsafe.Pointer(&m[0]) }
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }
func (m foomem) Uintptr() uintptr { return uintptr(unsafe.Pointer(&m[0])) }
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }

func (e *Engine) checkThreeFloat(a, b, ret tensor.Tensor) (ad, bd, retVal *tensor.Dense, err error) {
if /*a.IsNativelyAccessible() &&*/ !a.IsManuallyManaged() {
Expand Down
15 changes: 4 additions & 11 deletions cgoflags.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,14 @@ package cu
//#cgo LDFLAGS:-lcuda
//
////default location:
//#cgo linux,windows LDFLAGS:-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib
//#cgo linux,windows CFLAGS: -I/usr/local/cuda/include/
//#cgo linux LDFLAGS:-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib
//#cgo linux CFLAGS: -I/usr/local/cuda/include
//
////default location if not properly symlinked:
//#cgo linux LDFLAGS:-L/usr/local/cuda-11.0/targets/x86_64-linux/lib
//#cgo linux LDFLAGS:-L/usr/local/cuda-10.2/lib64 -L/usr/local/cuda-10.2/lib
//#cgo linux LDFLAGS:-L/usr/local/cuda-10.1/lib64 -L/usr/local/cuda-10.1/lib
//#cgo linux LDFLAGS:-L/usr/local/cuda-6.0/lib64 -L/usr/local/cuda-6.0/lib
//#cgo linux LDFLAGS:-L/usr/local/cuda-5.5/lib64 -L/usr/local/cuda-5.5/lib
//#cgo linux LDFLAGS:-L/usr/local/cuda-5.0/lib64 -L/usr/local/cuda-5.0/lib
//#cgo linux CFLAGS: -I/usr/local/cuda-11.0/targets/x86_64-linux/include
//#cgo linux CFLAGS: -I/usr/local/cuda-10.2/include/
//#cgo linux CFLAGS: -I/usr/local/cuda-10.1/include/
//#cgo linux CFLAGS: -I/usr/local/cuda-6.0/include/
//#cgo linux CFLAGS: -I/usr/local/cuda-5.5/include/
//#cgo linux CFLAGS: -I/usr/local/cuda-5.0/include/
//
////Ubuntu 15.04:
//#cgo linux LDFLAGS:-L/usr/lib/x86_64-linux-gnu/
//#cgo linux CFLAGS: -I/usr/include
Expand Down
7 changes: 3 additions & 4 deletions cmd/cublas_integration_example/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"unsafe"

"gorgonia.org/cu"
"gorgonia.org/cu/blas"
cublas "gorgonia.org/cu/blas"
"gorgonia.org/tensor"
)

Expand Down Expand Up @@ -74,6 +74,5 @@ func (e *Engine) ContextErr() error { return e.ctx.Error() }

type foomem []float64

func (m foomem) Uintptr() uintptr { return uintptr(unsafe.Pointer(&m[0])) }
func (m foomem) Pointer() unsafe.Pointer { return unsafe.Pointer(&m[0]) }
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }
func (m foomem) Uintptr() uintptr { return uintptr(unsafe.Pointer(&m[0])) }
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }
4 changes: 2 additions & 2 deletions cmd/gencublas/binding.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import (
"strings"
"text/template"

"github.com/cznic/cc"
"github.com/cznic/xc"
bg "github.com/gorgonia/bindgen"
"modernc.org/cc"
"modernc.org/xc"
)

var goTypes = map[bg.TypeKey]bg.Template{
Expand Down
2 changes: 1 addition & 1 deletion cmd/gencublas/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ import (
"path"
"strings"

"github.com/cznic/cc"
bg "github.com/gorgonia/bindgen"
"modernc.org/cc"
)

var (
Expand Down
15 changes: 14 additions & 1 deletion cmd/gencudnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,17 @@ cudnnStatus_t cudnnGetCallback(
unsigned *mask,
void **udata,
cudnnCallback_t *fptr);
```
```


# TODOs

## Stubs ##

* /home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_ctcloss.go. TODO: true
* ~~/home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_spatialtransformer.go. TODO: true~~
* ~~/home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_seqdata.go. TODO: true~~
* ~~/home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_backend.go. TODO: true~~
* ~~/home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_rnndata.go. TODO: true~~
* ~~/home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_tensortransform.go. TODO: true~~
* /home/chewxy/workspace/gorgoniaws/src/gorgonia.org/cu/dnn/generated_algorithmdescriptor.go. TODO: true
1 change: 1 addition & 0 deletions cmd/gencudnn/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func csig2gosig(cs *bg.CSignature, retVal *GoSignature) (*GoSignature, error) {

ioParamList := ioParams[cs.Name]
for i, p := range params {

_, isRetVal := retValPos[i]
name := p.Name()
typeName := goNameOf(p.Type())
Expand Down
27 changes: 27 additions & 0 deletions cmd/gencudnn/declarations.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package main

import "strings"

var empty struct{}

var ignoredEnums = map[string]struct{}{
Expand Down Expand Up @@ -53,6 +55,17 @@ var ctypes2GoTypes = map[string]string{
"cudnnDropoutDescriptor_t": "Dropout",
"cudnnRNNDescriptor_t": "RNN",
"cudnnPersistentRNNPlan_t": "PersistentRNNPlan",

// cuda11
"cudnnFusedOpsVariantParamPack_t": "FusedOpVariantParams",
"cudnnFusedOpsConstParamPack_t": "FusedOpConsts",
"cudnnSeqDataDescriptor_t": "SeqData",
"cudnnTensorTransformDescriptor_t": "TensorTransform",
"cudnnAlgorithmDescriptor_t": "AlgorithmDescriptor",
"cudnnAlgorithmPerformance_t": "AlgorithmPerformance",
"cudnnBackendDescriptor_t": "Backend",
"cudnnRNNDataDescriptor_t": "RNNData",
"cudnnAttnDescriptor_t": "Attention",
}

var alphaBetaParams = []string{
Expand All @@ -69,6 +82,8 @@ var builtins = map[string]string{
"unsigned long long": "uint64",

"size_t": "uintptr",

"int64_t": "int64",
}

var go2cBuiltins = map[string]string{
Expand All @@ -80,6 +95,8 @@ var go2cBuiltins = map[string]string{
"uint64": "ulonglong",

"uintptr": "size_t",

"int64": "int64_t",
}

var nonPrimitives = map[string]string{
Expand All @@ -101,3 +118,13 @@ var fnParamTypes = map[string]map[string]string{
"cudnnFindConvolutionBackwardDataAlgorithm": {"returnedAlgoCount": "int"},
"cudnnFindConvolutionBackwardDataAlgorithmEx": {"returnedAlgoCount": "int"},
}

var deprecated = make(map[string]struct{})

func init() {
for n, doc := range docs {
if strings.Contains(doc, "has been deprecated in cuDNN 8.0.") {
deprecated[n] = struct{}{}
}
}
}
18 changes: 6 additions & 12 deletions cmd/gencudnn/generatethis.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ import (
"os"
"strings"

"github.com/cznic/cc"
"github.com/gorgonia/bindgen"
"github.com/kr/pretty"
"modernc.org/cc"
)

// generate this contains function to generate for THIS package (main)

// generateMappings is used to generate the mappings
func generateMappings(appendCurrent bool) {
func generateMappings(appendCurrent bool, fns ...func(buf io.WriteCloser, t *cc.TranslationUnit)) {
hdr := "package main\n"

initfn := `
Expand All @@ -38,17 +38,11 @@ func generateMappings(appendCurrent bool) {
fmt.Fprintln(buf, hdr)
bindgen.GenIgnored(buf, t, functions)
fmt.Fprintln(buf, initfn)
bindgen.GenNameMap(buf, t, "fnNameMap", processNameBasic, functions, true)
bindgen.GenNameMap(buf, t, "enumMappings", processNameBasic, enums, true)

generateCRUD(buf, t, "create")
generateCRUD(buf, t, "set")
generateCRUD(buf, t, "destroy")
generateCRUD(buf, t, "methods")
fmt.Fprintln(buf, "}\n")
}
generateAlphaBeta(buf, t)
fmt.Fprintln(buf, initfn)

for _, fn := range fns {
fn(buf, t)
}
fmt.Fprintln(buf, "}\n")
}

Expand Down
Loading

0 comments on commit a41082c

Please sign in to comment.