Add ruff linter #201

Open · wants to merge 18 commits into base: main
Changes from 5 commits
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,12 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.4
hooks:
- id: ruff
args: [--fix]
- id: ruff-format

default_language_version:
python: python3
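
With this config in place, ruff (with --fix) and ruff-format run on staged files at commit time. A minimal sketch of enabling the hooks locally, assuming the pre-commit package is installed in the contributor's environment; these are the standard pre-commit CLI commands, invoked from Python here purely for illustration:

import subprocess

# Register the git hook so ruff and ruff-format run automatically on `git commit`.
subprocess.run(["pre-commit", "install"], check=True)

# One-off pass over the whole repository, useful right after adding the config.
subprocess.run(["pre-commit", "run", "--all-files"], check=True)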
44 changes: 27 additions & 17 deletions continuous_integration/scripts/render-template.py
@@ -3,9 +3,9 @@
import argparse
import json
from jinja2 import Environment, FileSystemLoader
import os
import re


# TODO: make this work for arbitrary context. ie. implement replace_using_context()
def replace_placeholder(source_str, variable_name, variable_value):
# Escaping any regex special characters in variable_name
@@ -14,39 +14,49 @@ def replace_placeholder(source_str, variable_name, variable_value):
# Using regular expression to replace ${variable_name} with actual variable_value
# \s* means any amount of whitespace (including none)
# pattern = rf'\$\{{\s*\{{\s*{variable_name_escaped}\s*\}}\s*\}}'
pattern = rf'<<\s*{variable_name_escaped}\s*>>'
pattern = rf"<<\s*{variable_name_escaped}\s*>>"
return re.sub(pattern, variable_value.strip(), source_str)


# Setup command-line argument parsing
parser = argparse.ArgumentParser(description='Render a Jinja2 template using a JSON context.')
parser.add_argument('template_file', type=str, help='Path to the Jinja2 template file (with .j2 extension).')
parser.add_argument('json_file', type=str, help='Path to the JSON file to use as the rendering context.')
parser.add_argument('output_file', type=str, help='Path to the output file.')
parser = argparse.ArgumentParser(
description="Render a Jinja2 template using a JSON context."
)
parser.add_argument(
"template_file",
type=str,
help="Path to the Jinja2 template file (with .j2 extension).",
)
parser.add_argument(
"json_file", type=str, help="Path to the JSON file to use as the rendering context."
)
parser.add_argument("output_file", type=str, help="Path to the output file.")

args = parser.parse_args()

# Load JSON file as the rendering context
with open(args.json_file, 'r') as file:
with open(args.json_file, "r") as file:
context = json.load(file)

# Setup Jinja2 environment and load the template
env = Environment(
loader=FileSystemLoader(searchpath='./'),
variable_start_string='<<',
variable_end_string='>>',
block_start_string='<%',
block_end_string='%>',
comment_start_string='<#',
comment_end_string='#>')
env.filters['replace_placeholder'] = replace_placeholder
loader=FileSystemLoader(searchpath="./"),
variable_start_string="<<",
variable_end_string=">>",
block_start_string="<%",
block_end_string="%>",
comment_start_string="<#",
comment_end_string="#>",
)
env.filters["replace_placeholder"] = replace_placeholder

template = env.get_template(args.template_file)

# Render the template with the context
rendered_content = template.render(context)
# print(rendered_content)

with open(args.output_file, 'w') as file:
with open(args.output_file, "w") as file:
file.write(rendered_content)

print(f'Template rendered successfully. Output saved to {args.output_file}')
print(f"Template rendered successfully. Output saved to {args.output_file}")
6 changes: 3 additions & 3 deletions cuda_bindings/benchmarks/kernels.py
@@ -5,9 +5,9 @@
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
kernel_string = '''\
kernel_string = """\
#define ITEM_PARAM(x, T) T x
#define REP1(x, T) , ITEM_PARAM(x, T)
#define REP1(x, T) , ITEM_PARAM(x, T)
#define REP2(x, T) REP1(x##0, T) REP1(x##1, T)
#define REP4(x, T) REP2(x##0, T) REP2(x##1, T)
#define REP8(x, T) REP4(x##0, T) REP4(x##1, T)
@@ -160,4 +160,4 @@
// Do not touch param to prevent compiler from copying
// the whole structure from const bank to lmem.
}
'''
"""
50 changes: 32 additions & 18 deletions cuda_bindings/benchmarks/perf_test_utils.py
@@ -5,27 +5,30 @@
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
import numpy as np
import pytest

from cuda import cuda, cudart, nvrtc
import numpy as np


def ASSERT_DRV(err):
if isinstance(err, cuda.CUresult):
if err != cuda.CUresult.CUDA_SUCCESS:
raise RuntimeError('Cuda Error: {}'.format(err))
raise RuntimeError(f"Cuda Error: {err}")
elif isinstance(err, cudart.cudaError_t):
if err != cudart.cudaError_t.cudaSuccess:
raise RuntimeError('Cudart Error: {}'.format(err))
raise RuntimeError(f"Cudart Error: {err}")
elif isinstance(err, nvrtc.nvrtcResult):
if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
raise RuntimeError('Nvrtc Error: {}'.format(err))
raise RuntimeError(f"Nvrtc Error: {err}")
else:
raise RuntimeError('Unknown error type: {}'.format(err))
raise RuntimeError(f"Unknown error type: {err}")


@pytest.fixture
def init_cuda():
# Initialize
err, = cuda.cuInit(0)
(err,) = cuda.cuInit(0)
ASSERT_DRV(err)
err, device = cuda.cuDeviceGet(0)
ASSERT_DRV(err)
@@ -38,31 +41,42 @@ def init_cuda():

yield device, ctx, stream

err, = cuda.cuStreamDestroy(stream)
(err,) = cuda.cuStreamDestroy(stream)
ASSERT_DRV(err)
err, = cuda.cuCtxDestroy(ctx)
(err,) = cuda.cuCtxDestroy(ctx)
ASSERT_DRV(err)


@pytest.fixture
def load_module():
module = None

def _load_module(kernel_string, device):
nonlocal module
# Get module
err, major = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)
err, major = cuda.cuDeviceGetAttribute(
cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device
)
ASSERT_DRV(err)
err, minor = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)
err, minor = cuda.cuDeviceGetAttribute(
cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device
)
ASSERT_DRV(err)

err, prog = nvrtc.nvrtcCreateProgram(str.encode(kernel_string), b'kernelString.cu', 0, [], [])
err, prog = nvrtc.nvrtcCreateProgram(
str.encode(kernel_string), b"kernelString.cu", 0, [], []
)
ASSERT_DRV(err)
opts = [b'--fmad=false', bytes('--gpu-architecture=sm_' + str(major) + str(minor), 'ascii')]
err, = nvrtc.nvrtcCompileProgram(prog, 2, opts)
opts = [
b"--fmad=false",
bytes("--gpu-architecture=sm_" + str(major) + str(minor), "ascii"),
]
(err,) = nvrtc.nvrtcCompileProgram(prog, 2, opts)

err_log, logSize = nvrtc.nvrtcGetProgramLogSize(prog)
ASSERT_DRV(err_log)
log = b' ' * logSize
err_log, = nvrtc.nvrtcGetProgramLog(prog, log)
log = b" " * logSize
(err_log,) = nvrtc.nvrtcGetProgramLog(prog, log)
ASSERT_DRV(err_log)
result = log.decode()
if len(result) > 1:
@@ -71,8 +85,8 @@ def _load_module(kernel_string, device):
ASSERT_DRV(err)
err, cubinSize = nvrtc.nvrtcGetCUBINSize(prog)
ASSERT_DRV(err)
cubin = b' ' * cubinSize
err, = nvrtc.nvrtcGetCUBIN(prog, cubin)
cubin = b" " * cubinSize
(err,) = nvrtc.nvrtcGetCUBIN(prog, cubin)
ASSERT_DRV(err)
cubin = np.char.array(cubin)
err, module = cuda.cuModuleLoadData(cubin)
@@ -82,5 +96,5 @@ def _load_module(kernel_string, device):

yield _load_module

err, = cuda.cuModuleUnload(module)
(err,) = cuda.cuModuleUnload(module)
ASSERT_DRV(err)
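
A recurring, purely stylistic change in this file is ruff-format rewriting single-target tuple unpacking from err, = ... to (err,) = .... A tiny sketch showing the two spellings are equivalent; the function below is a stand-in for a binding call, not part of cuda-python:

def fake_binding_call():
    # Stand-in for a cuda-python call that returns a one-element result tuple.
    return (0,)

err, = fake_binding_call()    # valid, but the trailing comma is easy to overlook
(err,) = fake_binding_call()  # same unpacking; parentheses make the tuple explicit
assert err == 0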
35 changes: 24 additions & 11 deletions cuda_bindings/benchmarks/test_cupy.py
@@ -5,42 +5,47 @@
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
import pytest
import ctypes

import pytest

# Always skip since cupy is not CTK 12.x yet
skip_tests = True
if not skip_tests:
try:
import cupy

skip_tests = False
except ImportError:
skip_tests = True

from .kernels import kernel_string


def launch(kernel, args=()):
kernel((1,), (1,), args)


# Measure launch latency with no parameters
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_empty_kernel(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('empty_kernel')
kernel = module.get_function("empty_kernel")

stream = cupy.cuda.stream.Stream(non_blocking=True)

with stream:
benchmark(launch, kernel)
stream.synchronize()


# Measure launch latency with a single parameter
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel')
kernel = module.get_function("small_kernel")
cupy.cuda.set_allocator()
arg = cupy.cuda.alloc(ctypes.sizeof(ctypes.c_float))

@@ -50,12 +55,13 @@ def test_launch_latency_small_kernel(benchmark):
benchmark(launch, kernel, (arg,))
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_512_args(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_512_args')
kernel = module.get_function("small_kernel_512_args")
cupy.cuda.set_allocator()

args = []
@@ -69,12 +75,13 @@ def test_launch_latency_small_kernel_512_args(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_512_bools(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_512_bools')
kernel = module.get_function("small_kernel_512_bools")
cupy.cuda.set_allocator()

args = [True] * 512
@@ -86,12 +93,13 @@ def test_launch_latency_small_kernel_512_bools(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_512_doubles(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_512_doubles')
kernel = module.get_function("small_kernel_512_doubles")
cupy.cuda.set_allocator()

args = [1.2345] * 512
@@ -103,12 +111,13 @@ def test_launch_latency_small_kernel_512_doubles(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_512_ints(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_512_ints')
kernel = module.get_function("small_kernel_512_ints")
cupy.cuda.set_allocator()

args = [123] * 512
@@ -120,12 +129,13 @@ def test_launch_latency_small_kernel_512_ints(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_512_bytes(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_512_chars')
kernel = module.get_function("small_kernel_512_chars")
cupy.cuda.set_allocator()

args = [127] * 512
@@ -137,12 +147,13 @@ def test_launch_latency_small_kernel_512_bytes(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_512_longlongs(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_512_longlongs')
kernel = module.get_function("small_kernel_512_longlongs")
cupy.cuda.set_allocator()

args = [9223372036854775806] * 512
@@ -154,12 +165,13 @@ def test_launch_latency_small_kernel_512_longlongs(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_256_args(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_256_args')
kernel = module.get_function("small_kernel_256_args")
cupy.cuda.set_allocator()

args = []
@@ -173,12 +185,13 @@ def test_launch_latency_small_kernel_256_args(benchmark):
benchmark(launch, kernel, args)
stream.synchronize()


# Measure launch latency with many parameters using builtin parameter packing
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_small_kernel_16_args(benchmark):
module = cupy.RawModule(code=kernel_string)
kernel = module.get_function('small_kernel_16_args')
kernel = module.get_function("small_kernel_16_args")
cupy.cuda.set_allocator()

args = []