Skip to content

Commit

Permalink
ptx: parse all CUDA SDK
Browse files Browse the repository at this point in the history
  • Loading branch information
romnn committed Apr 11, 2024
1 parent a7d29ca commit af00bde
Show file tree
Hide file tree
Showing 10 changed files with 335 additions and 93 deletions.
3 changes: 3 additions & 0 deletions ptx/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ itertools = "0"
object = "0"
bytes = "1"

clap = { version = "4", features = [ "derive" ] }

[dev-dependencies]
once_cell = "1"
snailquote = "0"
diff = { path = "../diff" }
regex = "1"
3 changes: 3 additions & 0 deletions ptx/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ The provided libraries may in the future be used for
```bash
docker run -v "$PWD/kernels/:/out" ptxsamples
```

i = 194
[ptx/src/parser.rs:1909:13] &kernel.path() = "/Users/roman/dev/box/ptx/kernels/cuda_12_3_r123compiler33567101_0_sm50_newdelete.1.sm_50.ptx"
1 change: 1 addition & 0 deletions ptx/bison/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ color-eyre = "0"
duct = "0"
bindgen = "0"
cc = { version = "1", features = [] }
utils = { path = "../../utils" }
6 changes: 4 additions & 2 deletions ptx/bison/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ fn build_ptx_parser() -> eyre::Result<()> {
// generated_ptx_lexer,
// generated_ptx_parser,
// ];
let sources = [generated_files.clone(), vec![
let sources = vec![
source_dir.join("util.cc"),
source_dir.join("gpgpu.cc"),
source_dir.join("gpgpu_sim.cc"),
Expand All @@ -189,7 +189,9 @@ fn build_ptx_parser() -> eyre::Result<()> {
source_dir.join("operand_info.cc"),
source_dir.join("symbol.cc"),
source_dir.join("lib.cc"),
]].concat();
];
// let sources = utils::fs::multi_glob([source_dir.join("*.cc").to_string_lossy().to_string()]).collect::<Result<_, _>>()?;
let sources = [generated_files.clone(), sources].concat();
// let sources = vec![
// source_dir.join("memory_space.cc"),
// ];
Expand Down
10 changes: 10 additions & 0 deletions ptx/bison/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use color_eyre::eyre;
use clap::Parser;
use std::path::PathBuf;
use std::ffi::CString;
use std::time::Instant;

#[derive(Parser, Debug, Clone)]
pub struct ParsePTXOptions {
Expand All @@ -25,8 +26,17 @@ fn main() -> eyre::Result<()> {

match options.command {
Command::ParsePTX(ParsePTXOptions {ptx_path}) => {
let code_size_bytes = std::fs::metadata(&ptx_path)?.len();
let path = CString::new(ptx_path.to_string_lossy().as_bytes())?;
let start = Instant::now();
unsafe { ptxbison::bindings::load_ptx_from_filename(path.as_c_str().as_ptr()) };
let dur = start.elapsed();
let dur_millis = dur.as_millis();
let dur_secs = dur.as_secs_f64();
let code_size_mib = code_size_bytes as f64 / (1024.0*1024.0);
let mib_per_sec = code_size_mib / dur_secs;
println!("parsing {} took {} ms ({:3.3} MiB/s)", ptx_path.display(), dur_millis, mib_per_sec);

}
}

Expand Down
6 changes: 3 additions & 3 deletions ptx/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,6 @@ pub enum ParseError<'a> {
// pub value: f64,
// }

#[derive(Debug, FromPest)]
#[pest_ast(rule(Rule::EOI))]
struct EOI;
// #[derive(Debug, FromPest)]
// #[pest_ast(rule(Rule::EOI))]
// struct EOI;
13 changes: 3 additions & 10 deletions ptx/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
#![allow(dead_code)]

#[macro_use]
extern crate pest_derive;
#[macro_use]
extern crate pest_ast;
#[macro_use]
extern crate pest;
pub mod ast;
pub mod parser;

mod ast;
mod parser;

use crate::parser::Rule;
use parser::Rule;
use ast::{ASTNode, FunctionDeclHeader, ParseError};
use color_eyre::eyre;
use pest::iterators::Pair;
Expand Down
46 changes: 46 additions & 0 deletions ptx/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use color_eyre::eyre;
use std::path::PathBuf;
use std::time::Instant;

use ptx::parser::{Rule, Parser as PTXParser};
use clap::Parser as ClapParser;
use pest::Parser as PestParser;

// Arguments of the `ParsePTX` subcommand.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Debug, Clone, ClapParser)]
pub struct ParsePTXOptions {
    // Path of the PTX file that should be read and parsed.
    pub ptx_path: PathBuf,
}

// Subcommands understood by this binary; currently there is exactly one.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Debug, Clone, ClapParser)]
pub enum Command {
    // Parse a single PTX file and report the time the parse took.
    ParsePTX(ParsePTXOptions),
}

#[derive(ClapParser, Debug, Clone)]
pub struct Options {
#[clap(subcommand)]
pub command: Command,
}


/// Command-line entry point.
///
/// For the `ParsePTX` subcommand this reads the given file into memory,
/// parses it as `Rule::program`, and prints the wall-clock parse time in
/// milliseconds together with the throughput in MiB/s.
///
/// # Errors
///
/// Returns an error if `color_eyre` cannot be installed, if the file cannot
/// be read into a `String`, or if the parser rejects the input.
fn main() -> eyre::Result<()> {
    color_eyre::install()?;
    let options = Options::parse();

    match options.command {
        Command::ParsePTX(ParsePTXOptions { ptx_path }) => {
            let ptx_code = std::fs::read_to_string(&ptx_path)?;
            // `String::len` is already the size in bytes.
            let code_size_bytes = ptx_code.len();

            // Only the parse itself is timed; reading the file is excluded.
            let start = Instant::now();
            // Bound to a named `_parsed` (not `_`) so the parse result is
            // dropped at the end of this arm, after the clock was stopped.
            let _parsed = PTXParser::parse(Rule::program, &ptx_code)?;
            let dur = start.elapsed();

            let code_size_mib = code_size_bytes as f64 / (1024.0 * 1024.0);
            let mib_per_sec = code_size_mib / dur.as_secs_f64();
            println!(
                "parsing {} took {} ms ({:3.3} MiB/s)",
                ptx_path.display(),
                dur.as_millis(),
                mib_per_sec
            );
        }
    }

    Ok(())
}
171 changes: 151 additions & 20 deletions ptx/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[derive(Parser)]
#[derive(pest_derive::Parser)]
#[grammar = "./ptx.pest"]
pub struct Parser;

Expand Down Expand Up @@ -1086,10 +1086,7 @@ mod tests {
Ok(())
}

#[test]
fn opcode_precendence() -> eyre::Result<()> {
crate::tests::init_test();
let opcodes = [
const ALL_OPCODES: [&str; 151] = [
"abs",
"addp",
"addc",
Expand Down Expand Up @@ -1243,7 +1240,11 @@ mod tests {
"xor",
];

for opcode in opcodes {

#[test]
fn opcode_precendence() -> eyre::Result<()> {
crate::tests::init_test();
for opcode in ALL_OPCODES {
dbg!(&opcode);
assert_parses_to_typed(
Rule::opcode,
Expand Down Expand Up @@ -1349,6 +1350,38 @@ param1
Ok(())
}


// Checks that a `.global` six-element `.u64` array declaration with an
// alignment and a brace-enclosed initializer (integer literals mixed with
// identifiers) parses as `Rule::variable_decl` into the expected tree.
#[allow(non_snake_case)]
#[test]
fn parse_variable_decl_global_align_8_u64_underscore_ztv9containeriie6_initializer(
) -> eyre::Result<()> {
    crate::tests::init_test();

    // Input first, then the tree it is expected to produce.
    let source = r#".global .align 8 .u64 _ZTV9ContainerIiE[6] = {0, 0, _ZN9ContainerIiED1Ev, _ZN9ContainerIiED0Ev, 0, 0};"#;
    let expected_tree = r#"
(variable_decl
(variable_spec (space_spec (addressable_spec: ".global")))
(variable_spec (align_spec (integer (decimal: "8"))))
(variable_spec (type_spec (scalar_type: ".u64")))
(identifier_spec
(identifier: "_ZTV9ContainerIiE")
(integer (decimal: "6")))
(variable_decl_initializer
(operand (literal_operand (integer (decimal: "0"))))
(operand (literal_operand (integer (decimal: "0"))))
(operand (identifier: "_ZN9ContainerIiED1Ev"))
(operand (identifier: "_ZN9ContainerIiED0Ev"))
(operand (literal_operand (integer (decimal: "0"))))
(operand (literal_operand (integer (decimal: "0"))))
)
)
"#;

    assert_parses_to(Rule::variable_decl, source, expected_tree)?;
    Ok(())
}

#[test]
fn parse_prototype_decl_prototype_0_callprototype() -> eyre::Result<()> {
crate::tests::init_test();
Expand Down Expand Up @@ -1498,6 +1531,47 @@ ld.param.b32 %r115, [retval0+0];
Ok(())
}


// Checks a `.callprototype` declaration whose second parameter carries both
// an alignment and an array size. The parameter is first parsed on its own
// as `Rule::prototype_param`, then the whole declaration is parsed as
// `Rule::prototype_decl`.
#[test]
fn parse_prototype_decl_prototype_15_callprototype() -> eyre::Result<()> {
    crate::tests::init_test();

    // Step 1: the aligned array parameter in isolation.
    let param_source = ".param .align 4 .b8 _[16]";
    let expected_param = r#"(prototype_param
(align_spec (integer (decimal: "4")))
(scalar_type: ".b8")
(identifier_spec
(identifier: "_")
(integer (decimal: "16")))
)
"#;
    assert_parses_to(Rule::prototype_param, param_source, expected_param)?;

    // Step 2: the complete prototype declaration containing that parameter.
    let decl_source = r#"prototype_15 : .callprototype
()_ (.param .b64 _, .param .align 4 .b8 _[16]);
"#;
    let expected_decl = r#"
(prototype_decl
(identifier: "prototype_15")
(identifier: "_")
(prototype_param
(scalar_type: ".b64")
(identifier_spec (identifier: "_"))
)
(prototype_param
(align_spec (integer (decimal: "4")))
(scalar_type: ".b8")
(identifier_spec
(identifier: "_")
(integer (decimal: "16")))
)
)
"#;
    assert_parses_to(Rule::prototype_decl, decl_source, expected_decl)?;

    Ok(())
}


#[test]
fn parse_extern_func_param_b32_func_retval0_vprintf() -> eyre::Result<()> {
crate::tests::init_test();
Expand Down Expand Up @@ -1582,30 +1656,37 @@ ld.param.b32 %r115, [retval0+0];
Ok(())
}

#[test]
fn parse_vshr_u32_u32_u32_clamp_add() -> eyre::Result<()> {
crate::tests::init_test();
let want = r#"
(instruction_statement
(instruction
(opcode_spec
(opcode: "ld")
(option (addressable_spec: ".global"))
(option (type_spec (scalar_type: ".b32")))
(opcode: "vshr")
(option (type_spec (scalar_type: ".u32")))
(option (type_spec (scalar_type: ".u32")))
(option (type_spec (scalar_type: ".u32")))
(option: ".clamp")
(option: ".add")
)
(operand (identifier: "r2"))
(operand (memory_operand
(identifier: "array")
(address_expression (identifier: "r1"))
))
(operand (identifier: "%r952"))
(operand (identifier: "%r1865"))
(operand (identifier: "%r1079"))
(operand (identifier: "%r1865"))
)
)
"#;
assert_parses_to(
Rule::opcode_spec,
"vshr.u32.u32.u32.clamp.add",
r#"(memory_operand
(identifier: "array")
(address_expression (identifier: "r1"))
r#"(opcode_spec
(opcode: "vshr")
(option (type_spec (scalar_type: ".u32")))
(option (type_spec (scalar_type: ".u32")))
(option (type_spec (scalar_type: ".u32")))
(option: ".clamp")
(option: ".add")
)"#,
)?;
assert_parses_to(
Expand All @@ -1631,6 +1712,7 @@ ld.param.b32 %r115, [retval0+0];
Ok(())
}


#[test]
fn parse_loc_1_120_13() -> eyre::Result<()> {
crate::tests::init_test();
Expand Down Expand Up @@ -1815,21 +1897,70 @@ ld.param.b32 %r115, [retval0+0];
}

#[test]
fn parse_all_kernels() -> eyre::Result<()> {
fn extract_opcodes() -> eyre::Result<()> {
use std::fs::{read_dir, read_to_string, DirEntry};
use std::path::PathBuf;
use std::collections::HashSet;

crate::tests::init_test();
// pest::set_call_limit(std::num::NonZeroUsize::new(10000));
let kernels_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("kernels");
dbg!(&kernels_dir);
let mut kernels = read_dir(&kernels_dir)?
.into_iter()
.collect::<Result<Vec<DirEntry>, _>>()?;
kernels.sort_by_key(|k| k.path());

let all_opcodes = ALL_OPCODES.join("|");
let opcode_regex = regex::Regex::new(&format!(r"({})(\.[\w.:]*)", all_opcodes)).unwrap();

// atom.add.release.gpu.u32 %r57,[%rd10],%r58;
let mut all_options = HashSet::new();
for kernel in kernels {
dbg!(&kernel.path());
let ptx_code = read_to_string(kernel.path())?;
let parsed = PTXParser::parse(Rule::program, &ptx_code)?;
let captures = opcode_regex.captures_iter(&ptx_code);
for m in captures {
let options = m[2].split(".").filter(|o| !o.is_empty()).map(ToString::to_string);
all_options.extend(options);
}
}

let mut all_options: Vec<_> = all_options.into_iter().collect();
all_options.sort();
dbg!(&all_options);
Ok(())
}

/// Parses every file in `<CARGO_MANIFEST_DIR>/kernels` as `Rule::program`
/// and prints the parse time and throughput for each one.
///
/// Files are visited in sorted path order. If the `SKIP` environment
/// variable is set, it must parse as a `usize`; that many leading files are
/// skipped, while the printed index still refers to the position in the full
/// sorted list.
///
/// # Errors
///
/// Fails if the directory cannot be listed, `SKIP` is not a valid `usize`,
/// a file cannot be read into a `String`, or a file does not parse. Read and
/// parse failures are wrapped with the index and path of the offending file.
#[test]
fn all_kernels() -> eyre::Result<()> {
    use std::fs::{read_dir, read_to_string};
    use std::path::PathBuf;
    use std::time::Instant;

    crate::tests::init_test();
    // pest::set_call_limit(std::num::NonZeroUsize::new(10000));
    let kernels_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("kernels");

    // Collect the paths once up front: sorting and the loop below would
    // otherwise rebuild a `PathBuf` from the `DirEntry` on every use.
    let mut kernel_paths = read_dir(&kernels_dir)
        .map_err(|err| {
            eyre::Report::new(err)
                .wrap_err(format!("failed to list {}", kernels_dir.display()))
        })?
        .map(|entry| entry.map(|e| e.path()))
        .collect::<Result<Vec<PathBuf>, _>>()?;
    kernel_paths.sort();

    let skip = std::env::var("SKIP")
        .ok()
        .map(|s| s.parse::<usize>())
        .transpose()?
        .unwrap_or(0);

    for (i, path) in kernel_paths.iter().enumerate().skip(skip) {
        let ptx_code = read_to_string(path).map_err(|err| {
            eyre::Report::new(err)
                .wrap_err(format!("[{:>4}] failed to read {}", i, path.display()))
        })?;
        // `String::len` is already the size in bytes.
        let code_size_bytes = ptx_code.len();

        // Only the parse itself is timed.
        let start = Instant::now();
        let _parsed = PTXParser::parse(Rule::program, &ptx_code).map_err(|err| {
            eyre::Report::new(err)
                .wrap_err(format!("[{:>4}] failed to parse {}", i, path.display()))
        })?;
        let dur = start.elapsed();

        let code_size_mib = code_size_bytes as f64 / (1024.0 * 1024.0);
        let mib_per_sec = code_size_mib / dur.as_secs_f64();
        println!(
            "[{:>4}] parsing {} took {} ms ({:3.3} MiB/s)",
            i,
            path.display(),
            dur.as_millis(),
            mib_per_sec
        );
    }
    Ok(())
}
Expand Down
Loading

0 comments on commit af00bde

Please sign in to comment.