def parse_regions(regions_file):
    """
    Parse a regions file into a list of ``(start, size)`` tuples.

    Each data line holds two whitespace-separated fields: the start address
    in hexadecimal (an optional ``0x`` prefix is accepted) and the size in
    decimal.  Blank lines and lines whose first non-blank character is ``#``
    are ignored.  Malformed lines are reported with a warning on stdout and
    skipped, so a single bad line does not abort the run.

    Args:
        regions_file: Path of the text file to read.

    Returns:
        A list of ``(start, size)`` integer tuples, in file order.
    """
    regions = []
    with open(regions_file, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            # Test the stripped text so that indented comments are skipped
            # as well instead of being reported as malformed lines.
            if not line or line.startswith('#'):
                continue
            parts = line.split()
            if len(parts) != 2:
                print(f"Warning: Invalid line format: {line}")
                continue
            start_str, size_str = parts
            try:
                start = int(start_str, 16)
                size = int(size_str, 10)
            except ValueError:
                # Either field can be at fault (hex start or decimal size),
                # so the message does not single out one of them.
                print(f"Warning: Invalid number format in line: {line}")
                continue
            if start < 0 or size < 0:
                # int() accepts a leading '-', but a negative address or
                # length is meaningless for a memory region.
                print(f"Warning: Negative address or size in line: {line}")
                continue
            regions.append((start, size))
    return regions
+ """ + f.seek(0) + # ELF header for 64-bit little endian + elf_header_struct = struct.Struct('<16sHHIQQQIHHHHHH') + elf_header_data = f.read(elf_header_struct.size) + unpacked = elf_header_struct.unpack(elf_header_data) + + elf_header = { + 'e_ident': unpacked[0], + 'e_type': unpacked[1], + 'e_machine': unpacked[2], + 'e_version': unpacked[3], + 'e_entry': unpacked[4], + 'e_phoff': unpacked[5], + 'e_shoff': unpacked[6], + 'e_flags': unpacked[7], + 'e_ehsize': unpacked[8], + 'e_phentsize': unpacked[9], + 'e_phnum': unpacked[10], + 'e_shentsize': unpacked[11], + 'e_shnum': unpacked[12], + 'e_shstrndx': unpacked[13], + } + return elf_header + +def read_program_headers(f, elf_header): + """ + Reads all program headers and returns a list of dictionaries. + """ + program_headers = [] + f.seek(elf_header['e_phoff']) + ph_struct = struct.Struct('= end_va: + continue + # Calculate overlap + overlap_start = max(start_va, seg_start) + overlap_end = min(end_va, seg_end) + overlap_size = overlap_end - overlap_start + # Calculate file offset + offset = ph['p_offset'] + (overlap_start - ph['p_vaddr']) + # Read the data + f.seek(offset) + chunk = f.read(overlap_size) + if len(chunk) < overlap_size: + print(f"Warning: Could not read enough data for VA 0x{overlap_start:X}") + chunk += b'\x00' * (overlap_size - len(chunk)) + # Calculate where to place the data in the region + region_offset = overlap_start - start_va + # Ensure data array is big enough + while len(data) < region_offset: + data += b'\x00' + # Insert data_chunk at the correct offset + if len(data) < region_offset + overlap_size: + data += b'\x00' * (region_offset + overlap_size - len(data)) + data[region_offset:region_offset + overlap_size] = chunk + # After processing all segments, ensure data is exactly 'size' bytes + if len(data) < size: + data += b'\x00' * (size - len(data)) + elif len(data) > size: + data = data[:size] + return data + +def create_binary_file(data, output_bin): + """ + Writes the binary data to 
def assemble_section(asm_file, obj_file, assembler='riscv64-unknown-elf-as'):
    """
    Assemble ``asm_file`` into the object file ``obj_file``.

    Runs ``<assembler> -o obj_file asm_file``.  On any failure an error is
    printed and the process exits with status 1.

    Args:
        asm_file: Path of the assembly source to assemble.
        obj_file: Path of the object file to produce.
        assembler: Assembler executable to invoke; defaults to the RISC-V
            bare-metal GNU assembler.

    Raises:
        SystemExit: With code 1 if the assembler cannot be started or
            returns a non-zero exit status.
    """
    import subprocess  # kept local: not part of this file's top-level imports

    cmd = [assembler, '-o', obj_file, asm_file]
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
        print(f"Error: Assembly failed for {asm_file}: {e}")
        sys.exit(1)
    except OSError as e:
        # Raised when the executable is missing or not runnable (toolchain
        # not installed / not on PATH); report it like any other failure
        # instead of letting a traceback escape.
        print(f"Error: Could not run assembler '{assembler}' for {asm_file}: {e}")
        sys.exit(1)
def generate_linker_script(regions, output_filename,
                           mem_origin=0x80000000, mem_length=0x80000000):
    """
    Write a GNU ld linker script that pins each region at its start address.

    The script declares one MEMORY region named ``MEM`` and, for every entry
    of ``regions``, an output section ``.data_mem<idx>`` placed at that
    entry's start address.  ``.text`` and ``.bss`` sections assigned to
    ``MEM`` follow.  Section indices follow the order of ``regions``.

    Args:
        regions: Iterable of ``(start, size, mem_region)`` tuples.  ``start``
            is the section address, ``mem_region`` the MEMORY region name the
            section is assigned to; ``size`` is not used by the script.
        output_filename: Path of the linker script to create or overwrite.
        mem_origin: ORIGIN of the ``MEM`` region (default 0x80000000).
        mem_length: LENGTH of the ``MEM`` region (default 0x80000000).
    """
    import os  # local import: only needed here, for the header comment

    # The header names the file actually written rather than a fixed name.
    script_name = os.path.basename(output_filename)
    lines = [
        f"/* {script_name} - Auto-generated Linker Script */\n\n",
        # Single MEMORY region covering the configured window
        "MEMORY\n",
        "{\n",
        f" MEM (rwx) : ORIGIN = 0x{mem_origin:X}, LENGTH = 0x{mem_length:X}\n",
        "}\n\n",
        "SECTIONS\n",
        "{\n",
    ]

    # One output section per region, fixed at the region's start address.
    # KEEP stops --gc-sections from discarding the otherwise unreferenced data.
    for idx, (start, _size, mem_region) in enumerate(regions):
        section_name = f".data_mem{idx}"
        lines += [
            f" {section_name} 0x{start:X} :\n",
            " {\n",
            f" KEEP(*({section_name}))\n",
            f" }} > {mem_region}\n\n",
        ]

    # Standard sections
    lines += [
        " .text :\n",
        " {\n",
        " *(.text)\n",
        " } > MEM\n\n",
        " .bss :\n",
        " {\n",
        " *(.bss)\n",
        " *(COMMON)\n",
        " } > MEM\n\n",
        " /* Additional sections can be defined here */\n",
        "}\n",
    ]

    with open(output_filename, 'w') as f:
        f.writelines(lines)
fd_in = open(input_file, O_RDONLY); + if (fd_in < 0) { + perror("Failed to open input file"); + return 1; + } + + Elf *e_in = elf_begin(fd_in, ELF_C_READ, nullptr); + if (!e_in) { + std::cerr << "elf_begin() failed: " << elf_errmsg(-1) << std::endl; + close(fd_in); + return 1; + } + + GElf_Ehdr ehdr; + if (gelf_getehdr(e_in, &ehdr) != &ehdr) { + std::cerr << "Failed to get ELF header: " << elf_errmsg(-1) << std::endl; + elf_end(e_in); + close(fd_in); + return 1; + } + + // SOOHYUK: Debug: Print ELF header information + std::cout << "ELF Class: " << ((gelf_getclass(e_in) == ELFCLASS32) ? "32-bit" : "64-bit") << std::endl; + std::cout << "ELF Data Encoding: " << ((ehdr.e_ident[EI_DATA] == ELFDATA2LSB) ? "Little Endian" : "Big Endian") << std::endl; + + // Retrieve the number of program headers + size_t n_phdrs; + if (elf_getphdrnum(e_in, &n_phdrs) != 0) { + std::cerr << "Failed to get program header count: " << elf_errmsg(-1) << std::endl; + elf_end(e_in); + close(fd_in); + return 1; + } + + std::vector useful_regions; + + // Iterate over each program header + for (size_t i = 0; i < n_phdrs; ++i) { + GElf_Phdr phdr; + if (gelf_getphdr(e_in, i, &phdr) != &phdr) { + std::cerr << "Failed to get program header: " << elf_errmsg(-1) << std::endl; + elf_end(e_in); + close(fd_in); + return 1; + } + + // SOOHYUK: Debug: Print program header information + std::cout << "\nProcessing Segment " << i << ":" << std::endl; + std::cout << " Type: " << phdr.p_type << std::endl; + std::cout << " Offset: 0x" << std::hex << phdr.p_offset << std::dec << std::endl; + std::cout << " Virtual Address: 0x" << std::hex << phdr.p_vaddr << std::dec << std::endl; + std::cout << " Physical Address: 0x" << std::hex << phdr.p_paddr << std::dec << std::endl; + std::cout << " File Size: " << phdr.p_filesz << " bytes" << std::endl; + std::cout << " Memory Size: " << phdr.p_memsz << " bytes" << std::endl; + std::cout << " Flags: " << phdr.p_flags << std::endl; + + if (phdr.p_type != PT_LOAD) { + 
std::cout << " Skipping non-loadable segment." << std::endl; + continue; + } + + // SOOHYUK: Debug: Print virtual address and size (to accomodate memory concatenations) + std::cout << " Segment Virtual Address: 0x" << std::hex << phdr.p_vaddr << std::dec << std::endl; + std::cout << " Segment Size (filesz): " << phdr.p_filesz << " bytes" << std::endl; + + // Seek to the segment's file offset + if (lseek(fd_in, phdr.p_offset, SEEK_SET) < 0) { + perror("Failed to seek in input file"); + elf_end(e_in); + close(fd_in); + return 1; + } + + // Read the segment's data + std::vector segment_data(phdr.p_filesz); + ssize_t bytes_read = read(fd_in, segment_data.data(), phdr.p_filesz); + if (bytes_read != static_cast(phdr.p_filesz)) { + perror("Failed to read segment data"); + std::cerr << "Expected: " << phdr.p_filesz << ", Read: " << bytes_read << std::endl; + elf_end(e_in); + close(fd_in); + return 1; + } + + // Calculate the number of chunks in this segment + size_t num_chunks = (phdr.p_filesz + chunk_size - 1) / chunk_size; + + for (size_t chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) { + size_t chunk_offset = chunk_idx * chunk_size; + size_t chunk_end = std::min(chunk_offset + chunk_size, static_cast(phdr.p_filesz)); + size_t actual_chunk_size = chunk_end - chunk_offset; + + // Check if the chunk is all zeros + bool all_zero = std::all_of(segment_data.begin() + chunk_offset, segment_data.begin() + chunk_end, + [](unsigned char c) { return c == 0; }); + + // Compute the absolute start address + Elf64_Addr absolute_start_addr = phdr.p_vaddr + chunk_offset; + + // Debug: Print chunk information + std::cout << " Chunk " << chunk_idx << ": " + << "Offset " << std::hex << chunk_offset + << ", Size " << std::dec << actual_chunk_size + << ", Absolute Start Address: 0x" << std::hex << absolute_start_addr + << ", All Zero: " << (all_zero ? 
"Yes" : "No") << std::dec << std::endl; + + if (!all_zero) { + // Record the useful memory region + MemoryRegion region; + region.start_addr = absolute_start_addr; + region.size = actual_chunk_size; + useful_regions.push_back(region); + } + } + } + + // Clean up ELF resources + elf_end(e_in); + close(fd_in); + + // Sort the useful regions by starting address for clarity + std::sort(useful_regions.begin(), useful_regions.end(), + [](const MemoryRegion &a, const MemoryRegion &b) -> bool { + return a.start_addr < b.start_addr; + }); + + // Output the useful memory regions to the output file + std::ofstream ofs(output_file); + if (!ofs) { + std::cerr << "Failed to open output file: " << output_file << std::endl; + return 1; + } + + for (const auto ®ion : useful_regions) { + ofs << "0x" << std::hex << region.start_addr << " " << std::dec << region.size << std::endl; + } + + ofs.close(); + + std::cout << "\nScan complete. Useful memory regions written to: " << output_file << std::endl; + return 0; +} + +int main(int argc, char **argv) { + if (argc != 4) { + usage(argv[0]); + return 1; + } + const char *input_elf = argv[1]; + size_t chunk_size = std::stoul(argv[2]); + const char *output_file = argv[3]; + + return scan_memory_regions(input_elf, chunk_size, output_file); +} diff --git a/sparsity-testing-scripts/sparse-mem-link.sh b/sparsity-testing-scripts/sparse-mem-link.sh new file mode 100755 index 000000000..9124152dc --- /dev/null +++ b/sparsity-testing-scripts/sparse-mem-link.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +# set -e + +# usage() { +# echo "Usage: $0 [OPTIONS]" +# echo "" +# echo "Options" +# echo " --help -h : Display this message" +# echo " -n : Number of harts" +# echo " -b : Binary to run in spike" +# echo " -p : PC to take checkpoint at [default 0x80000000]" +# echo " -i : Instructions after PC to take checkpoint at [default 0]" +# echo " -m : ISA to pass to spike for checkpoint generation [default rv64gc]" +# echo " -o : Output directory to store the 
# Link the per-region object files (data_mem<N>.o) into final_program.elf
# using sparse_mem.ld, defining tohost/fromhost at the addresses they have
# in the source memory ELF.
#
# The checkpoint-capture steps that used to precede the link (option
# parsing, the spike state dump and the objcopy of the raw memory image)
# were already commented out and are omitted here.

set -euo pipefail

BINARY="hello.riscv.0x80000000.1000.loadarch/mem.elf"

# Print the address of the symbol named exactly $2 in ELF $1 (nothing if the
# symbol is not defined).  awk reads nm's whole output, so nm never sees a
# closed pipe under 'pipefail'.
symbol_addr() {
    riscv64-unknown-elf-nm "$1" |
        awk -v sym="$2" '$3 == sym && !found { print $1; found = 1 }'
}

echo "Finding tohost/fromhost in elf file"
TOHOST=$(symbol_addr "$BINARY" tohost)
FROMHOST=$(symbol_addr "$BINARY" fromhost)
if [ -z "$TOHOST" ] || [ -z "$FROMHOST" ]; then
    echo "Error: tohost/fromhost symbols not found in $BINARY" >&2
    exit 1
fi

# Collect data_mem0.o, data_mem1.o, ... in numeric order until the first
# missing index, so the list matches however many regions were generated.
OBJECTS=()
for (( i = 0; ; i++ )); do
    [ -f "data_mem$i.o" ] || break
    OBJECTS+=("data_mem$i.o")
done
if [ "${#OBJECTS[@]}" -eq 0 ]; then
    echo "Error: no data_mem<N>.o object files found" >&2
    exit 1
fi

echo "Link mem elf files with tohost/fromhost"
riscv64-unknown-elf-ld -T sparse_mem.ld \
    --defsym tohost=0x"$TOHOST" --defsym fromhost=0x"$FROMHOST" \
    -o final_program.elf "${OBJECTS[@]}"