Control flow analysis

Here is the output of reprospect.tools.sass.controlflow.Graph.to_mermaid() from this SASS code.

        flowchart TD
	block_0["LDC R1, c[0x0][0x37c]<br/>S2R R0, SR_TID.Y<br/>LDCU UR4, c[0x0][0x364]<br/>S2R R3, SR_CTAID.X<br/>LDCU.64 UR12, c[0x0][0x3b0]<br/>IMAD R0, R3, UR4, R0<br/>IADD3 R0, PT, PT, R0, UR12, RZ<br/>ISETP.GE.U32.AND P0, PT, R0, UR13, PT<br/>@P0 EXIT"]:::myblock
	block_144["LDCU UR5, c[0x0][0x370]<br/>LDCU.64 UR8, c[0x0][0x120]<br/>LDCU.64 UR10, c[0x0][0x358]<br/>UIMAD UR4, UR4, UR5, URZ<br/>UIADD3.64 UR6, UPT, UPT, UR8, 0x10010, URZ<br/>UIADD3 UR5, UPT, UPT, -UR4, UR13, URZ"]:::myblock
	block_240["LDC.64 R2, c[0x0][0x388]<br/>BSSY.RECONVERGENT B0, 0x1a0<br/>IMAD.WIDE.U32 R4, R0, 0x8, R2<br/>QSPC.E.G P0, RZ, [R4]<br/>ISETP.GE.U64.AND P1, PT, R4, UR6, PT<br/>ISETP.GE.U64.AND P1, PT, R4, UR8, !P1<br/>PLOP3.LUT P0, PT, P0, P1, PT, 0x20, 0x2<br/>@!P0 BRA 0x190"]:::myblock
	block_368["LDC.64 R2, c[0x0][0x398]<br/>REDG.E.ADD.64.STRONG.GPU desc[UR10][R4.64], R2"]:::myblock
	block_400["BSYNC.RECONVERGENT B0"]:::myblock
	block_416["BSSY.RECONVERGENT B0, 0x2e0<br/>@P0 BRA 0x2d0"]:::myblock
	block_448["LDC.64 R2, c[0x0][0x398]<br/>ATOM.E.ADD.64.STRONG.GPU P0, RZ, desc[UR10][R4.64], R2<br/>@P0 BRA 0x2d0"]:::myblock
	block_496["QSPC.E.S P0, RZ, [R4]<br/>@!P0 BRA 0x290"]:::myblock
	block_528["LDC.64 R8, c[0x0][0x398]<br/>MOV R2, R4<br/>MOV R3, R2"]:::myblock
	block_576["LDS.64 R4, [R3]<br/>IADD.64 R6, R4, R8<br/>ATOMS.CAST.SPIN.64 P0, [R3], R4, R6<br/>@!P0 BRA 0x240"]:::myblock
	block_640["BRA 0x2d0"]:::myblock
	block_656["LD.E.64 R2, desc[UR10][R4.64]<br/>LDCU.64 UR12, c[0x0][0x398]<br/>IADD.64 R2, R2, UR12<br/>ST.E.64 desc[UR10][R4.64], R2"]:::myblock
	block_720["BSYNC.RECONVERGENT B0"]:::myblock
	block_736["LDCU UR12, c[0x0][0x3b4]<br/>ISETP.GE.U32.AND P0, PT, R0.reuse, UR5, PT<br/>IADD3 R0, PT, PT, R0, UR4, RZ<br/>SEL R0, R0, UR12, !P0<br/>ISETP.GE.U32.AND P0, PT, R0, UR12, PT<br/>@!P0 BRA 0xf0"]:::myblock
	block_832["EXIT"]:::myblock
	block_848["BRA 0x350"]:::myblock
	block_0 --> block_144
	block_144 --> block_240
	block_240 --> block_400
	block_240 --> block_368
	block_368 --> block_400
	block_400 --> block_416
	block_416 --> block_720
	block_416 --> block_448
	block_448 --> block_720
	block_448 --> block_496
	block_496 --> block_656
	block_496 --> block_528
	block_528 --> block_576
	block_576 --> block_576
	block_576 --> block_640
	block_640 --> block_720
	block_656 --> block_720
	block_720 --> block_736
	block_736 --> block_240
	block_736 --> block_832
	block_848 --> block_848
	classDef myblock text-align:left
    
class tests.tools.sass.test_controlflow.TestControlFlow [View on GitHub]

Bases: object

Tests for reprospect.tools.sass.controlflow.ControlFlow.

class TestAtomicAddInt64 [View on GitHub]

Bases: object

SASS_ATOM_SM120: Final[Path] = PosixPath('/__w/reprospect/reprospect/tests/tools/sass/assets/atomic_add.int64.sm_120.sass')
test() → None [View on GitHub]

Parse SASS_ATOM_SM120.

class TestIfs [View on GitHub]

Bases: object

CU_IFS: Final[Path] = PosixPath('/__w/reprospect/reprospect/tests/tools/sass/assets/ifs.cu')
INSTRUCTIONS: Final[list[Instruction]] = [Instruction(offset=0, instruction='LDC R1, c[0x0][0x37c]', hex='0x0000df00ff017b82', control=ControlCode(stall_count=1, yield_flag=True, read=7, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=16, instruction='S2R R0, SR_TID.X', hex='0x0000000000007919', control=ControlCode(stall_count=7, yield_flag=True, read=7, write=1, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=32, instruction='S2UR UR4, SR_CTAID.X', hex='0x00000000000479c3', control=ControlCode(stall_count=1, yield_flag=True, read=7, write=1, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=48, instruction='LDCU UR6, c[0x0][0x388]', hex='0x00007100ff0677ac', control=ControlCode(stall_count=7, yield_flag=True, read=7, write=2, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=64, instruction='LDC R5, c[0x0][0x360]', hex='0x0000d800ff057b82', control=ControlCode(stall_count=2, yield_flag=True, read=7, write=1, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=80, instruction='IMAD R5, R5, UR4, R0', hex='0x0000000405057c24', control=ControlCode(stall_count=1, yield_flag=True, read=7, write=7, wait=[False, True, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=96, instruction='LDCU.64 UR4, c[0x0][0x358]', hex='0x00006b00ff0477ac', control=ControlCode(stall_count=4, yield_flag=True, read=7, write=1, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=112, instruction='ISETP.GE.U32.AND P0, PT, R5, UR6, PT', hex='0x0000000605007c0c', 
control=ControlCode(stall_count=13, yield_flag=False, read=7, write=7, wait=[False, False, True, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=128, instruction='@P0 BRA 0x130', hex='0x0000000000280947', control=ControlCode(stall_count=5, yield_flag=True, read=7, write=7, wait=[True, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=144, instruction='LDC.64 R2, c[0x0][0x380]', hex='0x0000e000ff027b82', control=ControlCode(stall_count=2, yield_flag=True, read=7, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=160, instruction='IMAD.WIDE.U32 R2, R5, 0x4, R2', hex='0x0000000405027825', control=ControlCode(stall_count=5, yield_flag=False, read=7, write=7, wait=[True, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=176, instruction='LDG.E R0, desc[UR4][R2.64]', hex='0x0000000402007981', control=ControlCode(stall_count=2, yield_flag=True, read=7, write=2, wait=[False, True, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=192, instruction='ISETP.GE.AND P0, PT, R0, 0x1, PT', hex='0x000000010000780c', control=ControlCode(stall_count=13, yield_flag=False, read=7, write=7, wait=[False, False, True, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=208, instruction='@P0 IMAD R7, R5, -0x2a, R0', hex='0xffffffd605070824', control=ControlCode(stall_count=5, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=224, instruction='@P0 STG.E desc[UR4][R2.64], R7', hex='0x0000000702000986', control=ControlCode(stall_count=1, yield_flag=True, read=0, write=7, wait=[False, False, False, False, False, False], 
reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=240, instruction='@P0 EXIT', hex='0x000000000000094d', control=ControlCode(stall_count=5, yield_flag=True, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=256, instruction='IMAD R5, R5, 0x29a, R0', hex='0x0000029a05057824', control=ControlCode(stall_count=5, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=272, instruction='STG.E desc[UR4][R2.64], R5', hex='0x0000000502007986', control=ControlCode(stall_count=1, yield_flag=True, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=288, instruction='EXIT', hex='0x000000000000794d', control=ControlCode(stall_count=5, yield_flag=True, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=304, instruction='ISETP.NE.U32.AND P0, PT, RZ, UR6, PT', hex='0x00000006ff007c0c', control=ControlCode(stall_count=13, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=320, instruction='@!P0 EXIT', hex='0x000000000000894d', control=ControlCode(stall_count=5, yield_flag=True, read=7, write=7, wait=[False, True, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=336, instruction='LDC.64 R2, c[0x0][0x380]', hex='0x0000e000ff027b82', control=ControlCode(stall_count=2, yield_flag=True, read=7, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=352, instruction='LDG.E R0, desc[UR4][R2.64]', hex='0x0000000402007981', 
control=ControlCode(stall_count=2, yield_flag=True, read=7, write=2, wait=[True, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=368, instruction='IADD3 R5, PT, PT, R0, 0x1, RZ', hex='0x0000000100057810', control=ControlCode(stall_count=5, yield_flag=False, read=7, write=7, wait=[False, False, True, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=384, instruction='STG.E desc[UR4][R2.64], R5', hex='0x0000000502007986', control=ControlCode(stall_count=1, yield_flag=True, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=400, instruction='EXIT', hex='0x000000000000794d', control=ControlCode(stall_count=5, yield_flag=True, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=416, instruction='BRA 0x1a0', hex='0xfffffffc00fc7947', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=432, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=448, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=464, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=480, instruction='NOP', 
hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=496, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=512, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=528, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=544, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=560, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=576, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=592, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=608, instruction='NOP', hex='0x0000000000007918', 
control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=624, instruction='NOP', hex='0x0000000000007918', control=ControlCode(stall_count=0, yield_flag=False, read=7, write=7, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False}))]
SASS_IFS_SM120: Final[Path] = PosixPath('/__w/reprospect/reprospect/tests/tools/sass/assets/ifs.sm_120.sass')
test(workdir: Path, parameters: Parameters, cmake_file_api: FileAPI) → None [View on GitHub]

Compile CU_IFS and build the CFG.

test_analyse() → None [View on GitHub]
test_create_blocks() → None [View on GitHub]
test_find_entry_points() → None [View on GitHub]
class TestVirtualFunctions [View on GitHub]

Bases: object

Virtual functions on device generate branching instructions whose targets are resolved at runtime:

CALL.REL.NOINC R2 0x0

nvdisasm does not consider these instructions as creating a new basic block.

VIRTUAL_CU: Final[Path] = PosixPath('/__w/reprospect/reprospect/tests/tools/sass/assets/virtual.cu')
pytestmark = [Mark(name='parametrize', args=('parameters', (Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.VOLTA: 'VOLTA'>, compute_capability=ComputeCapability(major=7, minor=0))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.TURING: 'TURING'>, compute_capability=ComputeCapability(major=7, minor=5))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.AMPERE: 'AMPERE'>, compute_capability=ComputeCapability(major=8, minor=0))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.AMPERE: 'AMPERE'>, compute_capability=ComputeCapability(major=8, minor=6))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.ADA: 'ADA'>, compute_capability=ComputeCapability(major=8, minor=9))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.HOPPER: 'HOPPER'>, compute_capability=ComputeCapability(major=9, minor=0))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.BLACKWELL: 'BLACKWELL'>, compute_capability=ComputeCapability(major=10, minor=0))), Parameters(arch=NVIDIAArch(family=<NVIDIAFamily.BLACKWELL: 'BLACKWELL'>, compute_capability=ComputeCapability(major=12, minor=0))))), kwargs={'ids': <class 'str'>})]
test(request, workdir: Path, parameters: Parameters, cmake_file_api: FileAPI) → None [View on GitHub]
class tests.tools.sass.test_controlflow.TestGraph [View on GitHub]

Bases: object

Tests for reprospect.tools.sass.controlflow.Graph.

CONTROLCODE: Final[ControlCode] = ControlCode(stall_count=0, yield_flag=False, read=0, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})
INSTRUCTIONS: Final[tuple[Instruction, ...]] = (Instruction(offset=0, instruction='DADD R4, R4, c[0x0][0x180]', hex='0x0', control=ControlCode(stall_count=0, yield_flag=False, read=0, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=0, instruction='FADD R4, R4, c[0x0][0x180]', hex='0x0', control=ControlCode(stall_count=0, yield_flag=False, read=0, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=0, instruction='DMUL R6, R6, c[0x0][0x188]', hex='0x1', control=ControlCode(stall_count=0, yield_flag=False, read=0, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})), Instruction(offset=0, instruction='NOP', hex='0x2', control=ControlCode(stall_count=0, yield_flag=False, read=0, write=0, wait=[False, False, False, False, False, False], reuse={'A': False, 'B': False, 'C': False, 'D': False})))
cfg() → Graph [View on GitHub]
test_add_block(cfg: Graph) → None [View on GitHub]

Add a block.

test_add_blocks(cfg: Graph) → None [View on GitHub]

Add blocks.

test_add_blocks_with_edges(cfg: Graph) → None [View on GitHub]

Add blocks with edges.

test_to_mermaid(cfg: Graph) → None [View on GitHub]

Convert to mermaid.