openvm_cuda_common/
error.rs

1use std::{
2    ffi::CStr,
3    os::raw::{c_char, c_int},
4};
5
6use thiserror::Error;
7
// Raw bindings to the two CUDA runtime functions that describe an error code.
// Each takes the raw status code by value and hands back a C string pointer.
#[link(name = "cudart")]
extern "C" {
    /// Symbolic name for `error` (e.g. "cudaErrorMemoryAllocation").
    fn cudaGetErrorName(error: c_int) -> *const c_char;
    /// Descriptive text for `error` (e.g. "out of memory").
    fn cudaGetErrorString(error: c_int) -> *const c_char;
}
13
/// Copies a C string handed back by CUDA into an owned Rust `String`.
///
/// A null `ptr` produces a fixed placeholder message instead of being dereferenced.
/// Bytes that are not valid UTF-8 are replaced with U+FFFD rather than causing an error.
fn cstr_to_string(ptr: *const c_char) -> String {
    if ptr.is_null() {
        String::from("Unknown CUDA error (null pointer)")
    } else {
        // SAFETY: `ptr` was just checked to be non-null. It is additionally assumed to
        // point at a NUL-terminated string that stays valid for the duration of this
        // call; the callers in this file pass pointers returned by the CUDA runtime.
        let c_text = unsafe { CStr::from_ptr(ptr) };
        String::from_utf8_lossy(c_text.to_bytes()).into_owned()
    }
}
21
22/// Returns the symbolic error name (e.g. "cudaErrorMemoryAllocation")
23pub fn get_cuda_error_name(error_code: i32) -> String {
24    let name_ptr = unsafe { cudaGetErrorName(error_code) };
25    cstr_to_string(name_ptr)
26}
27
28/// Returns a descriptive error string (e.g. "out of memory")
29pub fn get_cuda_error_string(error_code: i32) -> String {
30    let str_ptr = unsafe { cudaGetErrorString(error_code) };
31    cstr_to_string(str_ptr)
32}
33
34/// A CUDA error with code, name, and message
35#[derive(Error, Debug)]
36#[error("{message} ({name})")]
37pub struct CudaError {
38    pub code: i32,
39    pub name: String,
40    pub message: String,
41}
42
43impl CudaError {
44    /// Construct from a raw CUDA error code (non-zero).
45    pub fn new(code: i32) -> Self {
46        CudaError {
47            code,
48            name: get_cuda_error_name(code),
49            message: get_cuda_error_string(code),
50        }
51    }
52
53    /// Returns `Ok(())` if `code == 0` (cudaSuccess), or `Err(CudaError)` if non-zero.
54    pub fn from_result(code: i32) -> Result<(), Self> {
55        if code == 0 {
56            Ok(())
57        } else {
58            Err(Self::new(code))
59        }
60    }
61
62    /// Returns `true` if the error is cudaErrorMemoryAllocation
63    #[inline]
64    pub fn is_out_of_memory(&self) -> bool {
65        self.code == 2
66    }
67}
68
69#[inline]
70pub fn check(code: i32) -> Result<(), CudaError> {
71    CudaError::from_result(code)
72}
73
74#[derive(Error, Debug)]
75pub enum MemoryError {
76    #[error(transparent)]
77    Cuda(#[from] CudaError),
78
79    #[error("Attempted to free null pointer")]
80    NullPointer,
81
82    #[error("Attempted to free untracked pointer")]
83    UntrackedPointer,
84
85    #[error("Failed to acquire memory manager lock")]
86    LockError,
87
88    #[error("Invalid memory size: {size}")]
89    InvalidMemorySize { size: usize },
90
91    #[error(
92        "Out of memory in pool (size requested: {requested} bytes, available: {available} bytes)"
93    )]
94    OutOfMemory { requested: usize, available: usize },
95
96    #[error("Invalid pointer: pointer not found in allocation table")]
97    InvalidPointer,
98}
99
100#[derive(Error, Debug)]
101pub enum MemCopyError {
102    #[error(transparent)]
103    Cuda(#[from] CudaError),
104    #[error("Size mismatch in {operation}: host len={host_len}, device len={device_len}")]
105    SizeMismatch {
106        operation: &'static str,
107        host_len: usize,
108        device_len: usize,
109    },
110}
111
112#[derive(Error, Debug)]
113pub enum KernelError {
114    #[error(transparent)]
115    Cuda(#[from] CudaError),
116
117    #[error("Unsupported type size {size}")]
118    UnsupportedTypeSize { size: usize },
119}