openvm_cuda_common/
error.rs

1use std::{
2    ffi::CStr,
3    os::raw::{c_char, c_int},
4};
5
6use thiserror::Error;
7
// Foreign declarations for the two error-description lookups exported by the
// `cudart` library. Both take a raw error code by value and hand back a C
// string pointer, which callers must check for null before reading.
#[link(name = "cudart")]
extern "C" {
    /// Looks up the symbolic name associated with `error`.
    fn cudaGetErrorName(error: c_int) -> *const c_char;
    /// Looks up the descriptive message associated with `error`.
    fn cudaGetErrorString(error: c_int) -> *const c_char;
}
13
/// Copies a C string handed back by CUDA into an owned Rust `String`.
///
/// A null `ptr` yields the fixed placeholder text
/// `"Unknown CUDA error (null pointer)"`. Bytes that are not valid UTF-8 are
/// replaced lossily rather than causing a failure.
fn cstr_to_string(ptr: *const c_char) -> String {
    if !ptr.is_null() {
        // SAFETY: `ptr` was just checked to be non-null; the caller is expected to
        // pass a pointer to a NUL-terminated string that stays valid for this call.
        let borrowed = unsafe { CStr::from_ptr(ptr) };
        borrowed.to_string_lossy().into_owned()
    } else {
        String::from("Unknown CUDA error (null pointer)")
    }
}
21
22/// Returns the symbolic error name (e.g. "cudaErrorMemoryAllocation")
23pub fn get_cuda_error_name(error_code: i32) -> String {
24    let name_ptr = unsafe { cudaGetErrorName(error_code) };
25    cstr_to_string(name_ptr)
26}
27
28/// Returns a descriptive error string (e.g. "out of memory")
29pub fn get_cuda_error_string(error_code: i32) -> String {
30    let str_ptr = unsafe { cudaGetErrorString(error_code) };
31    cstr_to_string(str_ptr)
32}
33
34/// A CUDA error with code, name, and message
35#[derive(Error, Debug)]
36#[error("{message} ({name})")]
37pub struct CudaError {
38    pub code: i32,
39    pub name: String,
40    pub message: String,
41}
42
43impl CudaError {
44    /// Construct from a raw CUDA error code (non-zero).
45    pub fn new(code: i32) -> Self {
46        CudaError {
47            code,
48            name: get_cuda_error_name(code),
49            message: get_cuda_error_string(code),
50        }
51    }
52
53    /// Returns `Ok(())` if `code == 0` (cudaSuccess), or `Err(CudaError)` if non-zero.
54    pub fn from_result(code: i32) -> Result<(), Self> {
55        crate::stream::mark_cuda_thread_used();
56        if code == 0 {
57            Ok(())
58        } else {
59            Err(Self::new(code))
60        }
61    }
62
63    /// Returns `true` if the error is cudaErrorMemoryAllocation
64    #[inline]
65    pub fn is_out_of_memory(&self) -> bool {
66        self.code == 2
67    }
68}
69
70#[inline]
71pub fn check(code: i32) -> Result<(), CudaError> {
72    CudaError::from_result(code)
73}
74
75#[derive(Error, Debug)]
76pub enum MemoryError {
77    #[error(transparent)]
78    Cuda(#[from] CudaError),
79
80    #[error("Attempted to free null pointer")]
81    NullPointer,
82
83    #[error("Attempted to free untracked pointer")]
84    UntrackedPointer,
85
86    #[error("Failed to acquire memory manager lock")]
87    LockError,
88
89    #[error("Invalid memory size: {size}")]
90    InvalidMemorySize { size: usize },
91
92    #[error(
93        "Out of memory in pool (size requested: {requested} bytes, available: {available} bytes)"
94    )]
95    OutOfMemory { requested: usize, available: usize },
96
97    #[error("Invalid pointer: pointer not found in allocation table")]
98    InvalidPointer,
99
100    #[error("Failed to reserve virtual address space (bytes: {size}, page size: {page_size})")]
101    ReserveFailed { size: usize, page_size: usize },
102}
103
104#[derive(Error, Debug)]
105pub enum MemCopyError {
106    #[error(transparent)]
107    Cuda(#[from] CudaError),
108    #[error("Size mismatch in {operation}: host len={host_len}, device len={device_len}")]
109    SizeMismatch {
110        operation: &'static str,
111        host_len: usize,
112        device_len: usize,
113    },
114}
115
116#[derive(Error, Debug)]
117pub enum KernelError {
118    #[error(transparent)]
119    Cuda(#[from] CudaError),
120
121    #[error("Unsupported type size {size}")]
122    UnsupportedTypeSize { size: usize },
123}