#opencl #ocl #gpu #gpgpu

safe_ocl

安全的 OpenCL 零成本包装类型

1 个不稳定版本

0.1.0 2019年10月26日

#275多媒体

MIT 许可证

13KB
142

safe_ocl

Gitter

关于

此 crate 为 OpenCL 提供安全的零成本包装类型。它引入了 2 种包装类型:

  • MapKernel 包装 ocl::Kernel
  • MapProgram 包装 ocl::Program

目前,这是一组相当有限的类型。其局限性包括:

  • 仅支持映射计算
  • 仅支持二元算术运算符
  • 仅适用于单线程
  • 仅适用于单 GPU
  • 仅在缓冲区读/写标志正确的前提下是安全的

这实际上只是一个基础框架,可在其上添加新功能,例如泛型、新的包装类型,以及能够消除 OpenCL 使用中未定义行为的子类型等。

示例

这是通常实现加法映射操作的方法。

// OpenCL C kernel source: adds `scalar` to every element of `buffer`,
// one work-item per element (indexed by `get_global_id(0)`).
let src = r#"
    __kernel void add(__global float* buffer, float scalar) {
        buffer[get_global_id(0)] += scalar;
    }
"#;

// (1) Define which platform and device(s) to use. Create a context,
// queue, and program then define some dims (compare to step 1 above).
let platform = Platform::default();
let device = Device::first(platform).unwrap();
let context = Context::builder()
    .platform(platform)
    .devices(device.clone())
    .build().unwrap();
let program = Program::builder()
    .devices(device)
    .src(src)
    .build(&context).unwrap();
let queue = Queue::new(&context, device, None).unwrap();
// Number of work-items and buffer elements (1 << 20 = 1,048,576).
let dims = 1 << 20;
// [NOTE]: At this point we could manually assemble a ProQue by calling:
// `ProQue::new(context, queue, program, Some(dims))`. One might want to
// do this when only one program and queue are all that's needed. Wrapping
// it up into a single struct makes passing it around simpler.

// (2) Create a `Buffer`:
let buffer = Buffer::<f32>::builder()
    .queue(queue.clone())
    .flags(flags::MEM_READ_WRITE)
    .len(dims)
    // Device buffer starts zero-filled, so the expected result below is
    // exactly the scalar value.
    .fill_val(0f32)
    .build().unwrap();

// (3) Create a kernel with arguments matching those in the source above:
let kernel = Kernel::builder()
    .program(&program)
    .name("add")
    .queue(queue.clone())
    .global_work_size(dims)
    .arg(&buffer)
    .arg(&10.0f32)
    .build().unwrap();

// (4) Run the kernel (default parameters shown for demonstration purposes):
// SAFETY: enqueueing is `unsafe` in `ocl` because the argument types/arity
// set above cannot be checked against the OpenCL C source at compile time;
// here `&buffer` (f32) and `&10.0f32` match the kernel signature exactly.
// NOTE(review): eliminating this `unsafe` is precisely what the wrapper
// types in the second example aim for — confirm against the crate docs.
unsafe {
    kernel.cmd()
        .queue(&queue)
        .global_work_offset(kernel.default_global_work_offset())
        .global_work_size(dims)
        .local_work_size(kernel.default_local_work_size())
        .enq().unwrap();
}

// (5) Read results from the device into a vector (`::block` not shown):
let mut vec = vec![0.0f32; dims];
buffer.cmd()
    .queue(&queue)
    .offset(0)
    .read(&mut vec)
    .enq().unwrap();

// Every element was 0.0 and had 10.0 added exactly once by the kernel.
assert_eq!(vec, vec![10.0f32; dims]);

这是使用上述类型的方法。

// Same pipeline as the plain-`ocl` version above, but using this crate's
// wrappers: `MapProgram` builds the map-operation program for `Op::Add`,
// and `MapKernel` constructs the kernel so that no `unsafe` block is
// required at the enqueue site.
// (1) Define which platform and device(s) to use. Create a context,
// queue, and program then define some dims (compare to step 1 above).
let platform = Platform::default();
let device = Device::first(platform).unwrap();
let context = Context::builder()
    .platform(platform)
    .devices(device.clone())
    .build().unwrap();
// Replaces the hand-written kernel source + `Program::builder` steps.
let program = MapProgram::from(device, Op::Add, &context).unwrap();
let queue = Queue::new(&context, device, None).unwrap();
// Number of work-items and buffer elements (1 << 20 = 1,048,576).
let dims = 1 << 20;
// [NOTE]: At this point we could manually assemble a ProQue by calling:
// `ProQue::new(context, queue, program, Some(dims))`. One might want to
// do this when only one program and queue are all that's needed. Wrapping
// it up into a single struct makes passing it around simpler.

// (2) Create a `Buffer`:
let buffer = Buffer::<f32>::builder()
    .queue(queue.clone())
    .flags(flags::MEM_READ_WRITE) // TODO ensure buffer is read-write
    .len(dims)
    .fill_val(0f32)
    .build().unwrap();

// (3) Create a kernel with arguments matching those in the source above:
// Wraps program, queue, buffer, and scalar argument in a single call.
let kernel = MapKernel::from(&program, queue.clone(), &buffer, &10.0f32).unwrap();

// (4) Run the kernel (default parameters shown for demonstration purposes):
// No `unsafe` block needed here, unlike the raw `ocl` enqueue above.
// NOTE(review): the return value of `cmd_enq` is discarded — confirm
// whether it returns a `Result` that ought to be handled.
kernel.cmd_enq(&queue);

// (5) Read results from the device into a vector (`::block` not shown):
let mut vec = vec![0.0f32; dims];
buffer.cmd()
    .queue(&queue)
    .offset(0)
    .read(&mut vec)
    .enq().unwrap();

// Every element was 0.0 and had 10.0 added exactly once by the kernel.
assert_eq!(vec, vec![10.0f32; dims]);

依赖关系

~3MB
~55K SLoC