1 个不稳定版本
0.1.0 | 2019年10月26日 |
---|
#275 在 多媒体
13KB
142 行
safe_ocl
关于
此 crate 为 OpenCL 提供了安全的零成本包装类型。目前它包含 2 种包装类型:
- MapKernel:包装 ocl::Kernel
- MapProgram:包装 ocl::Program
目前,这组类型还相当有限。它的一些局限性如下:
- 仅支持映射计算
- 仅支持二元算术运算符
- 仅适用于单线程
- 仅适用于单 GPU
- 仅在缓冲区读/写标志正确的前提下是安全的
这个 crate 实际上只是一个框架,后续可以在此基础上添加新功能,例如泛型、新的包装类型,以及能够消除 OpenCL 使用中未定义行为的子类型。
示例
下面是通常(直接使用 ocl)实现加法映射操作的方法。
// OpenCL C source for the `add` kernel: each work-item adds `scalar` to the
// buffer element at its own global id.
let src = r#"
__kernel void add(__global float* buffer, float scalar) {
buffer[get_global_id(0)] += scalar;
}
"#;
// (1) Define which platform and device(s) to use. Create a context,
// queue, and program, then define the work size (`dims`).
let platform = Platform::default();
let device = Device::first(platform).unwrap();
let context = Context::builder()
.platform(platform)
.devices(device.clone())
.build().unwrap();
let program = Program::builder()
.devices(device)
.src(src)
.build(&context).unwrap();
let queue = Queue::new(&context, device, None).unwrap();
// 2^20 elements; used below as the buffer length, the global work size and
// the length of the host-side result vector.
let dims = 1 << 20;
// [NOTE]: At this point we could manually assemble a ProQue by calling:
// `ProQue::new(context, queue, program, Some(dims))`. One might want to
// do this when only one program and queue are all that's needed. Wrapping
// it up into a single struct makes passing it around simpler.
// (2) Create a `Buffer` of `dims` f32 values, initially filled with 0.0:
let buffer = Buffer::<f32>::builder()
.queue(queue.clone())
.flags(flags::MEM_READ_WRITE)
.len(dims)
.fill_val(0f32)
.build().unwrap();
// (3) Create a kernel with arguments matching those in the source above
// (the buffer, then the scalar 10.0):
let kernel = Kernel::builder()
.program(&program)
.name("add")
.queue(queue.clone())
.global_work_size(dims)
.arg(&buffer)
.arg(&10.0f32)
.build().unwrap();
// (4) Run the kernel (default parameters shown for demonstration purposes):
// NOTE(review): the enqueue is wrapped in `unsafe`; presumably the caller is
// responsible for the kernel arguments matching the kernel source — confirm
// against the `ocl` documentation.
unsafe {
kernel.cmd()
.queue(&queue)
.global_work_offset(kernel.default_global_work_offset())
.global_work_size(dims)
.local_work_size(kernel.default_local_work_size())
.enq().unwrap();
}
// (5) Read results from the device into a vector (`::block` not shown):
let mut vec = vec![0.0f32; dims];
buffer.cmd()
.queue(&queue)
.offset(0)
.read(&mut vec)
.enq().unwrap();
// Every element started at 0.0 and the kernel added 10.0 to each one.
assert_eq!(vec, vec![10.0f32; dims]);
下面是使用上述包装类型(MapProgram 和 MapKernel)实现同一操作的方法。
// (1) Define which platform and device(s) to use. Create a context,
// queue, and program, then define the work size (`dims`). The program is
// built from `Op::Add` rather than from a hand-written OpenCL source string.
let platform = Platform::default();
let device = Device::first(platform).unwrap();
let context = Context::builder()
.platform(platform)
.devices(device.clone())
.build().unwrap();
let program = MapProgram::from(device, Op::Add, &context).unwrap();
let queue = Queue::new(&context, device, None).unwrap();
// 2^20 elements; used below as the buffer length and the length of the
// host-side result vector.
let dims = 1 << 20;
// [NOTE]: At this point we could manually assemble a ProQue by calling:
// `ProQue::new(context, queue, program, Some(dims))`. One might want to
// do this when only one program and queue are all that's needed. Wrapping
// it up into a single struct makes passing it around simpler.
// NOTE(review): `program` is a `MapProgram` here; whether it can be passed
// to `ProQue::new` as-is is not shown in this example — confirm.
// (2) Create a `Buffer` of `dims` f32 values, initially filled with 0.0:
let buffer = Buffer::<f32>::builder()
.queue(queue.clone())
.flags(flags::MEM_READ_WRITE) // TODO ensure buffer is read-write
.len(dims)
.fill_val(0f32)
.build().unwrap();
// (3) Create a `MapKernel` from the program, a queue, the buffer and the
// scalar operand 10.0 (no kernel name or work size is passed explicitly):
let kernel = MapKernel::from(&program, queue.clone(), &buffer, &10.0f32).unwrap();
// (4) Enqueue the kernel on the queue. No `unsafe` block appears at this
// call site, and the return value of `cmd_enq` is not checked here.
kernel.cmd_enq(&queue);
// (5) Read results from the device into a vector (`::block` not shown):
let mut vec = vec![0.0f32; dims];
buffer.cmd()
.queue(&queue)
.offset(0)
.read(&mut vec)
.enq().unwrap();
// Every element started at 0.0; after the `Op::Add` map with 10.0 each one
// is expected to equal 10.0.
assert_eq!(vec, vec![10.0f32; dims]);
依赖关系
~3MB
~55K SLoC