12个版本
0.1.11 | 2022年1月5日 |
---|---|
0.1.10 | 2022年1月4日 |
#867 in 机器学习
每月 22次下载
115KB
3K SLoC
tensorgraph-math
tensorgraph使用的数学原语。基于tensorgraph-sys构建,支持许多BLAS后端和设备。
使用openblas的基本示例
在Cargo.toml中启用功能
tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas"] }
use tensorgraph_math::{tensor::Tensor, sys::View};
// Both operands are given in column-major order (read each column first).
//
//     | 0 1 |
// A = | 2 3 |   (3 rows x 2 cols)
//     | 4 5 |
let lhs = Tensor::from_shape([3, 2], [0., 2., 4., 1., 3., 5.]);

// B = | 0 1 |   (2 rows x 2 cols)
//     | 2 3 |
let rhs = Tensor::from_shape([2, 2], [0., 2., 1., 3.]);

//          |  2  3 |
// C = AB = |  6 11 |
//          | 10 19 |
let product = lhs.matmul(rhs.view());

// The result is compared in the same column-major layout as the inputs.
assert_eq!(
    product.into_inner().into_std(),
    [2., 6., 10., 3., 11., 19.]
);
结合使用cublas全局上下文和openblas的中级示例
在Cargo.toml中启用功能
tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
use tensorgraph_math::{
blas::{DefaultBLASContext, cublas::CublasContext, BLAS},
sys::{
device::{DefaultDeviceAllocator, cuda::{Context, Cuda, Stream}, cpu::Cpu},
DefaultVec, View,
},
tensor::Tensor,
};
fn main() {
    // Initialise the CUDA context.
    let cuda_ctx = Context::quick_init().unwrap();

    // Create a CUDA stream and register it as the global stream.
    // The returned handle is kept alive until the end of `main`.
    let stream = Stream::new(&cuda_ctx).unwrap();
    let _stream_handle = stream.as_global();

    // Create a cublas context bound to that stream and register it as the
    // global cublas context. This handle is also kept until the end of `main`.
    let cublas_ctx = CublasContext::new();
    let _cublas_handle = cublas_ctx.with_stream(Some(&stream)).as_global();

    // cublas is the default BLAS implementation for CUDA when the feature is enabled
    run::<Cuda>();

    // openblas is the default BLAS implementation for CPU when the feature is enabled
    run::<Cpu>();
}
/// Generic code that runs on the device `D`, using that device's
/// default allocator and default BLAS provider.
///
/// Multiplies a fixed 3x2 matrix by a fixed 2x2 matrix and asserts the
/// product after copying it back to the host.
fn run<D: DefaultDeviceAllocator + DefaultBLASContext>()
where
    f32: BLAS<D::Context>,
{
    // Operand data is laid out column-major (read each column first).
    //
    //     | 0 1 |
    // A = | 2 3 |      B = | 0 1 |
    //     | 4 5 |          | 2 3 |
    let lhs_data = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 4., 1., 3., 5.]);
    let rhs_data = DefaultVec::<f32, D>::copy_from_host(&[0., 2., 1., 3.]);

    let lhs = Tensor::from_shape([3, 2], lhs_data); // 3 rows x 2 cols
    let rhs = Tensor::from_shape([2, 2], rhs_data); // 2 rows x 2 cols

    //          |  2  3 |
    // C = AB = |  6 11 |
    //          | 10 19 |
    let product = lhs.matmul(rhs.view());

    // Copy the result into a host-side array before comparing.
    let mut host = [0.; 6];
    product.into_inner().copy_to_host(&mut host);
    assert_eq!(host, [2., 6., 10., 3., 11., 19.]);
}
通过传递blas上下文和分配器使用openblas和cublas的高级示例
在Cargo.toml中启用功能
tensorgraph-math = { version = "LATEST_VERSION", features = ["openblas", "cublas"] }
#![feature(allocator_api)]
use std::{alloc::Global, ops::Deref};
use tensorgraph_math::{
blas::{BLASContext, cublas::{CublasContext}, BLAS},
sys::{
device::{cuda::{Context, Cuda, Stream}, cpu::Cpu, Device, DeviceAllocator},
Vec, View,
},
tensor::Tensor,
};
fn main() {
    // Initialise the CUDA context.
    let cuda_ctx = Context::quick_init().unwrap();

    // Create the CUDA stream.
    let stream = Stream::new(&cuda_ctx).unwrap();

    // Create the cublas context, then bind it to the stream above.
    // The base context keeps its own binding so it stays alive for as long
    // as the stream-bound context is in use.
    let cublas_base = CublasContext::new();
    let cublas_on_stream = cublas_base.with_stream(Some(&stream));

    // Run using the CUDA stream as the allocator, and cublas
    // as the BLAS provider.
    run(cublas_on_stream, stream.deref());

    // Run using the CPU default BLAS and the Global allocator.
    run((), Global);
}
/// Runs the matmul check with an explicitly supplied BLAS context `ctx`
/// and allocator `alloc`.
///
/// Both operands are copied from the host with `alloc`; the product is
/// computed with `matmul_into` using `ctx` and `alloc`, then copied back
/// to the host and asserted.
fn run<C: BLASContext, A: DeviceAllocator<Device = C::Device> + Copy>(ctx: C, alloc: A)
where
    f32: BLAS<C>,
{
    // Operand data is laid out column-major (read each column first).
    //
    //     | 0 1 |
    // A = | 2 3 |      B = | 0 1 |
    //     | 4 5 |          | 2 3 |
    let lhs_data = Vec::copy_from_host_in(&[0., 2., 4., 1., 3., 5.], alloc);
    let rhs_data = Vec::copy_from_host_in(&[0., 2., 1., 3.0_f32], alloc);

    let lhs = Tensor::from_shape([3, 2], lhs_data); // 3 rows x 2 cols
    let rhs = Tensor::from_shape([2, 2], rhs_data); // 2 rows x 2 cols

    //          |  2  3 |
    // C = AB = |  6 11 |
    //          | 10 19 |
    let product = lhs.matmul_into(rhs.view(), ctx, alloc);

    // Copy the result into a host-side array before comparing.
    let mut host = [0.; 6];
    product.into_inner().copy_to_host(&mut host);
    assert_eq!(host, [2., 6., 10., 3., 11., 19.]);
}
依赖项
~0.1–10MB
~146K SLoC