Tensorium
Loading...
Searching...
No Matches
tensorium::GemmKernelBigger< T > Member List

This is the complete list of members for tensorium::GemmKernelBigger< T >, including all inherited members.

__attribute__((aligned(64))) | tensorium::GemmKernelBigger< T > | inline static
__attribute__((aligned(64))) | tensorium::GemmKernelBigger< T > | static
__attribute__((aligned(64))) | tensorium::GemmKernelBigger< T > | static
BlockCols | tensorium::GemmKernelBigger< T > | static
BlockDepth | tensorium::GemmKernelBigger< T > | static
BlockRows | tensorium::GemmKernelBigger< T > | static
build_masks(__m256i *packed_mask_0, __m256i *packed_mask_1, int mr) | tensorium::GemmKernelBigger< T > | inline static
fma_loop_00(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline
fma_loop_01(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline
fma_loop_02(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline
fma_loop_03(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline
fma_loop_04(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline
fma_loop_05(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline
kernel_16x6_load_accum(T *blockA_packed, T *blockB_packed, T *C, int mr, int nr, int kc, int M) | tensorium::GemmKernelBigger< T > | inline
kernel_16x6_zero_init_accum(T *blockA_packed, T *blockB_packed, T *C, int mr, int nr, int kc, int M) | tensorium::GemmKernelBigger< T > | inline
load_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, int M) | tensorium::GemmKernelBigger< T > | inline
load_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, int M) | tensorium::GemmKernelBigger< T > | inline
load_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, int M) | tensorium::GemmKernelBigger< T > | inline
load_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, int M) | tensorium::GemmKernelBigger< T > | inline
load_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, int M) | tensorium::GemmKernelBigger< T > | inline
load_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, int M) | tensorium::GemmKernelBigger< T > | inline
maskload_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskload_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskload_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskload_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskload_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskload_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskstore_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskstore_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskstore_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskstore_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskstore_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
maskstore_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline
matmul(T *A, T *B, T *C, int M, int N, int K) | tensorium::GemmKernelBigger< T > | inline
NThreads | tensorium::GemmKernelBigger< T > | static
pack_blockA(T *A, T *blockA_packed, int mc, int kc, int M) | tensorium::GemmKernelBigger< T > | inline
pack_blockB(T *B, T *blockB_packed, int nc, int kc, int K) | tensorium::GemmKernelBigger< T > | inline
pack_panelA(T *A, T *blockA_packed, int mr, int kc, int M) | tensorium::GemmKernelBigger< T > | inline
pack_panelB(T *B, T *blockB_packed, int nr, int kc, int K) | tensorium::GemmKernelBigger< T > | inline
reg typedef | tensorium::GemmKernelBigger< T >
Simd typedef | tensorium::GemmKernelBigger< T >
SimdWidth | tensorium::GemmKernelBigger< T > | static
store_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, int M) | tensorium::GemmKernelBigger< T > | inline
store_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, int M) | tensorium::GemmKernelBigger< T > | inline
store_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, int M) | tensorium::GemmKernelBigger< T > | inline
store_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, int M) | tensorium::GemmKernelBigger< T > | inline
store_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, int M) | tensorium::GemmKernelBigger< T > | inline
store_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, int M) | tensorium::GemmKernelBigger< T > | inline
TileCols | tensorium::GemmKernelBigger< T > | static
TileRows | tensorium::GemmKernelBigger< T > | static