Macros
#define	CEILCOLS(i) ((i+31)/32)

#define	read_only_global __global gpuword*

#define	write_only_global __global gpuword*

#define	read(M, row, col) M[(col)*M ## _nrows + row]

#define	write(M, row, col, x) M[(col)*M ## _nrows + row]=x

#define	MIN(x, y) (((x) < (y)) ? (x) : (y))

#define	POW2(x) (((gpuword)1) << x)

#define	A_width CEILCOLS(A_ncols)

#define	C_ncols B_ncols

#define	C_width CEILCOLS(C_ncols)

#define	B_nrows A_ncols

#define	C_nrows A_nrows

Typedefs
typedef unsigned int	gpuword

Functions
gpuword	read_bits (gpuword a0, gpuword a1, int spot, int n)
	read 32 bits from memory, not necessarily aligned to word boundaries More...

gpuword	combinate (gpuword x, int k, __local gpuword *T)

__kernel void	clm4rm_mul (write_only_global C, read_only_global A, read_only_global B, __local gpuword *T, int k, int r0, int A_nrows, int A_ncols, int B_ncols)
	OpenCL kernel for M4RM matrix multiplication C := A*B. More...

Macro Definition Documentation

◆ A_width

#define A_width CEILCOLS(A_ncols)

◆ B_nrows

#define B_nrows A_ncols

◆ C_ncols

#define C_ncols B_ncols

Definition at line 323 of file ocl_prototype.cpp.

◆ C_nrows

#define C_nrows A_nrows

◆ C_width

#define C_width CEILCOLS(C_ncols)

Definition at line 324 of file ocl_prototype.cpp.

◆ CEILCOLS

#define CEILCOLS ( i ) ((i+31)/32)

Definition at line 7 of file clm4rm_mul.cl.

◆ MIN

#define MIN	(	x,
		y
	)	(((x) < (y)) ? (x) : (y))

Definition at line 30 of file clm4rm_mul.cl.

◆ POW2

#define POW2 ( x ) (((gpuword)1) << x)

Definition at line 31 of file clm4rm_mul.cl.

◆ read

#define read	(	M,
		row,
		col
	)	M[(col)*M ## _nrows + row]

Definition at line 26 of file clm4rm_mul.cl.

◆ read_only_global

#define read_only_global __global gpuword*

Definition at line 24 of file clm4rm_mul.cl.

◆ write

#define write	(	M,
		row,
		col,
		x
	)	M[(col)*M ## _nrows + row]=x

Definition at line 27 of file clm4rm_mul.cl.

◆ write_only_global

#define write_only_global __global gpuword*

Definition at line 25 of file clm4rm_mul.cl.

Typedef Documentation

◆ gpuword

typedef unsigned int gpuword

Machine word type used by the Method of the Four Russians multiplication (M4RM).

Definition at line 5 of file clm4rm_mul.cl.

Function Documentation

◆ clm4rm_mul()

__kernel void clm4rm_mul	(	write_only_global	C,
		read_only_global	A,
		read_only_global	B,
		__local gpuword *	T,
		int	k,
		int	r0,
		int	A_nrows,
		int	A_ncols,
		int	B_ncols
	)

OpenCL kernel for M4RM (Method of the Four Russians) matrix multiplication C := A*B.

Parameters

C	destination matrix
A	source matrix
B	source matrix
T	buffer used for lookup tables
k	parameter k
r0	first row in A
A_nrows	number of rows in A == number of rows in C
A_ncols	number of cols in A == number of rows in B
B_ncols	number of cols in B == number of cols in C

Definition at line 74 of file clm4rm_mul.cl.

◆ combinate()

gpuword combinate	(	gpuword	x,
		int	k,
		__local gpuword *	T
	)

Definition at line 52 of file clm4rm_mul.cl.

◆ read_bits()

gpuword read_bits	(	gpuword	a0,
		gpuword	a1,
		int	spot,
		int	n
	)

read 32 bits from memory, not necessarily aligned to word boundaries