Fréchet View  1.6.0
A Tool for Exploring Fréchet Distance Algorithms
clm4rm_mul.cl File Reference

Go to the source code of this file.

Macros

#define CEILCOLS(i)   ((i+31)/32)
 
#define read_only_global   __global gpuword*
 
#define write_only_global   __global gpuword*
 
#define read(M, row, col)   M[(col)*M ## _nrows + row]
 
#define write(M, row, col, x)   M[(col)*M ## _nrows + row]=x
 
#define MIN(x, y)   (((x) < (y)) ? (x) : (y))
 
#define POW2(x)   (((gpuword)1) << x)
 
#define A_width   CEILCOLS(A_ncols)
 
#define C_ncols   B_ncols
 
#define C_width   CEILCOLS(C_ncols)
 
#define B_nrows   A_ncols
 
#define C_nrows   A_nrows
 

Typedefs

typedef unsigned int gpuword
 

Functions

gpuword read_bits (gpuword a0, gpuword a1, int spot, int n)
 read n bits (at most 32, i.e. one gpuword) from memory, not necessarily aligned to word boundaries More...
 
gpuword combinate (gpuword x, int k, __local gpuword *T)
 
__kernel void clm4rm_mul (write_only_global C, read_only_global A, read_only_global B, __local gpuword *T, int k, int r0, int A_nrows, int A_ncols, int B_ncols)
 OpenCL kernel for M4R matrix Multiplication C := A*B. More...
 

Macro Definition Documentation

◆ A_width

#define A_width   CEILCOLS(A_ncols)

◆ B_nrows

#define B_nrows   A_ncols

◆ C_ncols

#define C_ncols   B_ncols

Definition at line 323 of file ocl_prototype.cpp.

◆ C_nrows

#define C_nrows   A_nrows

◆ C_width

#define C_width   CEILCOLS(C_ncols)

Definition at line 324 of file ocl_prototype.cpp.

◆ CEILCOLS

#define CEILCOLS (   i)    ((i+31)/32)

Definition at line 7 of file clm4rm_mul.cl.

◆ MIN

#define MIN (   x,
  y 
)    (((x) < (y)) ? (x) : (y))

Definition at line 30 of file clm4rm_mul.cl.

◆ POW2

#define POW2 (   x)    (((gpuword)1) << x)

Definition at line 31 of file clm4rm_mul.cl.

◆ read

#define read (   M,
  row,
  col 
)    M[(col)*M ## _nrows + row]

Definition at line 26 of file clm4rm_mul.cl.

◆ read_only_global

#define read_only_global   __global gpuword*

Definition at line 24 of file clm4rm_mul.cl.

◆ write

#define write (   M,
  row,
  col,
  x 
)    M[(col)*M ## _nrows + row]=x

Definition at line 27 of file clm4rm_mul.cl.

◆ write_only_global

#define write_only_global   __global gpuword*

Definition at line 25 of file clm4rm_mul.cl.

Typedef Documentation

◆ gpuword

typedef unsigned int gpuword

Word type used by the Method of the Four Russians (M4R) multiplication.

Definition at line 5 of file clm4rm_mul.cl.

Function Documentation

◆ clm4rm_mul()

__kernel void clm4rm_mul ( write_only_global  C,
read_only_global  A,
read_only_global  B,
__local gpuword *  T,
int  k,
int  r0,
int  A_nrows,
int  A_ncols,
int  B_ncols 
)

OpenCL kernel for M4R matrix Multiplication C := A*B.

Parameters
C: destination matrix
A: source matrix
B: source matrix
T: buffer used for lookup tables
k: parameter k
r0: first row in A
A_nrows: number of rows in A == number of rows in C
A_ncols: number of cols in A == number of rows in B
B_ncols: number of cols in B == number of cols in C

Definition at line 74 of file clm4rm_mul.cl.

◆ combinate()

gpuword combinate ( gpuword  x,
int  k,
__local gpuword *  T 
)

Definition at line 52 of file clm4rm_mul.cl.

◆ read_bits()

gpuword read_bits ( gpuword  a0,
gpuword  a1,
int  spot,
int  n 
)

Reads n bits (at most 32, i.e. one gpuword) from memory, not necessarily aligned to word boundaries. The bits may span the two consecutive words a0 and a1.

Parameters
a0: first word to read from
a1: second word to read from
spot: bit offset
n: number of bits to read
Returns
extracted n bits

Definition at line 41 of file clm4rm_mul.cl.