Fréchet View  1.6.0
A Tool for Exploring Fréchet Distance Algorithms
clm4rm.cpp File Reference
#include <clm4rm.h>
#include <stdio.h>
#include <qdebug.h>

Go to the source code of this file.

Macros

#define str(S)   #S
 

Functions

cl_program load_program (const char *cl_kernel_directory, const char *file_name, cl_context ctx)
 
cl_build_status build_program (cl_program program, cl_device_id device, int tile_m)
 
cl_int clm4rm_setup (const char *cl_kernel_directory, cl_context ctx, cl_device_id device)
 load OpenCL kernels and set up parameters More...
 
void clm4rm_tear_down (cl_context ctx, cl_device_id device)
 release OpenCL resources More...
 
void assertMatrixLayout (const clmatrix_t *gpu_matrix, const mzd_t *host_matrix)
 
int padded_rows (int nrows, int padding)
 calculate the number of padded rows More...
 
clmatrix_t * clm4rm_allocate (int rows, int cols, int rowpadding)
 
void track_heap_size (size_t sz)
 
clmatrix_t * clm4rm_create (rci_t rows, rci_t cols, int rowpadding, int read_only, cl_context ctx)
 create an empty matrix More...
 
clmatrix_t * clm4rm_copy (const mzd_t *host_matrix, int rowpadding, int read_only, cl_context ctx)
 create a copy from a matrix in M4RI format More...
 
void init_events (clm4rm_event_list *list)
 reset events list More...
 
void init_conditions (clm4rm_conditions *cond)
 reset conditions list More...
 
void release_events (clm4rm_event_list *list)
 release events More...
 
void release_conditions (clm4rm_conditions *cond)
 release conditions list More...
 
void merge_events (clm4rm_event_list *a, clm4rm_event_list *b)
 append two lists More...
 
void merge_conditions (clm4rm_conditions *a, clm4rm_conditions *b)
 merge pre-conditions into one list More...
 
void join_conditions (clm4rm_conditions *cond)
 called when the pre-conditions are met. The post-conditions become new pre-conditions. More...
 
cl_uint pre_count (clm4rm_conditions *cond)
 
cl_event * pre_events (clm4rm_conditions *cond)
 
cl_event * push_event (clm4rm_conditions *cond)
 reserve one post-condition event More...
 
cl_event * pushed_event (clm4rm_conditions *cond)
 
void clm4rm_zero_fill (clmatrix_t *gpu_matrix, cl_command_queue queue, clm4rm_conditions *cond)
 Fill a matrix with zero data. The operation is scheduled for asynchronous execution on the GPU. The function returns immediately. Use post-condition events to wait for the execution of the operation. More...
 
void clm4rm_write (clmatrix_t *gpu_matrix, const mzd_t *host_matrix, cl_command_queue queue, clm4rm_conditions *cond)
 Copy matrix data from host memory to GPU. The operation is scheduled for asynchronous execution on the GPU. The function returns immediately. Use post-condition events to wait for the execution of the operation. More...
 
mzd_t * clm4rm_read (mzd_t *host_matrix, clmatrix_t *gpu_matrix, cl_command_queue queue, clm4rm_conditions *cond)
 copy matrix from gpu memory to host More...
 
gpuword * copy_matrix_data (gpuword *G, const mzd_t *M, int padded_rows)
 create a column-major copy from an mzd_t matrix More...
 
void copy_back_matrix_data (mzd_t *M, const gpuword *G, int padded_rows)
 copy back a column-major matrix More...
 

Variables

cl_int clm4rm_error
 latest OpenCL result code. CL_SUCCESS indicates no error. More...
 
cl_kernel clm4rm_and_kernel
 
cl_kernel clm4rm_or_kernel
 
cl_kernel clm4rm_copy_kernel
 
cl_kernel clm4rm_query_diagonal_kernel
 
cl_kernel clm4rm_mul_kernel
 OpenCL kernel for Four-Russians matrix multiplication. More...
 
cl_kernel clcubic_mul_kernel [MAX_TILE_M+1]
 OpenCL kernels for cubic matrix multiplication. Each kernel for a tile size. Actual tile sizes are injected as macros. More...
 
cl_kernel clutri_mul_kernel [MAX_TILE_M+1]
 OpenCL kernels for cubic upper-triangle matrix multiplication. Each kernel for a tile size. Actual tile sizes are injected as macros. More...
 
size_t max_group_size
 max. size of a work group More...
 
size_t max_items [3]
 max. number of items in each dimension More...
 
size_t shared_mem_bytes
 size of shared memory in bytes More...
 
size_t shared_mem_words
 size of shared memory in (32bit) words More...
 
size_t heap_size
 size of allocated memory in bytes More...
 
size_t allocated_size =0
 
size_t max_object_size
 max. object allocation size More...
 
cl_program programs [MAX_TILE_M+1]
 
cl_image_format IMAGE_FORMAT = { CL_R, CL_UNSIGNED_INT32 }
 
bool printed_heap_warning = false
 

Macro Definition Documentation

◆ str

#define str (   S)    #S

Function Documentation

◆ assertMatrixLayout()

void assertMatrixLayout ( const clmatrix_t *  gpu_matrix,
const mzd_t *  host_matrix 
)

Definition at line 175 of file clm4rm.cpp.

◆ build_program()

cl_build_status build_program ( cl_program  program,
cl_device_id  device,
int  tile_m 
)

Definition at line 36 of file clm4rm.cpp.

◆ clm4rm_allocate()

clmatrix_t* clm4rm_allocate ( int  rows,
int  cols,
int  rowpadding 
)

Definition at line 192 of file clm4rm.cpp.

◆ clm4rm_copy()

clmatrix_t* clm4rm_copy ( const mzd_t *  host_matrix,
int  rowpadding,
int  read_only,
cl_context  ctx 
)

create a copy from a matrix in M4RI format

Parameters
host_matrix: matrix data in M4RI format
rowpadding: desired padding
read_only: if 1, create a read-only buffer in GPU memory
ctx: OpenCL context
Returns
a newly allocated matrix structure. Both CPU memory and GPU memory are allocated and filled with data.

Definition at line 254 of file clm4rm.cpp.

◆ clm4rm_create()

clmatrix_t* clm4rm_create ( rci_t  rows,
rci_t  cols,
int  rowpadding,
int  read_only,
cl_context  ctx 
)

create an empty matrix

Parameters
rows: number of rows
cols: number of columns
rowpadding: pad rows to multiples of 32 or 64
read_only: 1 if the GPU memory buffer should be read only
ctx: OpenCL context
Returns
a newly allocated matrix structure. Both CPU memory and GPU memory are allocated.

Definition at line 233 of file clm4rm.cpp.

◆ clm4rm_read()

mzd_t* clm4rm_read ( mzd_t *  host_matrix,
clmatrix_t *  gpu_matrix,
cl_command_queue  queue,
clm4rm_conditions *  cond 
)

copy matrix from gpu memory to host

Parameters
host_matrix: matrix data in M4RI format; if nullptr, allocate a new one
gpu_matrix: a matrix structure
queue: OpenCL command queue
cond: keeps track of pre-conditions and newly created post-conditions
Returns
pointer to a matrix structure in M4RI format

Definition at line 406 of file clm4rm.cpp.

◆ clm4rm_setup()

cl_int clm4rm_setup ( const char *  cl_kernel_directory,
cl_context  ctx,
cl_device_id  device 
)

load OpenCL kernels and set up parameters

Parameters
cl_kernel_directory: location on disk where the kernel source code files (*.cl) are stored
ctx: OpenCL context
device: OpenCL device
Returns
OpenCL error code. 0 means no error.

Definition at line 84 of file clm4rm.cpp.

◆ clm4rm_tear_down()

void clm4rm_tear_down ( cl_context  ctx,
cl_device_id  device 
)

release OpenCL resources

Parameters
ctx: OpenCL context
device: OpenCL device

Definition at line 146 of file clm4rm.cpp.

◆ clm4rm_write()

void clm4rm_write ( clmatrix_t *  gpu_matrix,
const mzd_t *  host_matrix,
cl_command_queue  queue,
clm4rm_conditions *  cond 
)

Copy matrix data from host memory to GPU. The operation is scheduled for asynchronous execution on the GPU. The function returns immediately. Use post-condition events to wait for the execution of the operation.

M4RI data are 64-bit unsigned int; M4RM data are supposed to be 32-bit unsigned int.

Casting and copying is sane if both platforms are LITTLE-ENDIAN.

Definition at line 382 of file clm4rm.cpp.

◆ clm4rm_zero_fill()

void clm4rm_zero_fill ( clmatrix_t *  gpu_matrix,
cl_command_queue  queue,
clm4rm_conditions *  cond 
)

Fill a matrix with zero data. The operation is scheduled for asynchronous execution on the GPU. The function returns immediately. Use post-condition events to wait for the execution of the operation.

Parameters
gpu_matrix: a matrix structure
queue: OpenCL command queue
cond: keeps track of pre-conditions and newly created post-conditions

Definition at line 364 of file clm4rm.cpp.

◆ copy_back_matrix_data()

void copy_back_matrix_data ( mzd_t *  dest,
const gpuword *  src,
int  padded_rows 
)

copy back a column-major matrix

Parameters
dest: destination data in M4RI format
src: input data
padded_rows: number of rows (padded)

Definition at line 460 of file clm4rm.cpp.

◆ copy_matrix_data()

gpuword* copy_matrix_data ( gpuword *  dest,
const mzd_t *  src,
int  padded_rows 
)

create a column-major copy from an mzd_t matrix

Parameters
dest: destination data in clmatrix format
src: input data in M4RI format
padded_rows: number of words (padded)
Returns
pointer to CPU matrix data

Definition at line 436 of file clm4rm.cpp.

◆ init_conditions()

void init_conditions ( clm4rm_conditions *  cond)

reset conditions list

Parameters
cond: a list of pre- and post-conditions

Definition at line 284 of file clm4rm.cpp.

◆ init_events()

void init_events ( clm4rm_event_list *  list)

reset events list

Parameters
list: a list of OpenCL events

Definition at line 277 of file clm4rm.cpp.

◆ join_conditions()

void join_conditions ( clm4rm_conditions *  cond)

called when the pre-conditions are met. The post-conditions become new pre-conditions.

Parameters
cond: a list of pre- and post-conditions

Definition at line 319 of file clm4rm.cpp.

◆ load_program()

cl_program load_program ( const char *  cl_kernel_directory,
const char *  file_name,
cl_context  ctx 
)

Definition at line 11 of file clm4rm.cpp.

◆ merge_conditions()

void merge_conditions ( clm4rm_conditions *  a,
clm4rm_conditions *  b 
)

merge pre-conditions into one list

Parameters
a: a list of pre- and post-conditions
b: another list of pre- and post-conditions

Definition at line 314 of file clm4rm.cpp.

◆ merge_events()

void merge_events ( clm4rm_event_list *  a,
clm4rm_event_list *  b 
)

append two lists

Parameters
a: a list of OpenCL events
b: another list of OpenCL events

Definition at line 303 of file clm4rm.cpp.

◆ padded_rows()

int padded_rows ( int  nrows,
int  padding 
)

calculate the number of padded rows

Parameters
nrows: actual matrix rows
padding: desired padding (32 or 64)
Returns
number of padded rows

Definition at line 185 of file clm4rm.cpp.

◆ pre_count()

cl_uint pre_count ( clm4rm_conditions *  cond)
Parameters
cond: a list of pre- and post-conditions
Returns
number of pre-conditions

Definition at line 331 of file clm4rm.cpp.

◆ pre_events()

cl_event* pre_events ( clm4rm_conditions *  cond)
Parameters
cond: a list of pre- and post-conditions
Returns
pointer to list of pre-conditions

Definition at line 338 of file clm4rm.cpp.

◆ push_event()

cl_event* push_event ( clm4rm_conditions *  cond)

reserve one post-condition event

Parameters
cond: a list of pre- and post-conditions
Returns
pointer to reserved event

Definition at line 348 of file clm4rm.cpp.

◆ pushed_event()

cl_event* pushed_event ( clm4rm_conditions *  cond)
Parameters
cond: a list of pre- and post-conditions
Returns
pointer to last reserved event

Definition at line 357 of file clm4rm.cpp.

◆ release_conditions()

void release_conditions ( clm4rm_conditions *  cond)

release conditions list

Parameters
cond: a list of pre- and post-conditions

Definition at line 297 of file clm4rm.cpp.

◆ release_events()

void release_events ( clm4rm_event_list *  list)

release events

Parameters
list: a list of OpenCL events

Definition at line 290 of file clm4rm.cpp.

◆ track_heap_size()

void track_heap_size ( size_t  sz)

Definition at line 218 of file clm4rm.cpp.

Variable Documentation

◆ allocated_size

size_t allocated_size =0

Definition at line 78 of file clm4rm.cpp.

◆ clcubic_mul_kernel

cl_kernel clcubic_mul_kernel[MAX_TILE_M+1]

OpenCL kernels for cubic matrix multiplication. Each kernel for a tile size. Actual tile sizes are injected as macros.

Definition at line 71 of file clm4rm.cpp.

◆ clm4rm_and_kernel

cl_kernel clm4rm_and_kernel

Definition at line 63 of file clm4rm.cpp.

◆ clm4rm_copy_kernel

cl_kernel clm4rm_copy_kernel

Definition at line 65 of file clm4rm.cpp.

◆ clm4rm_error

cl_int clm4rm_error

latest OpenCL result code. CL_SUCCESS indicates no error.

Definition at line 9 of file clm4rm.cpp.

◆ clm4rm_mul_kernel

cl_kernel clm4rm_mul_kernel

OpenCL kernel for Four-Russians matrix multiplication.

Definition at line 68 of file clm4rm.cpp.

◆ clm4rm_or_kernel

cl_kernel clm4rm_or_kernel

Definition at line 64 of file clm4rm.cpp.

◆ clm4rm_query_diagonal_kernel

cl_kernel clm4rm_query_diagonal_kernel

Definition at line 66 of file clm4rm.cpp.

◆ clutri_mul_kernel

cl_kernel clutri_mul_kernel[MAX_TILE_M+1]

OpenCL kernels for cubic upper-triangle matrix multiplication. Each kernel for a tile size. Actual tile sizes are injected as macros.

Definition at line 72 of file clm4rm.cpp.

◆ heap_size

size_t heap_size

size of allocated memory in bytes

Definition at line 78 of file clm4rm.cpp.

◆ IMAGE_FORMAT

cl_image_format IMAGE_FORMAT = { CL_R, CL_UNSIGNED_INT32 }

Definition at line 173 of file clm4rm.cpp.

◆ max_group_size

size_t max_group_size

max. size of a work group

Definition at line 74 of file clm4rm.cpp.

◆ max_items

size_t max_items[3]

max. number of items in each dimension

Definition at line 75 of file clm4rm.cpp.

◆ max_object_size

size_t max_object_size

max. object allocation size

Definition at line 79 of file clm4rm.cpp.

◆ printed_heap_warning

bool printed_heap_warning = false

Definition at line 216 of file clm4rm.cpp.

◆ programs

cl_program programs[MAX_TILE_M+1]

Definition at line 81 of file clm4rm.cpp.

◆ shared_mem_bytes

size_t shared_mem_bytes

size of shared memory in bytes

Definition at line 77 of file clm4rm.cpp.

◆ shared_mem_words

size_t shared_mem_words

size of shared memory in (32bit) words

Definition at line 77 of file clm4rm.cpp.