9 # define read_only_global __read_only image2d_t 10 # define write_only_global __write_only image2d_t 15 # define read(M,row,col) read_imageui(M,(int2)(row,col)).x 16 # define write(M,row,col,x) write_imageui(M,(int2)(row,col),(uint4)(x,0,0,0)) 21 # define read_only_global __global gpuword* 22 # define write_only_global __global gpuword* 23 # define read(M,row,col) M[(col)*M ## _nrows + row] 24 # define write(M,row,col,x) M[(col)*M ## _nrows + row]=x 40 const int A_nrows = get_global_size(0);
41 #define B_nrows A_nrows 42 #define C_nrows A_nrows 44 const int row = get_global_id(0);
45 const int col = get_global_id(1);
50 write(C,row,col, a & b);
65 const int A_nrows = get_global_size(0);
67 const int row = get_global_id(0);
68 const int col = get_global_id(1);
73 write(C,row,col, a | b);
85 __global
unsigned int* C,
int C_rowstride,
86 __global
unsigned int* B,
int B_rowstride,
89 const int i = get_global_id(0);
90 const int j = get_global_id(1);
92 unsigned int word_offset = offset/32;
93 unsigned int bit_offset = offset%32;
95 C = C + i*C_rowstride + word_offset;
96 B = B + i*B_rowstride;
100 cj = (C[0] & ((1<<bit_offset)-1));
102 cj = (B[j-1] >> (32-bit_offset));
103 cj |= (B[j] << bit_offset);
108 #define WRITE_ATOMIC 1 123 __global
int* result)
125 const int i = get_global_id(0);
132 if ((word & 1) && (*result==-1))
133 atomic_xchg (result, i);
136 if ((word & 1) && (*result==-1))
__kernel void clm4rm_query_diagonal(read_only_global M, int M_nrows, volatile __global int *result)
Query Matrix Diagonal.
unsigned int gpuword
A GPU word has 32 bits.
__kernel void clm4rm_or(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise OR on three matrices: C := A | B.
#define read(M, row, col)
__kernel void clm4rm_and(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise AND on three matrices: C := A & B.
#define write_only_global
#define write(M, row, col, x)
__kernel void clm4rm_copy(__global unsigned int *C, int C_rowstride, __global unsigned int *B, int B_rowstride, int offset)
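Copy B into C with an offset; the offset is split into a 32-bit word offset (offset/32) and a bit offset within the word (offset%32).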