Fréchet View  1.6.0
A Tool for Exploring Fréchet Distance Algorithms
clm4rm_bitwise.cl
Go to the documentation of this file.
1 
2 
/** A GPU word: the unit in which matrix bits are packed (32 bits). */
typedef unsigned int gpuword;

/*
 * Matrix access abstraction.
 *
 *   read_only_global / write_only_global  - kernel parameter types for matrices
 *   read(M,row,col)                       - fetch the word at (row,col) of M
 *   write(M,row,col,x)                    - store word x at (row,col) of M
 *
 * Build with IMAGE2D != 0 to keep matrices in image (texture) memory,
 * otherwise they are plain __global buffers.
 */
#if IMAGE2D
//
// Matrix stored in texture memory
//
# define read_only_global __read_only image2d_t
# define write_only_global __write_only image2d_t
// Note: column-major format
// a matrix column is actually a row (y-coordinate) in Image2D
// a matrix row is actually a column (x-coordinate) in Image2D
// Pixel contains only one (red) component
# define read(M,row,col) read_imageui(M,(int2)(row,col)).x
# define write(M,row,col,x) write_imageui(M,(int2)(row,col),(uint4)(x,0,0,0))
#else
//
// Matrix stored in __global memory
//
# define read_only_global __global gpuword*
# define write_only_global __global gpuword*
// Column-major layout: word (row,col) lives at index col*nrows + row.
// M must be a plain identifier, and a value named <M>_nrows (variable or
// macro) must be in scope at the point of use, because the row count is
// looked up by token pasting.
// row and x are parenthesised so that arguments containing low-precedence
// operators (e.g. `r & 1`) are evaluated as a unit.
# define read(M,row,col) M[(col)*M ## _nrows + (row)]
# define write(M,row,col,x) (M[(col)*M ## _nrows + (row)] = (x))
#endif
26 
27 
/**
 * OpenCL kernel for bitwise AND on three matrices: C := A & B.
 *
 * Each work item processes exactly one gpuword, addressed by
 * (row, col) = (get_global_id(0), get_global_id(1)).
 *
 * @param C destination matrix
 * @param A first operand
 * @param B second operand
 */
__kernel void clm4rm_and(
        write_only_global C,
        read_only_global A,
        read_only_global B)
{
    // The row count is taken from the launch size in dimension 0. The
    // __global variants of read()/write() look it up as <matrix>_nrows.
    const int A_nrows = get_global_size(0);
    // B and C are addressed with the same row count as A.
    // Note: these macros remain defined for the rest of the file.
#define B_nrows A_nrows
#define C_nrows A_nrows

    const int row = get_global_id(0);
    const int col = get_global_id(1);

    const gpuword a = read(A,row,col);
    const gpuword b = read(B,row,col);

    write(C,row,col, a & b);
}
52 
/**
 * OpenCL kernel for bitwise OR on three matrices: C := A | B.
 *
 * Each work item processes exactly one gpuword, addressed by
 * (row, col) = (get_global_id(0), get_global_id(1)).
 *
 * @param C destination matrix
 * @param A first operand
 * @param B second operand
 */
__kernel void clm4rm_or(
        write_only_global C,
        read_only_global A,
        read_only_global B)
{
    // The row count is taken from the launch size in dimension 0. The
    // __global variants of read()/write() look it up as <matrix>_nrows.
    const int A_nrows = get_global_size(0);
    // B and C share A's row count. Defined here as well so this kernel does
    // not depend on another kernel's body having defined them first; an
    // identical macro redefinition is permitted by the preprocessor.
#define B_nrows A_nrows
#define C_nrows A_nrows

    const int row = get_global_id(0);
    const int col = get_global_id(1);

    const gpuword a = read(A,row,col);
    const gpuword b = read(B,row,col);

    write(C,row,col, a | b);
}
75 
/**
 * Copy with offset: writes the rows of B into C, shifted by `offset` bits.
 *
 * Work item (i, j) produces destination word j of row i. The destination
 * row starts offset/32 words into row i of C; within that, every source
 * word is shifted left by offset%32 bits and the bits shifted out of
 * B[j-1] are carried into the low end of word j. The low offset%32 bits
 * already present in the first destination word are preserved.
 *
 * @param C           destination buffer
 * @param C_rowstride distance between consecutive rows of C, in words
 * @param B           source buffer
 * @param B_rowstride distance between consecutive rows of B, in words
 * @param offset      bit offset into each destination row
 *                    (NOTE(review): assumed non-negative - confirm with caller)
 */
__kernel void clm4rm_copy(
        __global unsigned int* C, int C_rowstride,
        __global unsigned int* B, int B_rowstride,
        int offset )
{
    const int i = get_global_id(0);
    const int j = get_global_id(1);

    const unsigned int word_offset = offset/32;
    const unsigned int bit_offset = offset%32;

    // Advance both pointers to the start of this work item's row.
    C = C + i*C_rowstride + word_offset;
    B = B + i*B_rowstride;

    unsigned int cj;
    if (j==0) {
        // First word: keep the destination bits that lie below the offset.
        // Unsigned literal avoids overflowing a signed int at bit_offset == 31.
        cj = C[0] & ((1u << bit_offset) - 1u);
    } else if (bit_offset != 0) {
        // Carry in the high bits shifted out of the previous source word.
        cj = B[j-1] >> (32 - bit_offset);
    } else {
        // Word-aligned copy: nothing to carry. A shift by 32 must be avoided
        // here; OpenCL C reduces the shift count modulo 32, which would OR
        // the entire previous word into the result.
        cj = 0;
    }
    cj |= (B[j] << bit_offset);
    C[j] = cj;
}
106 
107 
// Select how clm4rm_query_diagonal publishes its result:
// 1 = atomic compare-and-exchange, 0 = plain store.
#define WRITE_ATOMIC 1

/**
 * Query Matrix Diagonal.
 *
 * Work item i tests bit M[i][i]. If the bit is set and *result still
 * holds -1, the work item tries to store i into *result. When several
 * diagonal bits are set, which index ends up in *result is not determined.
 *
 * @param M       matrix to inspect
 * @param M_nrows row count of M (used by read() in the __global variant)
 * @param result  output slot; it is only written while it holds -1
 *                (NOTE(review): presumably initialised to -1 by the host - confirm)
 */
__kernel void clm4rm_query_diagonal(
        read_only_global M,
        int M_nrows,
#if WRITE_ATOMIC
        volatile
#endif
        __global int* result)
{
    const int i = get_global_id(0);

    // query M[i][i]: bit i of row i lives in word column i/32, at bit i%32
    gpuword word = read(M, i, i/32);
    word >>= i%32;

#if WRITE_ATOMIC
    // The plain read is only a cheap pre-check; the compare-and-exchange
    // makes the "store only if still -1" step atomic.
    if ((word & 1) && (*result==-1)) {
        atomic_cmpxchg(result, -1, i);
        // If the exchange fails, someone else was faster. No matter.
    }
#else
    if ((word & 1) && (*result==-1)) {
        *result = i;
    }
#endif
}
__kernel void clm4rm_query_diagonal(read_only_global M, int M_nrows, volatile __global int *result)
Query Matrix Diagonal.
unsigned int gpuword
a GPU word has 32 bits
Definition: clcubic_mul.cl:74
__kernel void clm4rm_or(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise OR on three matrices: C := A | B.
#define read_only_global
#define read(M, row, col)
__kernel void clm4rm_and(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise AND on three matrices: C := A & B.
#define write_only_global
#define WRITE_ATOMIC
#define write(M, row, col, x)
__kernel void clm4rm_copy(__global unsigned int *C, int C_rowstride, __global unsigned int *B, int B_rowstride, int offset)
copy with offset
unsigned int gpuword