Fréchet View  1.6.0
A Tool for Exploring Fréchet Distance Algorithms
graph_cl.cpp
Go to the documentation of this file.
1 
2 #include <graph_cl.h>
3 #include <concurrency.h>
4 
5 using namespace frechet;
6 using namespace reach;
7 using namespace app;
8 
9 /*
10  * Factory Methods
11  */
13 {
14  if (ConcurrencyContext::hasGpuSupport())
15  return GraphPtr(new GraphCL(model));
16  else
17  return GraphPtr(new Graph(model));
18 }
19 
// Factory method overload: create a Graph restricted by the mask 'hmask'.
// Chooses the GPU-backed GraphCL when OpenCL support is available.
// NOTE(review): the signature line (orig. listing line 20) was lost in
// extraction -- presumably "GraphPtr newGraph(const GraphModel::ptr model,
// <mask type> hmask)"; the type of 'hmask' cannot be determined from this
// view; confirm against graph_cl.h.
{
 if (ConcurrencyContext::hasGpuSupport())
 return GraphPtr(new GraphCL(model,hmask));
 else
 return GraphPtr(new Graph(model, hmask));
}
27 
// Factory method overload: create a Graph from a reachability structure
// 'str'. Chooses the GPU-backed GraphCL when OpenCL support is available.
// NOTE(review): the signature line (orig. listing line 28) was lost in
// extraction -- presumably "GraphPtr newGraph(const GraphModel::ptr model,
// <structure type> str)" (likely the Structure from structure.h); confirm
// against graph_cl.h.
{
 if (ConcurrencyContext::hasGpuSupport())
 return GraphPtr(new GraphCL(model,str));
 else
 return GraphPtr(new Graph(model, str));
}
35 
36 
38  : Graph(model),
39  clmtx{ {nullptr,nullptr},{nullptr,nullptr} },
40  temps(), cond(), diagonalElementBuffer(nullptr)
41 {
42  init_conditions(&cond);
43 }
44 
// Constructor overload taking an additional mask argument; forwards it to
// Graph(model, hmask) and starts with empty GPU-side state.
// NOTE(review): the signature line (orig. listing line 45) was lost in
// extraction -- presumably "GraphCL::GraphCL(const GraphModel::ptr model,
// <mask type> hmask)"; confirm against graph_cl.h.
 : Graph(model, hmask),
 clmtx{ { nullptr,nullptr },{ nullptr,nullptr } },
 temps(), cond(), diagonalElementBuffer(nullptr)
{
 init_conditions(&cond);
}
52 
// Constructor overload taking a reachability structure; forwards it to
// Graph(model, str) and starts with empty GPU-side state.
// NOTE(review): the signature line (orig. listing line 53) was lost in
// extraction -- presumably "GraphCL::GraphCL(const GraphModel::ptr model,
// <structure type> str)"; confirm against graph_cl.h.
 : Graph(model, str),
 clmtx{ {nullptr, nullptr}, { nullptr,nullptr } },
 temps(), cond(), diagonalElementBuffer(nullptr)
{
 init_conditions(&cond);
}
60 
62  : Graph(that)
63 {
64  copy(that);
65 }
66 
68  : Graph(that)
69 {
70  swap(that);
71 }
72 
74 {
75  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
76  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2) {
77  clm4rm_free(clmtx[o1][o2]);
78  clmtx[o1][o2] = nullptr;
79  }
80  for (clmatrix_t* t : temps)
81  clm4rm_free(t);
82  temps.clear();
83 
85  clReleaseMemObject(diagonalElementBuffer);
87 }
88 
90  Graph::operator=(that);
91  copy(that);
92  return *this;
93 }
94 
96  Graph::operator=(that);
97  swap(that);
98  return *this;
99 }
100 
101 void GraphCL::copy(const GraphCL& that)
102 {
103  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
104  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
105  if (that.clmtx[o1][o2])
106  clmtx[o1][o2] = clm4rm_copy(that.mtx[o1][o2], 32, true, ConcurrencyContext::clContext());
107  else
108  clmtx[o1][o2] = nullptr;
109  // don't copy diagonalElementBuffer
110 }
111 
113 {
114  // steal from 'that'
115  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
116  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
117  std::swap(clmtx[o1][o2], that.clmtx[o1][o2]);
119 }
120 
122 {
123  Graph::release(o1, o2);
124  if (clmtx[o1][o2]) {
125  clm4rm_free(clmtx[o1][o2]);
126  clmtx[o1][o2] = nullptr;
127  }
128 }
129 
135  Graph::finalize(); // releaseIfZero
136  synchToGpu();
137 }
// GraphCL::resetConditions() -- both the signature line (orig. listing
// line 142, per the Doxygen index "virtual void resetConditions() override",
// definition graph_cl.cpp:142) and the single body line (orig. 144) were
// lost in extraction. The body presumably re-initializes 'cond', e.g. via
// release_conditions()/init_conditions() -- confirm against version control.
{
}
146 
148  // copy to GPU memory
149  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
150  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
151  {
152  mzd_t* M = mtx[o1][o2];
153  clmatrix_t* G = clmtx[o1][o2];
154 
155  if (!M && !G)
156  continue;
157 
158  if (G) {
159  clm4rm_free(G);
160  G = clmtx[o1][o2] = nullptr;
161  }
162 
163  if (M) {
164  G = clmtx[o1][o2] = clm4rm_create(M->nrows, M->ncols, 32,
165  true, ConcurrencyContext::clContext());
166  clm4rm_write(G, M, ConcurrencyContext::clQueue(), &cond);
167  // Note: clm4rm_copy does the same but is always _blocking_
169  }
170  }
171 }
172 
174  // copy to GPU memory
175  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
176  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
177  {
178  if (!mtx[o1][o2] && !clmtx[o1][o2])
179  continue;
180 
181  if (clmtx[o1][o2]) {
182  mtx[o1][o2] = clm4rm_read(mtx[o1][o2], clmtx[o1][o2],
183  ConcurrencyContext::clQueue(), &cond);
185  }
186  }
187 }
188 
// (*this)_VV &= that_VV
// COMBINE operation: bitwise AND of the vertical-vertical adjacency parts,
// performed on the GPU. An empty operand short-circuits the operation.
void GraphCL::combine(const Graph* Bg)
{
 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
 Q_ASSERT(B);

 clmatrix_t* A_VV = this->clmtx[VERTICAL][VERTICAL];
 clmatrix_t* B_VV = B->clmtx[VERTICAL][VERTICAL];

 // invariant: a host matrix implies a matching GPU matrix
 Q_ASSERT((this->mtx[VERTICAL][VERTICAL]==nullptr) || (A_VV!=nullptr));
 Q_ASSERT((B->mtx[VERTICAL][VERTICAL]==nullptr) || (B_VV!=nullptr));

 if (!A_VV) return;
 if (!B_VV) {
 // NOTE(review): one line (orig. listing line 203) was lost in extraction
 // here -- presumably clears/releases this VV part (A & empty == empty);
 // confirm against version control.
 return;
 }

 // chain B's pending OpenCL events into our condition list
 merge_conditions(&cond,&B->cond);
#if IMAGE2D
 // can't do clm4rm_and in-place. Use temp matrix instead:
 // NOTE(review): tempMatrix is called with two arguments here although the
 // definition at graph_cl.cpp:331 takes (rows, cols, pool) -- presumably a
 // default argument pool=nullptr in the header; confirm.
 clmatrix_t* T = tempMatrix(A_VV->nrows,A_VV->ncols);
 clm4rm_and(T, A_VV, B_VV, ConcurrencyContext::clQueue(),&cond);
 std::swap(A_VV->data,T->data);
 // don't release T. Everything is (will be) asynchronous.
#else
 // VV are upper triangular matrices
 clm4rm_and(A_VV, A_VV, B_VV, ConcurrencyContext::clQueue(),&cond);
 // TODO clutri_and
#endif
 // NOTE(review): one line (orig. listing line 219) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.
}
221 
222 void merge2(const Graph* A, const Graph* B, MatrixPool* pool);
223 
224 void merge3(const Graph* A, const Graph* B, MatrixPool* pool);
225 
// @return (*this)_VV * that_VV
// MERGE operation: this_VV := A_VV * B_VV, a Boolean upper-triangular
// matrix product scheduled asynchronously on the GPU.
void GraphCL::merge2(const Graph* Ag, const Graph* Bg, MatrixPool* pool)
{
 const GraphCL* A = dynamic_cast<const GraphCL*>(Ag);
 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
 Q_ASSERT(A && B);
 Q_ASSERT(is_adjacent_to(*A, *B));

 GraphCL* C = this; // NOTE(review): unused local

 clmatrix_t* A_VV = A->clmtx[VERTICAL][VERTICAL];
 clmatrix_t* B_VV = B->clmtx[VERTICAL][VERTICAL];

 // invariant: a host matrix implies a matching GPU matrix
 Q_ASSERT((A->mtx[VERTICAL][VERTICAL]==nullptr) || (A_VV!=nullptr));
 Q_ASSERT((B->mtx[VERTICAL][VERTICAL]==nullptr) || (B_VV!=nullptr));

 // empty operand => empty result
 if (!A_VV || !B_VV)
 return;

 // chain pending OpenCL events of both operands into our condition list
 merge_conditions(&cond,&A->cond);
 merge_conditions(&cond,&B->cond);

 // both matrices are upper triangular
 size2_t max_tile;
 ConcurrencyContext::maxMaxtrixTile(max_tile);
 clmatrix_t* C_VV = new_clmatrix(A_VV->nrows,B_VV->ncols,pool,&cond);
 // NOTE(review): one line (orig. listing line 252) was lost in extraction
 // here -- confirm against version control.

 clutri_mul(C_VV, A_VV, B_VV, max_tile, ConcurrencyContext::clQueue(), &cond);
 // NOTE(review): one line (orig. listing line 255) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.

 this->clmtx[VERTICAL][VERTICAL] = C_VV;
 Q_ASSERT(C_VV->nrows == this->mask[VERTICAL].len());
 Q_ASSERT(C_VV->ncols == this->mask[VERTICAL].len());
}
261 
// @return (*this)_HV * that_VV * (*this)_VH
// Final MERGE operation: this_HH := A_HV * B_VV * A_VH, two chained Boolean
// matrix products scheduled asynchronously on the GPU (via a temp matrix).
void GraphCL::merge3(const Graph* Ag, const Graph* Bg, MatrixPool* pool)
{
 const GraphCL* A = dynamic_cast<const GraphCL*>(Ag);
 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
 Q_ASSERT(A && B);
 Q_ASSERT(is_adjacent_to(*A, *B));
 Q_ASSERT(is_adjacent_to(*B, *A));

 clmatrix_t* A_HV = A->clmtx[HORIZONTAL][VERTICAL];
 clmatrix_t* B_VV = B->clmtx[VERTICAL][VERTICAL];
 clmatrix_t* A_VH = A->clmtx[VERTICAL][HORIZONTAL];

 // invariant: host and GPU matrices exist (or not) in lock-step
 Q_ASSERT((A_HV!=nullptr)==(A->mtx[HORIZONTAL][VERTICAL]!=nullptr));
 Q_ASSERT((B->mtx[VERTICAL][VERTICAL]==nullptr) || (B_VV!=nullptr));
 Q_ASSERT((A_VH!=nullptr)==(A->mtx[VERTICAL][HORIZONTAL]!=nullptr));

 if (!A_HV || !B_VV || !A_VH)
 return; // shortcut for empty

 // chain pending OpenCL events of both operands into our condition list
 merge_conditions(&cond,&A->cond);
 merge_conditions(&cond,&B->cond);

 size2_t max_tile;
 ConcurrencyContext::maxMaxtrixTile(max_tile);
 clmatrix_t* temp = tempMatrix(A_HV->nrows,B_VV->ncols,pool);
 clmatrix_t* C_HH = new_clmatrix(temp->nrows,A_VH->ncols,pool,&cond);
 // NOTE(review): one line (orig. listing line 289) was lost in extraction
 // here -- confirm against version control.

 clcubic_mul(temp, A_HV, B_VV, max_tile, ConcurrencyContext::clQueue(), &cond);
 // NOTE(review): one line (orig. listing line 292) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.

 clcubic_mul(C_HH, temp, A_VH, max_tile, ConcurrencyContext::clQueue(), &cond);
 // NOTE(review): one line (orig. listing line 295) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.

 this->clmtx[HORIZONTAL][HORIZONTAL] = C_HH;
 Q_ASSERT(C_HH->nrows == this->mask[HORIZONTAL].len());
 Q_ASSERT(C_HH->ncols == this->mask[HORIZONTAL].len());
}
301 
303 {
304  diagonalElement=-1;
306  if (!M) return;
307 
308  Q_ASSERT(diagonalElementBuffer==nullptr);
309 
310  // asynchronous call. returns buffer but no result
311  diagonalElementBuffer = clm4rm_query_diagonal(M, ConcurrencyContext::clContext(), ConcurrencyContext::clQueue(), &cond);
313 }
314 
316 {
318  // copy result from GPU; blocking callAda!
319  int i = clm4rm_query_result(diagonalElementBuffer, ConcurrencyContext::clQueue(), &cond);
320  if (i >= 0)
322  else
323  diagonalElement = -1;
324 
325  diagonalElementBuffer=nullptr; // was released by clm4rm_query_result
326  }
327  return diagonalElement;
328 }
329 
330 
331 clmatrix_t* GraphCL::tempMatrix(int rows, int cols, MatrixPool* pool) const
332 {
333  // odd-sized temp matrices are not pooled
334  clmatrix_t* t = new_clmatrix(rows, cols, pool, &cond);
335  temps.push_back(t);
336  return t;
337 }
338 
340  Graph::release(o1,o2,pool);
341  if (clmtx[o1][o2]) {
342  reclaim(clmtx[o1][o2],pool);
343  clmtx[o1][o2] = nullptr;
344  }
345  // release temp matrices to pool
346  if (pool) {
347  for (clmatrix_t *T : temps)
348  reclaim(T, pool);
349  temps.clear();
350  }
351 }
clmatrix_t * tempMatrix(int rows, int cols, MatrixPool *pool) const
allocate a temporary matrix
Definition: graph_cl.cpp:331
clmatrix_t * new_clmatrix(int rows, int cols, MatrixPool *pool, clm4rm_conditions *cond)
allocate a new clmatrix_t structure (a matrix for the CLM4RM algorithms)
virtual void synchFromGpu() override
copy adjacency matrix data back from GPU memory to CPU memory
Definition: graph_cl.cpp:173
GraphCL(const GraphModel::ptr model)
empty constructor
Definition: graph_cl.cpp:37
OpenCL boolean matrix data structure. Data is arranged in 32 bit words.
Definition: clm4rm.h:98
void swap(gpuword **A, gpuword **B)
Reachability Graph with additional storage in GPU memory.
Definition: graph_cl.h:23
virtual void resetConditions() override
Definition: graph_cl.cpp:142
__kernel void clm4rm_query_diagonal(read_only_global M, int M_nrows, volatile __global int *result)
Query Matrix Diagonal.
clmatrix_t * clmtx[2][2]
data stored on GPU memory
Definition: graph_cl.h:26
size_t size2_t[2]
two-dimensional size; used for various OpenCL parameters
Definition: clm4rm.h:67
void merge2(const Graph *A, const Graph *B, MatrixPool *pool)
void clm4rm_free(clmatrix_t *gpu_matrix)
release memory (CPU and GPU)
__kernel void clcubic_mul(write_only_global C, read_only_global A, read_only_global B, __local gpuword *A_buf, __local gpuword *B_buf, int A_nrows, int A_ncols)
OpenCL kernel for cubic matrix multiplication.
Definition: clcubic_mul.cl:112
global definitions for all algorithms.
mzd_t * mtx[2][2]
adjacency matrix (M4RI structure) split into four parts to allow for memory savings.
Definition: graph_m4ri.h:45
void copy(const GraphCL &that)
copy data
Definition: graph_cl.cpp:101
virtual void finalize() override
Definition: graph_cl.cpp:134
std::list< clmatrix_t * > temps
temporary matrices
Definition: graph_cl.h:28
boost::shared_ptr< GraphModel > ptr
smart pointer to a GraphModel object
Definition: graph_model.h:307
boost::shared_ptr< Graph > GraphPtr
Definition: graph_m4ri.h:14
int diagonalElement
result of call to searchDiagonalElement
Definition: graph_m4ri.h:78
virtual void combine(const Graph *P) override
apply the COMBINE operation, filtering edges with valid placements. Effectively performs a Boolean AN...
Definition: graph_cl.cpp:190
virtual void merge2(const Graph *A, const Graph *B, MatrixPool *pool) override
apply the MERGE operation, computing the transitive closure of two graphs. Effectively performs a mat...
Definition: graph_cl.cpp:227
virtual void synchToGpu() override
copy adjacency matrix data to GPU memory
Definition: graph_cl.cpp:147
rci_t ncols
Number of columns.
Definition: clm4rm.h:101
int lower
lower index (inclusive)
Definition: graph_model.h:21
void clm4rm_write(clmatrix_t *gpu_matrix, const mzd_t *host_matrix, cl_command_queue queue, clm4rm_conditions *cond)
Copy matrix data from host memory to GPU. The operation is scheduled for asynchronous execution of th...
Definition: clm4rm.cpp:382
GraphPtr newGraph(const GraphModel::ptr model)
Definition: graph_cl.cpp:12
virtual void release()
release memory for all parts of the adjacency matrix
Definition: graph_m4ri.cpp:203
void join_conditions(clm4rm_conditions *cond)
called when the pre-conditions are met. The post-conditions become new pre-conditions.
Definition: clm4rm.cpp:319
virtual void queryDiagonalElement() const override
find an edge on the diagonal of the adjacency matrix. Does not return a result. To query the result o...
Definition: graph_cl.cpp:302
__kernel void clm4rm_and(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise AND on three matrixes C := A & B.
a range of node indices in a Reachability Graph
Definition: graph_model.h:17
void merge_conditions(clm4rm_conditions *a, clm4rm_conditions *b)
merge pre-conditions into one list
Definition: clm4rm.cpp:314
clmatrix_t * clm4rm_create(rci_t rows, rci_t cols, int rowpadding, int read_only, cl_context ctx)
create an empty matrix
Definition: clm4rm.cpp:233
Orientation
Segment Orientation.
Definition: boundary.h:31
virtual ~GraphCL()
destructor; release all memory, including GPU memory
Definition: graph_cl.cpp:73
memory pool for matrix objects (M4RI matrices mzd_t* and OpenCL matrices clm4rm_t*)
Definition: matrix_pool.h:26
void reclaim(mzd_t *m, MatrixPool *pool)
reclaim an object (i.e. put it into the recycling list)
cl_mem data
handle to GPU data (32-bit unsigned integers)
Definition: clm4rm.h:114
Represents a Reachability Graph. Vertices correspond to intervals in the reachability structure,...
Definition: graph_boost.h:39
clm4rm_conditions cond
cl_events for out-of-order dependencies
Definition: graph_cl.h:30
int clm4rm_query_result(cl_mem result_buffer, cl_command_queue queue, clm4rm_conditions *cond)
examine the result of a previous call to clm4rm_query_diagonal
cl_mem diagonalElementBuffer
result of searchDiagonalElement
Definition: graph_cl.h:32
#define str(S)
clmatrix_t * clm4rm_copy(const mzd_t *host_matrix, int rowpadding, int read_only, cl_context ctx)
create a copy from a matrix in M4RI format
Definition: clm4rm.cpp:254
void init_conditions(clm4rm_conditions *cond)
reset conditions list
Definition: clm4rm.cpp:284
void merge3(const Graph *A, const Graph *B, MatrixPool *pool)
rci_t nrows
Number of rows.
Definition: clm4rm.h:99
__kernel void clutri_mul(write_only_global C, read_only_global A, read_only_global B, __local gpuword *A_buf, __local gpuword *B_buf, int A_nrows)
OpenCL kernel for cubic upper triangular matrix multiplication.
Definition: clcubic_mul.cl:200
void swap(GraphCL &that)
swap data
Definition: graph_cl.cpp:112
GraphCL & operator=(const GraphCL &that)
assignment operator
Definition: graph_cl.cpp:89
The Reachability Structure; maintains a list of intervals on the border of Free Space,...
Definition: structure.h:32
mzd_t * clm4rm_read(mzd_t *host_matrix, clmatrix_t *gpu_matrix, cl_command_queue queue, clm4rm_conditions *cond)
copy matrix from gpu memory to host
Definition: clm4rm.cpp:406
virtual void finalize()
release memory that is not needed (empty sub-graphs)
Definition: graph_m4ri.cpp:199
virtual int foundDiagonalElement() const override
Definition: graph_cl.cpp:315
static bool is_adjacent_to(const Graph &A, const Graph &B)
Definition: graph_m4ri.cpp:775
void release_conditions(clm4rm_conditions *cond)
release conditions list
Definition: clm4rm.cpp:297
virtual void merge3(const Graph *A, const Graph *B, MatrixPool *pool) override
apply the final MERGE operation, computing the transitive closure of two graphs. Effectively performs...
Definition: graph_cl.cpp:263
Graph & operator=(const Graph &)
Definition: graph_boost.cpp:64
IndexRange mask[2]
Definition: graph_m4ri.h:72