Fréchet View  1.6.0
A Tool for Exploring Fréchet Distance Algorithms
graph_cl.cpp
Go to the documentation of this file.
1 
2 #include <graph_cl.h>
3 #include <concurrency.h>
4 
5 using namespace frechet;
6 using namespace reach;
7 using namespace app;
8 
9 /*
10  * Factory Methods
11  */
13 {
14  if (ConcurrencyContext::hasGpuSupport())
15  return GraphPtr(new GraphCL(model));
16  else
17  return GraphPtr(new Graph(model));
18 }
19 
// Factory method overload: create a Graph restricted by the mask 'hmask'.
// Chooses the GPU-backed GraphCL when OpenCL support is available.
// NOTE(review): the signature line (orig. listing line 20) was lost in
// extraction -- presumably "GraphPtr newGraph(const GraphModel::ptr model,
// <mask type> hmask)"; the type of 'hmask' cannot be determined from this
// view; confirm against graph_cl.h.
{
 if (ConcurrencyContext::hasGpuSupport())
 return GraphPtr(new GraphCL(model,hmask));
 else
 return GraphPtr(new Graph(model, hmask));
}
27 
// Factory method overload: create a Graph from a reachability structure
// 'str'. Chooses the GPU-backed GraphCL when OpenCL support is available.
// NOTE(review): the signature line (orig. listing line 28) was lost in
// extraction -- presumably "GraphPtr newGraph(const GraphModel::ptr model,
// <structure type> str)" (likely the Structure from structure.h); confirm
// against graph_cl.h.
{
 if (ConcurrencyContext::hasGpuSupport())
 return GraphPtr(new GraphCL(model,str));
 else
 return GraphPtr(new Graph(model, str));
}
35 
36 
38  : Graph(model),
39  clmtx{ {nullptr,nullptr},{nullptr,nullptr} },
40  temps(), cond(), diagonalElementBuffer(nullptr)
41 {
42  init_conditions(&cond);
43 }
44 
// Constructor overload taking an additional mask argument; forwards it to
// Graph(model, hmask) and starts with empty GPU-side state.
// NOTE(review): the signature line (orig. listing line 45) was lost in
// extraction -- presumably "GraphCL::GraphCL(const GraphModel::ptr model,
// <mask type> hmask)"; confirm against graph_cl.h.
 : Graph(model, hmask),
 clmtx{ { nullptr,nullptr },{ nullptr,nullptr } },
 temps(), cond(), diagonalElementBuffer(nullptr)
{
 init_conditions(&cond);
}
52 
// Constructor overload taking a reachability structure; forwards it to
// Graph(model, str) and starts with empty GPU-side state.
// NOTE(review): the signature line (orig. listing line 53) was lost in
// extraction -- presumably "GraphCL::GraphCL(const GraphModel::ptr model,
// <structure type> str)"; confirm against graph_cl.h.
 : Graph(model, str),
 clmtx{ {nullptr, nullptr}, { nullptr,nullptr } },
 temps(), cond(), diagonalElementBuffer(nullptr)
{
 init_conditions(&cond);
}
60 
62  : Graph(that)
63 {
64  copy(that);
65 }
66 
68  : Graph(that)
69 {
70  swap(that);
71 }
72 
74 {
75  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
76  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2) {
77  clm4rm_free(clmtx[o1][o2]);
78  clmtx[o1][o2] = nullptr;
79  }
80  for (clmatrix_t* t : temps)
81  clm4rm_free(t);
82  temps.clear();
83 
85  clReleaseMemObject(diagonalElementBuffer);
87 }
88 
90  Graph::operator=(that);
91  copy(that);
92  return *this;
93 }
94 
96  Graph::operator=(that);
97  swap(that);
98  return *this;
99 }
100 
101 void GraphCL::copy(const GraphCL& that)
102 {
103  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
104  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
105  if (that.clmtx[o1][o2])
106  clmtx[o1][o2] = clm4rm_copy(that.mtx[o1][o2], 32, true, ConcurrencyContext::clContext());
107  else
108  clmtx[o1][o2] = nullptr;
109  // don't copy diagonalElementBuffer
110 }
111 
113 {
114  // steal from 'that'
115  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
116  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
117  std::swap(clmtx[o1][o2], that.clmtx[o1][o2]);
119 }
120 
122 {
123  Graph::release(o1, o2);
124  if (clmtx[o1][o2]) {
125  clm4rm_free(clmtx[o1][o2]);
126  clmtx[o1][o2] = nullptr;
127  }
128 }
129 
135  Graph::finalize(); // releaseIfZero
136  synchToGpu();
137 }
// GraphCL::resetConditions() -- both the signature line (orig. listing
// line 142, per the Doxygen index "virtual void resetConditions() override",
// definition graph_cl.cpp:142) and the single body line (orig. 144) were
// lost in extraction. The body presumably re-initializes 'cond', e.g. via
// release_conditions()/init_conditions() -- confirm against version control.
{
}
146 
148  // copy to GPU memory
149  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
150  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
151  {
152  mzd_t* M = mtx[o1][o2];
153  clmatrix_t* G = clmtx[o1][o2];
154 
155  if (!M && !G)
156  continue;
157 
158  if (G) {
159  clm4rm_free(G);
160  G = clmtx[o1][o2] = nullptr;
161  }
162 
163  if (M) {
164  G = clmtx[o1][o2] = clm4rm_create(M->nrows, M->ncols, 32,
165  true, ConcurrencyContext::clContext());
166  clm4rm_write(G, M, ConcurrencyContext::clQueue(), &cond);
167  // Note: clm4rm_copy does the same but is always _blocking_
169  }
170  }
171 }
172 
174  // copy to GPU memory
175  for (Orientation o1 = HORIZONTAL; o1 <= VERTICAL; ++o1)
176  for (Orientation o2 = HORIZONTAL; o2 <= VERTICAL; ++o2)
177  {
178  if (!mtx[o1][o2] && !clmtx[o1][o2])
179  continue;
180 
181  if (clmtx[o1][o2]) {
182  mtx[o1][o2] = clm4rm_read(mtx[o1][o2], clmtx[o1][o2],
183  ConcurrencyContext::clQueue(), &cond);
185  }
186  }
187 }
188 
// (*this)_VV &= that_VV
// COMBINE operation: bitwise AND of the vertical-vertical adjacency parts,
// performed on the GPU. An empty operand short-circuits the operation.
void GraphCL::combine(const Graph* Bg)
{
 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
 Q_ASSERT(B);

 clmatrix_t* A_VV = this->clmtx[VERTICAL][VERTICAL];
 clmatrix_t* B_VV = B->clmtx[VERTICAL][VERTICAL];

 // invariant: a host matrix implies a matching GPU matrix
 Q_ASSERT((this->mtx[VERTICAL][VERTICAL]==nullptr) || (A_VV!=nullptr));
 Q_ASSERT((B->mtx[VERTICAL][VERTICAL]==nullptr) || (B_VV!=nullptr));

 if (!A_VV) return;
 if (!B_VV) {
 // NOTE(review): one line (orig. listing line 203) was lost in extraction
 // here -- presumably clears/releases this VV part (A & empty == empty);
 // confirm against version control.
 return;
 }

 // chain B's pending OpenCL events into our condition list
 merge_conditions(&cond,&B->cond);
#if IMAGE2D
 // can't do clm4rm_and in-place. Use temp matrix instead:
 // NOTE(review): tempMatrix is called with two arguments here although the
 // definition at graph_cl.cpp:331 takes (rows, cols, pool) -- presumably a
 // default argument pool=nullptr in the header; confirm.
 clmatrix_t* T = tempMatrix(A_VV->nrows,A_VV->ncols);
 clm4rm_and(T, A_VV, B_VV, ConcurrencyContext::clQueue(),&cond);
 std::swap(A_VV->data,T->data);
 // don't release T. Everything is (will be) asynchronous.
#else
 // VV are upper triangular matrices
 clm4rm_and(A_VV, A_VV, B_VV, ConcurrencyContext::clQueue(),&cond);
 // TODO clutri_and
#endif
 // NOTE(review): one line (orig. listing line 219) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.
}
221 
222 void merge2(const Graph* A, const Graph* B, MatrixPool* pool);
223 
224 void merge3(const Graph* A, const Graph* B, MatrixPool* pool);
225 
// @return (*this)_VV * that_VV
// MERGE operation: this_VV := A_VV * B_VV, a Boolean upper-triangular
// matrix product scheduled asynchronously on the GPU.
void GraphCL::merge2(const Graph* Ag, const Graph* Bg, MatrixPool* pool)
{
 const GraphCL* A = dynamic_cast<const GraphCL*>(Ag);
 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
 Q_ASSERT(A && B);
 Q_ASSERT(is_adjacent_to(*A, *B));

 GraphCL* C = this; // NOTE(review): unused local

 clmatrix_t* A_VV = A->clmtx[VERTICAL][VERTICAL];
 clmatrix_t* B_VV = B->clmtx[VERTICAL][VERTICAL];

 // invariant: a host matrix implies a matching GPU matrix
 Q_ASSERT((A->mtx[VERTICAL][VERTICAL]==nullptr) || (A_VV!=nullptr));
 Q_ASSERT((B->mtx[VERTICAL][VERTICAL]==nullptr) || (B_VV!=nullptr));

 // empty operand => empty result
 if (!A_VV || !B_VV)
 return;

 // chain pending OpenCL events of both operands into our condition list
 merge_conditions(&cond,&A->cond);
 merge_conditions(&cond,&B->cond);

 // both matrices are upper triangular
 size2_t max_tile;
 ConcurrencyContext::maxMaxtrixTile(max_tile);
 clmatrix_t* C_VV = new_clmatrix(A_VV->nrows,B_VV->ncols,pool,&cond);
 // NOTE(review): one line (orig. listing line 252) was lost in extraction
 // here -- confirm against version control.

 clutri_mul(C_VV, A_VV, B_VV, max_tile, ConcurrencyContext::clQueue(), &cond);
 // NOTE(review): one line (orig. listing line 255) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.

 this->clmtx[VERTICAL][VERTICAL] = C_VV;
 Q_ASSERT(C_VV->nrows == this->mask[VERTICAL].len());
 Q_ASSERT(C_VV->ncols == this->mask[VERTICAL].len());
}
261 
// @return (*this)_HV * that_VV * (*this)_VH
// Final MERGE operation: this_HH := A_HV * B_VV * A_VH, two chained Boolean
// matrix products scheduled asynchronously on the GPU (via a temp matrix).
void GraphCL::merge3(const Graph* Ag, const Graph* Bg, MatrixPool* pool)
{
 const GraphCL* A = dynamic_cast<const GraphCL*>(Ag);
 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
 Q_ASSERT(A && B);
 Q_ASSERT(is_adjacent_to(*A, *B));
 Q_ASSERT(is_adjacent_to(*B, *A));

 clmatrix_t* A_HV = A->clmtx[HORIZONTAL][VERTICAL];
 clmatrix_t* B_VV = B->clmtx[VERTICAL][VERTICAL];
 clmatrix_t* A_VH = A->clmtx[VERTICAL][HORIZONTAL];

 // invariant: host and GPU matrices exist (or not) in lock-step
 Q_ASSERT((A_HV!=nullptr)==(A->mtx[HORIZONTAL][VERTICAL]!=nullptr));
 Q_ASSERT((B->mtx[VERTICAL][VERTICAL]==nullptr) || (B_VV!=nullptr));
 Q_ASSERT((A_VH!=nullptr)==(A->mtx[VERTICAL][HORIZONTAL]!=nullptr));

 if (!A_HV || !B_VV || !A_VH)
 return; // shortcut for empty

 // chain pending OpenCL events of both operands into our condition list
 merge_conditions(&cond,&A->cond);
 merge_conditions(&cond,&B->cond);

 size2_t max_tile;
 ConcurrencyContext::maxMaxtrixTile(max_tile);
 clmatrix_t* temp = tempMatrix(A_HV->nrows,B_VV->ncols,pool);
 clmatrix_t* C_HH = new_clmatrix(temp->nrows,A_VH->ncols,pool,&cond);
 // NOTE(review): one line (orig. listing line 289) was lost in extraction
 // here -- confirm against version control.

 clcubic_mul(temp, A_HV, B_VV, max_tile, ConcurrencyContext::clQueue(), &cond);
 // NOTE(review): one line (orig. listing line 292) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.

 clcubic_mul(C_HH, temp, A_VH, max_tile, ConcurrencyContext::clQueue(), &cond);
 // NOTE(review): one line (orig. listing line 295) was lost in extraction
 // here -- possibly join_conditions(&cond); confirm.

 this->clmtx[HORIZONTAL][HORIZONTAL] = C_HH;
 Q_ASSERT(C_HH->nrows == this->mask[HORIZONTAL].len());
 Q_ASSERT(C_HH->ncols == this->mask[HORIZONTAL].len());
}
301 
303 {
304  diagonalElement=-1;
306  if (!M) return;
307 
308  Q_ASSERT(diagonalElementBuffer==nullptr);
309 
310  // asynchronous call. returns buffer but no result
311  diagonalElementBuffer = clm4rm_query_diagonal(M, ConcurrencyContext::clContext(), ConcurrencyContext::clQueue(), &cond);
313 }
314 
316 {
318  // copy result from GPU; blocking callAda!
319  int i = clm4rm_query_result(diagonalElementBuffer, ConcurrencyContext::clQueue(), &cond);
320  if (i >= 0)
322  else
323  diagonalElement = -1;
324 
325  diagonalElementBuffer=nullptr; // was released by clm4rm_query_result
326  }
327  return diagonalElement;
328 }
329 
330 
331 clmatrix_t* GraphCL::tempMatrix(int rows, int cols, MatrixPool* pool) const
332 {
333  // odd-sized temp matrices are not pooled
334  clmatrix_t* t = new_clmatrix(rows, cols, pool, &cond);
335  temps.push_back(t);
336  return t;
337 }
338 
340  Graph::release(o1,o2,pool);
341  if (clmtx[o1][o2]) {
342  reclaim(clmtx[o1][o2],pool);
343  clmtx[o1][o2] = nullptr;
344  }
345  // release temp matrices to pool
346  if (pool) {
347  for (clmatrix_t *T : temps)
348  reclaim(T, pool);
349  temps.clear();
350  }
351 }
clmatrix_t * tempMatrix(int rows, int cols, MatrixPool *pool) const
allocate a temporary matrix
Definition: graph_cl.cpp:331
clmatrix_t * new_clmatrix(int rows, int cols, MatrixPool *pool, clm4rm_conditions *cond)
allocate a new clmatrix_t structure (a matrix for the CLM4RM algorithms)
virtual void synchFromGpu() override
copy adjacency matrix data back from GPU memory to CPU memory
Definition: graph_cl.cpp:173
GraphCL(const GraphModel::ptr model)
empty constructor
Definition: graph_cl.cpp:37
OpenCL boolean matrix data structure. Data is arranged in 32 bit words.
Definition: clm4rm.h:98
void swap(gpuword **A, gpuword **B)
Reachability Graph with additional storage in GPU memory.
Definition: graph_cl.h:23
virtual void resetConditions() override
Definition: graph_cl.cpp:142
__kernel void clm4rm_query_diagonal(read_only_global M, int M_nrows, volatile __global int *result)
Query Matrix Diagonal.
clmatrix_t * clmtx[2][2]
data stored on GPU memory
Definition: graph_cl.h:26
size_t size2_t[2]
two-dimensional size; used for various OpenCL parameters
Definition: clm4rm.h:67
void merge2(const Graph *A, const Graph *B, MatrixPool *pool)
void clm4rm_free(clmatrix_t *gpu_matrix)
release memory (CPU and GPU)
__kernel void clcubic_mul(write_only_global C, read_only_global A, read_only_global B, __local gpuword *A_buf, __local gpuword *B_buf, int A_nrows, int A_ncols)
OpenCL kernel for cubic matrix multiplication.
Definition: clcubic_mul.cl:112
global definitions for all algorithms.
mzd_t * mtx[2][2]
adjacency matrix (M4RI structure) split into four parts to allow for memory savings.
Definition: graph_m4ri.h:45
void copy(const GraphCL &that)
copy data
Definition: graph_cl.cpp:101
virtual void finalize() override
Definition: graph_cl.cpp:134
std::list< clmatrix_t * > temps
temporary matrices
Definition: graph_cl.h:28
boost::shared_ptr< GraphModel > ptr
smart pointer to a GraphModel object
Definition: graph_model.h:307
boost::shared_ptr< Graph > GraphPtr
Definition: graph_m4ri.h:14
int diagonalElement
result of call to searchDiagonalElement
Definition: graph_m4ri.h:78
virtual void combine(const Graph *P) override
apply the COMBINE operation, filtering edges with valid placements. Effectively performs a Boolean AN...
Definition: graph_cl.cpp:190
virtual void merge2(const Graph *A, const Graph *B, MatrixPool *pool) override
apply the MERGE operation, computing the transitive closure of two graphs. Effectively performs a mat...
Definition: graph_cl.cpp:227
virtual void synchToGpu() override
copy adjacency matrix data to GPU memory
Definition: graph_cl.cpp:147
rci_t ncols
Number of columns.
Definition: clm4rm.h:101
int lower
lower index (inclusive)
Definition: graph_model.h:21
void clm4rm_write(clmatrix_t *gpu_matrix, const mzd_t *host_matrix, cl_command_queue queue, clm4rm_conditions *cond)
Copy matrix data from host memory to GPU. The operation is scheduled for asynchronous execution of th...
Definition: clm4rm.cpp:382
GraphPtr newGraph(const GraphModel::ptr model)
Definition: graph_cl.cpp:12
virtual void release()
release memory for all parts of the adjacency matrix
Definition: graph_m4ri.cpp:203
void join_conditions(clm4rm_conditions *cond)
called when the pre-conditions are met. The post-conditions become new pre-conditions.
Definition: clm4rm.cpp:319
virtual void queryDiagonalElement() const override
find an edge on the diagonal of the adjacency matrix. Does not return a result. To query the result o...
Definition: graph_cl.cpp:302
__kernel void clm4rm_and(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise AND on three matrixes C := A & B.
a range of node indices in a Reachability Graph
Definition: graph_model.h:17
void merge_conditions(clm4rm_conditions *a, clm4rm_conditions *b)
merge pre-conditions into one list
Definition: clm4rm.cpp:314
clmatrix_t * clm4rm_create(rci_t rows, rci_t cols, int rowpadding, int read_only, cl_context ctx)
create an empty matrix
Definition: clm4rm.cpp:233
Orientation
Segment Orientation.
Definition: boundary.h:31
virtual ~GraphCL()
destructor; release all memory, including GPU memory
Definition: graph_cl.cpp:73
memory pool for matrix objects (M4RI matrices mzd_t* and OpenCL matrices clm4rm_t*)
Definition: matrix_pool.h:26
void reclaim(mzd_t *m, MatrixPool *pool)
reclaim an object (i.e. put it into the recycling list)
cl_mem data
handle to GPU data (32-bit unsigned integers)
Definition: clm4rm.h:114
Represents a Reachability Graph. Vertices correspond to intervals in the reachability structure,...
Definition: graph_boost.h:39
clm4rm_conditions cond
cl_events for out-of-order dependencies
Definition: graph_cl.h:30
int clm4rm_query_result(cl_mem result_buffer, cl_command_queue queue, clm4rm_conditions *cond)
examine the result of a previous call to clm4rm_query_diagonal
cl_mem diagonalElementBuffer
result of searchDiagonalElement
Definition: graph_cl.h:32
#define str(S)
clmatrix_t * clm4rm_copy(const mzd_t *host_matrix, int rowpadding, int read_only, cl_context ctx)
create a copy from a matrix in M4RI format
Definition: clm4rm.cpp:254
void init_conditions(clm4rm_conditions *cond)
reset conditions list
Definition: clm4rm.cpp:284
void merge3(const Graph *A, const Graph *B, MatrixPool *pool)
rci_t nrows
Number of rows.
Definition: clm4rm.h:99
__kernel void clutri_mul(write_only_global C, read_only_global A, read_only_global B, __local gpuword *A_buf, __local gpuword *B_buf, int A_nrows)
OpenCL kernel for cubic upper triangular matrix multiplication.
Definition: clcubic_mul.cl:200
void swap(GraphCL &that)
swap data
Definition: graph_cl.cpp:112
GraphCL & operator=(const GraphCL &that)
assignment operator
Definition: graph_cl.cpp:89
The Reachability Structure; maintains a list of intervals on the border of Free Space,...
Definition: structure.h:32
mzd_t * clm4rm_read(mzd_t *host_matrix, clmatrix_t *gpu_matrix, cl_command_queue queue, clm4rm_conditions *cond)
copy matrix from gpu memory to host
Definition: clm4rm.cpp:406
virtual void finalize()
release memory that is not needed (empty sub-graphs)
Definition: graph_m4ri.cpp:199
virtual int foundDiagonalElement() const override
Definition: graph_cl.cpp:315
static bool is_adjacent_to(const Graph &A, const Graph &B)
Definition: graph_m4ri.cpp:775
void release_conditions(clm4rm_conditions *cond)
release conditions list
Definition: clm4rm.cpp:297
virtual void merge3(const Graph *A, const Graph *B, MatrixPool *pool) override
apply the final MERGE operation, computing the transitive closure of two graphs. Effectively performs...
Definition: graph_cl.cpp:263
Graph & operator=(const Graph &)
Definition: graph_boost.cpp:64
IndexRange mask[2]
Definition: graph_m4ri.h:72