14 if (ConcurrencyContext::hasGpuSupport())
22 if (ConcurrencyContext::hasGpuSupport())
30 if (ConcurrencyContext::hasGpuSupport())
39 clmtx{ {
nullptr,
nullptr},{
nullptr,
nullptr} },
40 temps(), cond(), diagonalElementBuffer(
nullptr)
46 :
Graph(model, hmask),
47 clmtx{ {
nullptr,
nullptr },{
nullptr,
nullptr } },
48 temps(), cond(), diagonalElementBuffer(
nullptr)
55 clmtx{ {
nullptr,
nullptr}, {
nullptr,
nullptr } },
56 temps(), cond(), diagonalElementBuffer(
nullptr)
78 clmtx[o1][o2] =
nullptr;
105 if (that.
clmtx[o1][o2])
108 clmtx[o1][o2] =
nullptr;
126 clmtx[o1][o2] =
nullptr;
152 mzd_t* M =
mtx[o1][o2];
160 G =
clmtx[o1][o2] =
nullptr;
165 true, ConcurrencyContext::clContext());
183 ConcurrencyContext::clQueue(), &
cond);
192 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
216 clm4rm_and(A_VV, A_VV, B_VV, ConcurrencyContext::clQueue(),&
cond);
229 const GraphCL* A = dynamic_cast<const GraphCL*>(Ag);
230 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
250 ConcurrencyContext::maxMaxtrixTile(max_tile);
254 clutri_mul(C_VV, A_VV, B_VV, max_tile, ConcurrencyContext::clQueue(), &
cond);
265 const GraphCL* A = dynamic_cast<const GraphCL*>(Ag);
266 const GraphCL* B = dynamic_cast<const GraphCL*>(Bg);
279 if (!A_HV || !B_VV || !A_VH)
286 ConcurrencyContext::maxMaxtrixTile(max_tile);
291 clcubic_mul(temp, A_HV, B_VV, max_tile, ConcurrencyContext::clQueue(), &
cond);
294 clcubic_mul(C_HH, temp, A_VH, max_tile, ConcurrencyContext::clQueue(), &
cond);
343 clmtx[o1][o2] =
nullptr;
clmatrix_t * tempMatrix(int rows, int cols, MatrixPool *pool) const
allocate a temporary matrix
clmatrix_t * new_clmatrix(int rows, int cols, MatrixPool *pool, clm4rm_conditions *cond)
allocate a new clmatrix_t structure (a matrix for the CLM4RM algorithms)
virtual void synchFromGpu() override
copy adjacency matrix data back from GPU memory to CPU memory
GraphCL(const GraphModel::ptr model)
empty constructor
OpenCL boolean matrix data structure. Data is arranged in 32 bit words.
void swap(gpuword **A, gpuword **B)
Reachability Graph with additional storage in GPU memory.
virtual void resetConditions() override
__kernel void clm4rm_query_diagonal(read_only_global M, int M_nrows, volatile __global int *result)
Query Matrix Diagonal.
clmatrix_t * clmtx[2][2]
data stored on GPU memory
size_t size2_t[2]
two-dimensional size; used for various OpenCL parameters
void merge2(const Graph *A, const Graph *B, MatrixPool *pool)
void clm4rm_free(clmatrix_t *gpu_matrix)
release memory (CPU and GPU)
__kernel void clcubic_mul(write_only_global C, read_only_global A, read_only_global B, __local gpuword *A_buf, __local gpuword *B_buf, int A_nrows, int A_ncols)
OpenCL kernel for cubic matrix multiplication.
global definitions for all algorithms.
mzd_t * mtx[2][2]
adjacency matrix (M4RI structure) split into four parts to allow for memory savings.
void copy(const GraphCL &that)
copy data
virtual void finalize() override
std::list< clmatrix_t * > temps
temporary matrices
boost::shared_ptr< GraphModel > ptr
smart pointer to a GraphModel object
boost::shared_ptr< Graph > GraphPtr
int diagonalElement
result of call to searchDiagonalElement
virtual void combine(const Graph *P) override
apply the COMBINE operation, filtering edges with valid placements. Effectively performs a Boolean AN...
virtual void merge2(const Graph *A, const Graph *B, MatrixPool *pool) override
apply the MERGE operation, computing the transitive closure of two graphs. Effectively performs a mat...
virtual void synchToGpu() override
copy adjacency matrix data to GPU memory
rci_t ncols
Number of columns.
int lower
lower index (inclusive)
void clm4rm_write(clmatrix_t *gpu_matrix, const mzd_t *host_matrix, cl_command_queue queue, clm4rm_conditions *cond)
Copy matrix data from host memory to GPU. The operation is scheduled for asynchronous execution of th...
GraphPtr newGraph(const GraphModel::ptr model)
virtual void release()
release memory for all parts of the adjacency matrix
void join_conditions(clm4rm_conditions *cond)
called when the pre-conditions are met. The post-conditions become new pre-conditions.
virtual void queryDiagonalElement() const override
find an edge on the diagonal of the adjacency matrix. Does not return a result. To query the result o...
__kernel void clm4rm_and(write_only_global C, read_only_global A, read_only_global B)
OpenCL kernel for bitwise AND on three matrices C := A & B.
a range of node indices in a Reachability Graph
void merge_conditions(clm4rm_conditions *a, clm4rm_conditions *b)
merge pre-conditions into one list
clmatrix_t * clm4rm_create(rci_t rows, rci_t cols, int rowpadding, int read_only, cl_context ctx)
create an empty matrix
Orientation
Segment Orientation.
virtual ~GraphCL()
destructor; release all memory, including GPU memory
memory pool for matrix objects (M4RI matrices mzd_t* and OpenCL matrices clm4rm_t*)
void reclaim(mzd_t *m, MatrixPool *pool)
reclaim an object (i.e. put it into the recycling list)
cl_mem data
handle to GPU data (32-bit unsigned integers)
Represents a Reachability Graph. Vertices correspond to intervals in the reachability structure,...
clm4rm_conditions cond
cl_events for out-of-order dependencies
int clm4rm_query_result(cl_mem result_buffer, cl_command_queue queue, clm4rm_conditions *cond)
examine the result of a previous call to clm4rm_query_diagonal
cl_mem diagonalElementBuffer
result of searchDiagonalElement
clmatrix_t * clm4rm_copy(const mzd_t *host_matrix, int rowpadding, int read_only, cl_context ctx)
create a copy from a matrix in M4RI format
void init_conditions(clm4rm_conditions *cond)
reset conditions list
void merge3(const Graph *A, const Graph *B, MatrixPool *pool)
rci_t nrows
Number of rows.
__kernel void clutri_mul(write_only_global C, read_only_global A, read_only_global B, __local gpuword *A_buf, __local gpuword *B_buf, int A_nrows)
OpenCL kernel for cubic upper triangular matrix multiplication.
void swap(GraphCL &that)
swap data
GraphCL & operator=(const GraphCL &that)
assignment operator
The Reachability Structure; maintains a list of intervals on the border of Free Space,...
mzd_t * clm4rm_read(mzd_t *host_matrix, clmatrix_t *gpu_matrix, cl_command_queue queue, clm4rm_conditions *cond)
copy matrix from gpu memory to host
virtual void finalize()
release memory that is not needed (empty sub-graphs)
virtual int foundDiagonalElement() const override
static bool is_adjacent_to(const Graph &A, const Graph &B)
void release_conditions(clm4rm_conditions *cond)
release conditions list
virtual void merge3(const Graph *A, const Graph *B, MatrixPool *pool) override
apply the final MERGE operation, computing the transitive closure of two graphs. Effectively performs...
Graph & operator=(const Graph &)