Point Cloud Library (PCL)  1.9.1
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
cuda_async_copy.h
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2011, Willow Garage, Inc.
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer in the documentation and/or other materials provided
18  * with the distribution.
19  * * Neither the name of Willow Garage, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  * @authors: Anatoly Baksheev
37  */
38 
39 
40 #ifndef PCL_GPU_PEOPLE_CUDA_ASYNC_COPY_H_
41 #define PCL_GPU_PEOPLE_CUDA_ASYNC_COPY_H_
42 
43 #include <pcl/gpu/containers/device_array.h>
44 #include <pcl/gpu/utils/safe_call.hpp>
45 
46 namespace pcl
47 {
48  namespace gpu
49  {
50  template<class T>
51  class AsyncCopy
52  {
53  public:
54  AsyncCopy(T* ptr, size_t size) : ptr_(ptr)
55  {
56  cudaSafeCall( cudaHostRegister(ptr_, size, 0) );
57  cudaSafeCall( cudaStreamCreate(&stream_) );
58  }
59 
60  AsyncCopy(std::vector<T>& data) : ptr_(&data[0])
61  {
62  cudaSafeCall( cudaHostRegister(ptr_, data.size(), 0) );
63  cudaSafeCall( cudaStreamCreate(&stream_) );
64  }
65 
67  {
68  cudaSafeCall( cudaHostUnregister(ptr_) );
69  cudaSafeCall( cudaStreamDestroy(stream_) );
70  }
71 
72  void download(const DeviceArray<T>& arr)
73  {
74  cudaSafeCall( cudaMemcpyAsync(ptr_, arr.ptr(), arr.sizeBytes(), cudaMemcpyDeviceToHost, stream_) );
75  }
76 
77  void download(const DeviceArray2D<T>& arr)
78  {
79  cudaSafeCall( cudaMemcpy2DAsync(ptr_, arr.cols(), arr.ptr(), arr.step(), arr.colsBytes(), arr.rows(), cudaMemcpyDeviceToHost, stream_) );
80  }
81 
82  void upload(const DeviceArray<T>& arr) const
83  {
84  cudaSafeCall( cudaMemcpyAsync(arr.ptr(), ptr_, arr.size(), cudaMemcpyHostToDevice, stream_) );
85  }
86 
87  void upload(const DeviceArray2D<T>& arr) const
88  {
89  cudaSafeCall( cudaMemcpy2DAsync(arr.ptr(), arr.step(), ptr_, arr.cols(), arr.colsBytes(), arr.rows(), cudaMemcpyHostToDevice, stream_) );
90  }
91 
93  {
94  cudaSafeCall( cudaStreamSynchronize(stream_) );
95  }
96  private:
97  cudaStream_t stream_;
98  T* ptr_ ;
99  };
100  }
101 
102  namespace device
103  {
104  using pcl::gpu::AsyncCopy;
105  }
106 }
107 
108 #endif /* PCL_GPU_PEOPLE_CUDA_ASYNC_COPY_H_ */
109 
size_t sizeBytes() const
size_t step() const
Returns stride between two consecutive rows in bytes for internal buffer.
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
DeviceArray2D class
Definition: device_array.h:154
int colsBytes() const
Returns number of bytes in each row.
T * ptr(int y=0)
Returns pointer to given row in internal buffer.
void upload(const DeviceArray< T > &arr) const
int rows() const
Returns number of rows.
AsyncCopy(std::vector< T > &data)
void upload(const DeviceArray2D< T > &arr) const
DeviceArray class
Definition: device_array.h:57
AsyncCopy(T *ptr, size_t size)
void download(const DeviceArray2D< T > &arr)
void download(const DeviceArray< T > &arr)
int cols() const
Returns number of elements in each row.
T * ptr()
Returns pointer for internal buffer in GPU memory.
size_t size() const
Returns size in elements.