15 #include "reconstruct3d.h" 16 #include "visiontransfer/alignedallocator.h" 26 #include <immintrin.h> 28 #include <emmintrin.h> 35 class Reconstruct3D::Pimpl {
39 float* createPointMap(
const unsigned short* dispMap,
int width,
int height,
40 int rowStride,
const float* q,
unsigned short minDisparity);
42 float* createPointMap(
const ImagePair& imagePair,
unsigned short minDisparity);
44 void writePlyFile(
const char* file,
const unsigned short* dispMap,
45 const unsigned char* image,
int width,
int height,
46 int dispRowStride,
int imageRowStride,
const float* q,
49 void writePlyFile(
const char* file,
const ImagePair& imagePair,
53 std::vector<float, AlignedAllocator<float> > pointMap;
55 float* createPointMapFallback(
const unsigned short* dispMap,
int width,
int height,
56 int rowStride,
const float* q,
unsigned short minDisparity);
58 float* createPointMapSSE2(
const unsigned short* dispMap,
int width,
int height,
59 int rowStride,
const float* q,
unsigned short minDisparity);
61 float* createPointMapAVX2(
const unsigned short* dispMap,
int width,
int height,
62 int rowStride,
const float* q,
unsigned short minDisparity);
71 Reconstruct3D::~Reconstruct3D() {
76 int rowStride,
const float* q,
unsigned short minDisparity) {
77 return pimpl->createPointMap(dispMap, width, height, rowStride, q, minDisparity);
81 return pimpl->createPointMap(imagePair, minDisparity);
85 const unsigned char* image,
int width,
int height,
86 int dispRowStride,
int imageRowStride,
const float* q,
double maxZ) {
87 pimpl->writePlyFile(file, dispMap, image, width, height, dispRowStride,
88 imageRowStride, q, maxZ);
93 pimpl->writePlyFile(file, imagePair, maxZ);
98 Reconstruct3D::Pimpl::Pimpl() {
101 float* Reconstruct3D::Pimpl::createPointMap(
const unsigned short* dispMap,
int width,
102 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
105 if(pointMap.size() !=
static_cast<unsigned int>(4*width*height)) {
106 pointMap.resize(4*width*height);
110 return createPointMapAVX2(dispMap, width, height, rowStride, q, minDisparity);
112 return createPointMapSSE2(dispMap, width, height, rowStride, q, minDisparity);
114 return createPointMapFallback(dispMap, width, height, rowStride, q, minDisparity);
118 float* Reconstruct3D::Pimpl::createPointMap(
const ImagePair& imagePair,
unsigned short minDisparity) {
120 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
127 float* Reconstruct3D::Pimpl::createPointMapFallback(
const unsigned short* dispMap,
int width,
128 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
130 float* outputPtr = &pointMap[0];
131 int stride = rowStride / 2;
133 for(
int y = 0; y < height; y++) {
134 double qx = q[1]*y + q[3];
135 double qy = q[5]*y + q[7];
136 double qz = q[9]*y + q[11];
137 double qw = q[13]*y + q[15];
139 for(
int x = 0; x < width; x++) {
140 unsigned short intDisp = std::max(minDisparity, dispMap[y*stride + x]);
141 if(intDisp >= 0xFFF) {
142 intDisp = minDisparity;
145 double d = intDisp / 16.0;
146 double w = qw + q[14]*d;
148 *outputPtr =
static_cast<float>((qx + q[2]*d)/w);
151 *outputPtr =
static_cast<float>((qy + q[6]*d)/w);
154 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
167 float* Reconstruct3D::Pimpl::createPointMapAVX2(
const unsigned short* dispMap,
int width,
168 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
171 const __m256 qCol0 = _mm256_setr_ps(q[0], q[4], q[8], q[12], q[0], q[4], q[8], q[12]);
172 const __m256 qCol1 = _mm256_setr_ps(q[1], q[5], q[9], q[13], q[1], q[5], q[9], q[13]);
173 const __m256 qCol2 = _mm256_setr_ps(q[2], q[6], q[10], q[14], q[2], q[6], q[10], q[14]);
174 const __m256 qCol3 = _mm256_setr_ps(q[3], q[7], q[11], q[15], q[3], q[7], q[11], q[15]);
177 const __m256i minDispVector = _mm256_set1_epi16(minDisparity);
178 const __m256i maxDispVector = _mm256_set1_epi16(0xFFF);
179 const __m256 scaleVector = _mm256_set1_ps(1.0/16.0);
180 const __m256i zeroVector = _mm256_set1_epi16(0);
182 float* outputPtr = &pointMap[0];
184 for(
int y = 0; y < height; y++) {
185 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
186 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
189 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 32) {
190 __m256i disparities = _mm256_load_si256(reinterpret_cast<const __m256i*>(ptr));
193 __m256i validMask = _mm256_cmpgt_epi16(maxDispVector, disparities);
194 disparities = _mm256_and_si256(validMask, disparities);
197 disparities = _mm256_max_epi16(disparities, minDispVector);
200 __m256i disparitiesMixup = _mm256_permute4x64_epi64(disparities, 0xd8);
203 __m256 floatDisp = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(disparitiesMixup, zeroVector));
204 __m256 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
208 __declspec(align(32))
float dispArray[16];
210 float dispArray[16]__attribute__((aligned(32)));
212 _mm256_store_ps(&dispArray[0], dispScaled);
215 floatDisp = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(disparitiesMixup, zeroVector));
216 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
217 _mm256_store_ps(&dispArray[8], dispScaled);
220 for(
int i=0; i<16; i+=2) {
222 __m256 vec = _mm256_setr_ps(x, y, dispArray[i], 1.0,
223 x+1, y, dispArray[i+1], 1.0);
226 __m256 u1 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
227 __m256 u2 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
228 __m256 u3 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
229 __m256 u4 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
231 __m256 prod1 = _mm256_mul_ps(u1, qCol0);
232 __m256 prod2 = _mm256_mul_ps(u2, qCol1);
233 __m256 prod3 = _mm256_mul_ps(u3, qCol2);
234 __m256 prod4 = _mm256_mul_ps(u4, qCol3);
236 __m256 multResult = _mm256_add_ps(_mm256_add_ps(prod1, prod2), _mm256_add_ps(prod3, prod4));
239 __m256 point = _mm256_div_ps(multResult,
240 _mm256_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
243 _mm256_store_ps(outputPtr, point);
256 float* Reconstruct3D::Pimpl::createPointMapSSE2(
const unsigned short* dispMap,
int width,
257 int height,
int rowStride,
const float* q,
unsigned short minDisparity) {
260 const __m128 qCol0 = _mm_setr_ps(q[0], q[4], q[8], q[12]);
261 const __m128 qCol1 = _mm_setr_ps(q[1], q[5], q[9], q[13]);
262 const __m128 qCol2 = _mm_setr_ps(q[2], q[6], q[10], q[14]);
263 const __m128 qCol3 = _mm_setr_ps(q[3], q[7], q[11], q[15]);
266 const __m128i minDispVector = _mm_set1_epi16(minDisparity);
267 const __m128i maxDispVector = _mm_set1_epi16(0xFFF);
268 const __m128 scaleVector = _mm_set1_ps(1.0/16.0);
269 const __m128i zeroVector = _mm_set1_epi16(0);
271 float* outputPtr = &pointMap[0];
273 for(
int y = 0; y < height; y++) {
274 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
275 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
278 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
279 __m128i disparities = _mm_load_si128(reinterpret_cast<const __m128i*>(ptr));
282 __m128i validMask = _mm_cmplt_epi16(disparities, maxDispVector);
283 disparities = _mm_and_si128(validMask, disparities);
286 disparities = _mm_max_epi16(disparities, minDispVector);
289 __m128 floatDisp = _mm_cvtepi32_ps(_mm_unpacklo_epi16(disparities, zeroVector));
290 __m128 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
294 __declspec(align(16))
float dispArray[8];
296 float dispArray[8]__attribute__((aligned(16)));
298 _mm_store_ps(&dispArray[0], dispScaled);
301 floatDisp = _mm_cvtepi32_ps(_mm_unpackhi_epi16(disparities, zeroVector));
302 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
303 _mm_store_ps(&dispArray[4], dispScaled);
306 for(
int i=0; i<8; i++) {
308 __m128 vec = _mm_setr_ps(static_cast<float>(x), static_cast<float>(y), dispArray[i], 1.0);
311 __m128 u1 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
312 __m128 u2 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
313 __m128 u3 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
314 __m128 u4 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
316 __m128 prod1 = _mm_mul_ps(u1, qCol0);
317 __m128 prod2 = _mm_mul_ps(u2, qCol1);
318 __m128 prod3 = _mm_mul_ps(u3, qCol2);
319 __m128 prod4 = _mm_mul_ps(u4, qCol3);
321 __m128 multResult = _mm_add_ps(_mm_add_ps(prod1, prod2), _mm_add_ps(prod3, prod4));
324 __m128 point = _mm_div_ps(multResult,
325 _mm_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
328 _mm_store_ps(outputPtr, point);
340 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const unsigned short* dispMap,
341 const unsigned char* image,
int width,
int height,
int dispRowStride,
int imageRowStride,
342 const float* q,
double maxZ) {
344 float* pointMap =
createPointMap(dispMap, width, height, dispRowStride, q, maxZ >= 0 ? 1 : 0);
349 for(
int i=0; i<width*height; i++) {
350 if(pointMap[4*i+2] <= maxZ) {
355 pointsCount = width*height;
359 fstream strm(file, ios::out);
360 strm <<
"ply" << endl
361 <<
"format ascii 1.0" << endl
362 <<
"element vertex " << pointsCount << endl
363 <<
"property float x" << endl
364 <<
"property float y" << endl
365 <<
"property float z" << endl
366 <<
"property uchar red" << endl
367 <<
"property uchar green" << endl
368 <<
"property uchar blue" << endl
369 <<
"end_header" << endl;
372 for(
int i=0; i<width*height; i++) {
375 int col =
static_cast<int>(image[y*imageRowStride + x]);
377 if(maxZ < 0 || pointMap[4*i+2] <= maxZ) {
378 if(std::isfinite(pointMap[4*i + 2])) {
379 strm << pointMap[4*i]
380 <<
" " << pointMap[4*i + 1]
381 <<
" " << pointMap[4*i + 2];
383 strm <<
"NaN NaN NaN";
386 strm <<
" " << col <<
" " << col <<
" " << col << endl;
391 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const ImagePair& imagePair,
394 throw std::runtime_error(
"Camera image must have 8-bit pixel format!");
397 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
float * createPointMap(const unsigned short *dispMap, int width, int height, int rowStride, const float *q, unsigned short minDisparity=1)
Reconstructs the 3D location of each pixel in the given disparity map.
unsigned char * getPixelData(int imageNumber) const
Returns the pixel data for the given image.
const float * getQMatrix() const
Returns a pointer to the disparity-to-depth mapping matrix q.
int getWidth() const
Returns the width of each image.
Reconstruct3D()
Constructs a new object for 3D reconstructing.
A set of two images, which are usually the left camera image and the disparity map.
ImageFormat getPixelFormat(int imageNumber) const
Returns the pixel format for the given image.
void writePlyFile(const char *file, const unsigned short *dispMap, const unsigned char *image, int width, int height, int dispRowStride, int imageRowStride, const float *q, double maxZ=std::numeric_limits< double >::max())
Projects the given disparity map to 3D points and exports the result to a PLY file.
int getHeight() const
Returns the height of each image.
int getRowStride(int imageNumber) const
Returns the row stride for the pixel data of one image.