diff --git a/CMakeLists.txt b/CMakeLists.txt index 00dea37..9a0688d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,7 @@ endif (BUILD_SHARED_LIBS) # pkg-config integration set(PROJECT_VERSION "0.2") -set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32/64)") +set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name") set(EXEC_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX} CACHE PATH "Installation prefix for executables and object code libraries" FORCE) set(BIN_INSTALL_DIR ${EXEC_INSTALL_PREFIX}/bin CACHE PATH "Installation prefix for user executables" FORCE) set(LIB_INSTALL_DIR ${EXEC_INSTALL_PREFIX}/lib${LIB_SUFFIX} CACHE PATH "Installation prefix for object code libraries" FORCE) @@ -78,6 +78,6 @@ if (UNIX) install(TARGETS CineFormShared DESTINATION lib/) endif (BUILD_SHARED_LIBS) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcineform.pc DESTINATION lib/pkgconfig/) install(FILES ${PUBLIC_HEADERS} DESTINATION include/libcineform/) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcineform.pc DESTINATION lib/pkgconfig/) endif (UNIX) diff --git a/Codec/InvertHorizontalStrip16s.c b/Codec/InvertHorizontalStrip16s.c index 7c65304..e388278 100644 --- a/Codec/InvertHorizontalStrip16s.c +++ b/Codec/InvertHorizontalStrip16s.c @@ -183,7 +183,7 @@ void InvertHorizontalStrip16s(PIXEL *lowpass_band, // Horizontal lowpass coeffic // Place the odd result in the odd column output[1] = SATURATE(odd); -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass coefficients low1_pi16 = *((__m64 *)&lowpass[column]); @@ -1443,7 +1443,7 @@ void InvertHorizontalStripDescale16s(PIXEL *lowpass_band, int lowpass_pitch, // Place the odd result in the odd column output[1] = SATURATE(odd); -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass coefficients low1_pi16 = *((__m64 *)&lowpass[column]); @@ -2725,7 +2725,7 @@ void InvertHorizontalStripPrescaled16s(PIXEL *lowpass_band, // Horizontal lowpa // Place the odd result in the odd column output[1] = SATURATE(odd); -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass coefficients low1_pi16 = *((__m64 *)&lowpass[column]); @@ -3486,7 +3486,7 @@ void InvertHorizontalStrip16sToYUYV(PIXEL *lowpass_band[], // Horizontal lowpass // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i y_low1_epi16; // Lowpass coefficients __m128i y_low2_epi16; __m128i u_low1_epi16; @@ -3663,7 +3663,7 @@ void InvertHorizontalStrip16sToYUYV(PIXEL *lowpass_band[], // Horizontal lowpass // Save the value for use in the fast loop v_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients y_low1_epi16 = _mm_load_si128((__m128i *)&y_lowpass_ptr[0]); @@ -4738,7 +4738,7 @@ void InvertHorizontalStrip16sToUYVY(PIXEL *lowpass_band[], // Horizontal lowpass // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i y_low1_epi16; // Lowpass coefficients __m128i y_low2_epi16; __m128i u_low1_epi16; @@ -4915,7 +4915,7 @@ void InvertHorizontalStrip16sToUYVY(PIXEL *lowpass_band[], // Horizontal lowpass // Save the value for use in the fast loop v_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients y_low1_epi16 = _mm_load_si128((__m128i *)&y_lowpass_ptr[0]); @@ -6496,7 +6496,7 @@ void InvertHorizontalStrip16sToBayerYUV(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i 
gg_low1_epi16; // Lowpass coefficients __m128i gg_low2_epi16; __m128i bg_low1_epi16; @@ -6681,7 +6681,7 @@ void InvertHorizontalStrip16sToBayerYUV(HorizontalFilterParams) // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); @@ -8178,7 +8178,7 @@ void InvertHorizontalStrip16sRGB2YUV(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i gg_low1_epi16; // Lowpass coefficients __m128i gg_low2_epi16; __m128i bg_low1_epi16; @@ -8362,7 +8362,7 @@ void InvertHorizontalStrip16sRGB2YUV(HorizontalFilterParams) // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); @@ -9792,7 +9792,7 @@ void InvertHorizontalStrip16sRGBA2YUVA(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i g_low1_epi16; // Lowpass coefficients __m128i g_low2_epi16; __m128i b_low1_epi16; @@ -10029,7 +10029,7 @@ void InvertHorizontalStrip16sRGBA2YUVA(HorizontalFilterParams) -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients g_low1_epi16 = _mm_load_si128((__m128i *)&g_lowpass_ptr[0]); @@ -11599,7 +11599,7 @@ void InvertHorizontalStrip16sRGB2YR16(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i gg_low1_epi16; // Lowpass coefficients __m128i gg_low2_epi16; __m128i bg_low1_epi16; @@ -11759,7 +11759,7 @@ void InvertHorizontalStrip16sRGB2YR16(HorizontalFilterParams) // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); @@ -13029,7 +13029,7 @@ void InvertHorizontalStrip16sRGB2B64A(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i g_low1_epi16; // Lowpass coefficients __m128i g_low2_epi16; __m128i b_low1_epi16; @@ -13232,7 +13232,7 @@ void InvertHorizontalStrip16sRGB2B64A(HorizontalFilterParams) a_odd_value = odd; } -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients g_low1_epi16 = _mm_load_si128((__m128i *)&g_lowpass_ptr[0]); @@ -14540,7 +14540,7 @@ void InvertHorizontalStrip16sRGB2RG30(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i gg_low1_epi16; // Lowpass coefficients __m128i gg_low2_epi16; __m128i bg_low1_epi16; @@ -14693,7 +14693,7 @@ void InvertHorizontalStrip16sRGB2RG30(HorizontalFilterParams) // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); diff --git a/Codec/allocator.h b/Codec/allocator.h index 3f7c183..795940b 100644 --- a/Codec/allocator.h +++ b/Codec/allocator.h @@ -1,23 +1,20 @@ -/*! @file allocator.h - -* @brief Memory tools -* -* @version 1.0.0 -* -* (C) Copyright 2017 GoPro Inc (http://gopro.com/). 
-* -* Licensed under either: -* - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 -* - MIT license, http://opensource.org/licenses/MIT -* at your option. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -*/ +/*! + * @file allocator.h + * @brief Memory tools + * + * (C) Copyright 2017 GoPro Inc (http://gopro.com/). + * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once @@ -25,19 +22,16 @@ #include #include #include "config.h" -#include <emmintrin.h> // Include support for SSE2 intrinsics +#include <emmintrin.h> // Include support for SSE2 intrinsics // The codec SDK and the codec library use the same memory allocator #include "../Common/CFHDAllocator.h" -#define INLINE inline - - #ifdef __cplusplus extern "C" { #endif -static INLINE void *Alloc(ALLOCATOR *allocator, size_t size) +static inline void *Alloc(ALLOCATOR *allocator, size_t size) { #if _ALLOCATOR //assert(allocator != NULL); @@ -53,7 +47,7 @@ static INLINE void *Alloc(ALLOCATOR *allocator, size_t size) return MEMORY_ALLOC(size); } -static INLINE void Free(ALLOCATOR *allocator, void *block) +static inline void Free(ALLOCATOR *allocator, void *block) { #if _ALLOCATOR //assert(allocator != NULL); @@ -89,7 +83,7 @@ static int m_allocationSize = 0; #endif -static INLINE void *AllocAligned(ALLOCATOR *allocator, size_t size, size_t alignment) +static inline void *AllocAligned(ALLOCATOR *allocator, size_t size, size_t alignment) { void *block = NULL; @@ -120,7 +114,7 @@ static INLINE void *AllocAligned(ALLOCATOR *allocator, size_t size, size_t align return block; } -static INLINE void FreeAligned(ALLOCATOR *allocator, void *block) +static inline void FreeAligned(ALLOCATOR *allocator, void *block) { #if _ALLOCATOR //assert(allocator != NULL); @@ -181,9 +175,7 @@ static INLINE void FreeAligned(ALLOCATOR *allocator, void *block) assert(found); } - #endif - } #ifdef __cplusplus diff --git a/Codec/bayer.c b/Codec/bayer.c index 09d30d5..500b779 100644 --- a/Codec/bayer.c +++ b/Codec/bayer.c @@ -14094,7 +14094,7 @@ void InvertHorizontalStrip16sBayerThruLUT(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i gg_low1_epi16; // Lowpass coefficients __m128i gg_low2_epi16; __m128i bg_low1_epi16; @@ -14251,7 +14251,7 @@ void InvertHorizontalStrip16sBayerThruLUT(HorizontalFilterParams) // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); @@ -15278,7 +15278,7 @@ void InvertHorizontalStrip16s444ThruLUT(HorizontalFilterParams) // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i gg_low1_epi16; // Lowpass coefficients __m128i 
gg_low2_epi16; __m128i bg_low1_epi16; @@ -15435,7 +15435,7 @@ void InvertHorizontalStrip16s444ThruLUT(HorizontalFilterParams) // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); diff --git a/Codec/bitstream.c b/Codec/bitstream.c index 3c50e8f..ae53531 100644 --- a/Codec/bitstream.c +++ b/Codec/bitstream.c @@ -1,23 +1,19 @@ -/*! @file - -* @brief -* -* @version 1.0.0 -* -* (C) Copyright 2017 GoPro Inc (http://gopro.com/). -* -* Licensed under either: -* - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 -* - MIT license, http://opensource.org/licenses/MIT -* at your option. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -*/ +/*! + * @file bitstream.c + * + * (C) Copyright 2017 GoPro Inc (http://gopro.com/). + * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "stdafx.h" #include "config.h" @@ -590,9 +586,9 @@ void SkipBits(BITSTREAM *stream, int nBits) return; } -uint32_t PeekBits(BITSTREAM *stream, int nBits) +uint32_t PeekBits(BITSTREAM *stream, int nBits) { -#if (1 && ASMOPT) +#if (ASMOPT) // Check that the offsets to the bitstream fields have not been changed //assert(offsetof(BITSTREAM, nBitsFree) == 4); @@ -885,7 +881,7 @@ void PutBits(BITSTREAM *stream, uint32_t wBits, int nBits) #else -#if (1 && ASMOPT) +#if (ASMOPT) // Write bits to a bitstream (optimized with doubleword shift instructions) void PutBits(BITSTREAM *stream, uint32_t wBits, int nBits) @@ -949,7 +945,7 @@ void PutBits(BITSTREAM *stream, uint32_t wBits, int nBits) //sfence // Serialize memory accesses } -#if (1 && DEBUG) +#if (DEBUG) stream->cntBits += nBits; #endif } @@ -988,7 +984,7 @@ void PutBits(BITSTREAM *stream, uint32_t wBits, int nBits) { wBuffer = wBits & BITMASK(nBits); nBitsFree -= nBits; -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBits); #endif } @@ -997,7 +993,7 @@ void PutBits(BITSTREAM *stream, uint32_t wBits, int nBits) wBuffer <<= nBits; wBuffer |= (wBits & BITMASK(nBits)); nBitsFree -= nBits; -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBits); #endif } @@ -1013,7 +1009,7 @@ void PutBits(BITSTREAM *stream, uint32_t wBits, int nBits) // Insert as many bits as will fit into the buffer wBuffer |= (wBits >> nBits) & BITMASK(nBitsFree); -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBitsFree); #endif // Insert all of the bytes in the buffer into the bitstream diff --git a/Codec/codec.c b/Codec/codec.c index 4914e46..379db09 100644 --- a/Codec/codec.c +++ b/Codec/codec.c @@ -554,7 +554,7 @@ void Quantize16s(PIXEL *image, int width, int height, int pitch, int divisor) column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input_ptr = (__m128i 
*)rowptr; __m128i quant_epi16 = _mm_set1_epi16(multiplier); __m128i zero_si128 = _mm_setzero_si128(); @@ -627,7 +627,7 @@ void Quantize8s(PIXEL8S *image, int width, int height, int pitch, int divisor) column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *group_ptr = (__m128i *)rowptr; __m128i quant_epi16 = _mm_set1_epi16(multiplier); __m128i zero_si128 = _mm_setzero_si128(); diff --git a/Codec/config.h b/Codec/config.h index f8eab8c..98324bc 100644 --- a/Codec/config.h +++ b/Codec/config.h @@ -1,6 +1,5 @@ /*! * @file config.h - * @brief * * (C) Copyright 2017 GoPro Inc (http://gopro.com/). * diff --git a/Codec/convert.c b/Codec/convert.c index c45f376..8e370ba 100644 --- a/Codec/convert.c +++ b/Codec/convert.c @@ -1,23 +1,19 @@ -/*! @file convert.c - -* @brief -* -* @version 1.0.0 -* -* (C) Copyright 2017 GoPro Inc (http://gopro.com/). -* -* Licensed under either: -* - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 -* - MIT license, http://opensource.org/licenses/MIT -* at your option. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -*/ +/*! + * @file convert.c + * + * (C) Copyright 2017 GoPro Inc (http://gopro.com/). + * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #include "config.h" #include "timing.h" @@ -61,12 +57,12 @@ int Timecode2frames(char *tc, int rate) if (rate == 0) rate = 24; if (rate == 23) rate = 24; if (rate == 29) rate = 30; - if (rate == 50) rate = 50, mult = 2; - if (rate == 59) rate = 60, mult = 2; + if (rate == 50) { rate = 50; mult = 2; } + if (rate == 59) { rate = 60; mult = 2; } if (tc) { -#if defined(_WIN32) && !defined(__GNUC__) +#ifdef _MSC_VER ret = sscanf_s(tc, "%02d:%02d:%02d:%02d", &hr, &mn, &sc, &fr); #else ret = sscanf(tc, "%02d:%02d:%02d:%02d", &hr, &mn, &sc, &fr); @@ -506,7 +502,7 @@ void ConvertYUYVRowToUYVY(uint8_t *input, uint8_t *output, int length, int forma // Start processing at the first column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process four bytes each of luma and chroma per loop iteration //const int column_step = sizeof(__m64); @@ -552,7 +548,7 @@ void ConvertYUYVRowToV210(uint8_t *input, uint8_t *output, int length, int forma // Start processing at the first column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process four bytes each of luma and chroma per loop iteration //const int column_step = sizeof(__m64); @@ -929,7 +925,7 @@ void ConvertV210RowToPackedYUV(uint8_t *input, uint8_t *output, int length, uint uint8_t *output_ptr = output; int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 24; int post_column = length - (length % column_step); @@ -1572,7 +1568,7 @@ void ConvertYUV16sRowToV210(PIXEL *input, uint8_t *output, int frame_width) // Start processing at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process twelve values of luma and chroma per loop iteration const int column_step = 2 * v210_column_step; @@ -1703,7 +1699,7 @@ void ConvertYUV16uRowToYUV(PIXEL16U *y_input, PIXEL16U *u_input, PIXEL16U *v_inp // Start processing at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process sixteen values of luma and chroma per loop iteration const int column_step = 16; @@ -1812,7 +1808,7 @@ void ConvertYUV16uRowToV210(PIXEL16U *y_input, PIXEL16U *u_input, PIXEL16U *v_in // Start processing at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process twelve values of luma and chroma per loop iteration const int column_step = 6; @@ -2123,7 +2119,7 @@ void ConvertV210RowToPlanar16s(uint8_t *input, int length, PIXEL *y_output, PIXE __m128i *u_output_ptr = (__m128i *)v_output; __m128i *v_output_ptr = (__m128i *)u_output; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 48; int post_column = length - (length % column_step); @@ -2822,7 +2818,7 @@ void UnpackRowYUV16s(uint8_t *input, PIXEL *output, int width, int channel, if (format == COLOR_FORMAT_YUYV) { -#if (1 && XMMOPT) +#if (XMMOPT) { __m128i *input_ptr = (__m128i *)input; __m128i *output_ptr = (__m128i *)output; @@ -3715,7 +3711,7 @@ void UnpackYUVRow16s(uint8_t *input, int width, PIXEL *output[3]) PIXEL *u_output = output[1]; PIXEL *v_output = output[2]; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input_ptr = (__m128i *)input; //__m128i *input_ptr = (__m128i *)test_input; @@ -3741,7 +3737,7 @@ void UnpackYUVRow16s(uint8_t *input, int width, PIXEL *output[3]) int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) //__m128i input1_epi16; //__m128i input2_epi16; @@ -4102,7 +4098,7 @@ void ConvertPlanarRGB16uToPackedB64A(PIXEL *planar_output[], int planar_pitch[], { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = width - (width % column_step); @@ -4215,7 +4211,7 @@ void 
ConvertPlanarRGB16uToPackedRGB32(PIXEL *planar_output[], int planar_pitch[] { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8, pos = 0; int post_column = width - (width % column_step); @@ -4534,7 +4530,7 @@ void ConvertPlanarRGB16uToPackedRGB24(PIXEL *planar_output[], int planar_pitch[] int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8, pos = 0; int post_column = width - (width % column_step); @@ -5129,7 +5125,7 @@ void ConvertPlanarRGB16uToPackedRGB30(PIXEL *planar_output[], int planar_pitch[] //TODO: Need to finish the optimized code -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = width - (width % column_step); @@ -7286,7 +7282,7 @@ void ConvertYUVStripPlanar16uToPacked(PIXEL16U *planar_output[], int planar_pitc { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = width - (width % column_step); @@ -8136,7 +8132,7 @@ void ConvertYUVRow16uToBGRA64(uint8_t *planar_output[], int planar_pitch[], ROI { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *output_ptr = (__m128i *)output_row_ptr; __m128i limiterRGB = _mm_set1_epi16(0x7fff - 0x3fff); @@ -9051,7 +9047,7 @@ void ConvertYUVRow16uToYUV444(uint8_t *planar_output[], int planar_pitch[], ROI { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *yptr = (__m128i *)y_row_ptr; __m128i *uptr = (__m128i *)u_row_ptr; @@ -9354,7 +9350,7 @@ void ConvertPlanarYUVToV210(PIXEL *planar_output[], int planar_pitch[], ROI roi, int v; uint32_t yuv; -#if (1 && XMMOPT) +#if (XMMOPT) // Process twelve bytes each of luma and chroma per loop iteration //const int column_step = 12; @@ -10266,7 +10262,7 @@ void ConvertUnpacked16sRowToPacked8u(PIXEL **channel_row_ptr, int num_channels, // Start processing at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process eight values of luma and chroma per loop iteration const int column_step = 8; @@ -10399,7 +10395,7 @@ void ConvertUnpacked16sRowToPacked8u(PIXEL **channel_row_ptr, int num_channels, // Start processing at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process sixteen values of luma and chroma per loop iteration const int column_step = 16; @@ -11112,7 +11108,7 @@ void ConvertUnpacked16sRowToYU64(PIXEL **input, int num_channels, uint8_t *outpu int column; -#if (1 && XMMOPT) +#if (XMMOPT) // Process sixteen values of luma and chroma per loop iteration const int column_step = 16; @@ -11156,7 +11152,7 @@ void ConvertUnpacked16sRowToYU64(PIXEL **input, int num_channels, uint8_t *outpu } //else YU64 -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { @@ -11291,7 +11287,7 @@ void ConvertUnpacked16sRowToB64A(PIXEL **input_plane, int num_channels, //alpha_Companded likely doesn't set as the is not does to go through Convert4444LinesToOutput -#if (1 && XMMOPT) +#if (XMMOPT) // Process eight ARGB tuples per loop iteration const int column_step = 8; @@ -11322,7 +11318,7 @@ void ConvertUnpacked16sRowToB64A(PIXEL **input_plane, int num_channels, // Start processing at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { @@ -11813,7 +11809,7 @@ void ConvertUnpacked16sRowToRGB30(PIXEL **input_plane, int num_channels, int column; -#if (1 && XMMOPT) +#if (XMMOPT) // Process eight ARGB tuples per loop iteration const int column_step = 8; @@ -11840,7 +11836,7 @@ void ConvertUnpacked16sRowToRGB30(PIXEL **input_plane, int num_channels, // Start processing at the left column 
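Every one of these #if (XMMOPT) hunks guards the same two-part structure visible in the surrounding context: a SIMD fast loop that runs up to post_column = width - (width % column_step), followed by a scalar tail that finishes the leftover columns. Below is a minimal, self-contained sketch of that idiom; the function and variable names are illustrative, not the codec's own, and it assumes a 16-byte-aligned row, as the codec's aligned loads do.

    #include <emmintrin.h>   // SSE2 intrinsics

    static void scale_row_16s(short *row, int width, short gain)
    {
        const int column_step = 8;   // eight 16-bit values per __m128i
        const int post_column = width - (width % column_step);
        int column = 0;

    #if (XMMOPT)
        const __m128i gain_epi16 = _mm_set1_epi16(gain);

        // Fast loop: process eight values per iteration (16-byte-aligned row assumed)
        for (; column < post_column; column += column_step)
        {
            __m128i group_epi16 = _mm_load_si128((__m128i *)&row[column]);
            group_epi16 = _mm_mullo_epi16(group_epi16, gain_epi16);
            _mm_store_si128((__m128i *)&row[column], group_epi16);
        }
    #endif

        // Scalar tail: finish the columns that do not fill a whole group
        // (or the entire row when XMMOPT is undefined or zero)
        for (; column < width; column++)
            row[column] = (short)(row[column] * gain);
    }

Because column is shared by both loops, the function stays correct whether or not the SIMD block compiles in; disabling XMMOPT simply makes the tail process every column.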
column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { @@ -12137,7 +12133,7 @@ void ConvertUnpacked16sRowToRGBA64(PIXEL **input_plane, int num_channels, int column; -#if (1 && XMMOPT) +#if (XMMOPT) // Process eight ARGB tuples per loop iteration const int column_step = 8; @@ -12169,7 +12165,7 @@ void ConvertUnpacked16sRowToRGBA64(PIXEL **input_plane, int num_channels, // Start processing at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { @@ -12602,7 +12598,7 @@ void ConvertRGB2YUV(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) // The fast loop merges values from different phases to allow aligned stores __m128i *outptr; @@ -12660,7 +12656,7 @@ void ConvertRGB2YUV(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, outputline += output_pitch * row; outptr = (__m128i *)outputline; -#if (1 && XMMOPT) +#if (XMMOPT) // The reconstruction filters use pixels starting at the first column for (; column < post_column; column += column_step) @@ -13105,7 +13101,7 @@ void ConvertRGB2UYVY(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) // The fast loop merges values from different phases to allow aligned stores __m128i *outptr; @@ -13164,7 +13160,7 @@ void ConvertRGB2UYVY(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, outputline += output_pitch * row; outptr = (__m128i *)outputline; -#if (1 && XMMOPT) +#if (XMMOPT) // The reconstruction filters use pixels starting at the first column for (; column < post_column; column += column_step) @@ -13515,7 +13511,7 @@ void ConvertRGBA48toRGB32(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, PIX for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) // The fast loop merges values from different phases to allow aligned stores __m128i *outptr; @@ -13577,7 +13573,7 @@ void ConvertRGBA48toRGB32(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, PIX outbyteptr = (unsigned char *)outputline; -#if (1 && XMMOPT) +#if (XMMOPT) // The reconstruction filters use pixels starting at the first column for (; column < post_column; column += column_step) @@ -13784,7 +13780,7 @@ void ConvertRGB48toRGB24(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) // The fast loop merges values from different phases to allow aligned stores __m128i *outptr; @@ -13853,7 +13849,7 @@ void ConvertRGB48toRGB24(PIXEL *rlineptr, PIXEL *glineptr, PIXEL *blineptr, outbyteptr = (unsigned char *)outputline; -#if (1 && XMMOPT) +#if (XMMOPT) // The reconstruction filters use pixels starting at the first column for (; column < post_column; column += column_step) diff --git a/Codec/decoder.c b/Codec/decoder.c index 7b25f93..c168d80 100644 --- a/Codec/decoder.c +++ b/Codec/decoder.c @@ -523,7 +523,7 @@ void InitDecoder(DECODER *decoder, FILE *logfile, CODESET *cs) // Free data allocated within the decoder void ClearDecoder(DECODER *decoder) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -8539,7 +8539,7 @@ void ConvertLocalToOutput(DECODER *decoder, uint8_t *output, int pitch, int outp bool DecodeSample(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams, CFHDDATA *cfhddata) { //CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif //CODEC_STATE 
*codec = &decoder->codec; @@ -9593,7 +9593,7 @@ bool DecodeSample(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch bool DecodeSampleGroup(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams) { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -9828,7 +9828,7 @@ bool DecodeSampleGroup(DECODER *decoder, BITSTREAM *input, uint8_t *output, int decoder->gop_length = 2; decoder->frame_count += 2; -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, @@ -9873,7 +9873,7 @@ bool DecodeSampleGroup(DECODER *decoder, BITSTREAM *input, uint8_t *output, int bool DecodeSampleFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams) { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -9920,7 +9920,7 @@ bool DecodeSampleFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, int STOP(tk_decoding); -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, @@ -9980,7 +9980,7 @@ bool DecodeSampleFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, int bool DecodeSampleIntraFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, int pitch, ColorParam *colorparams) { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -10245,7 +10245,7 @@ bool DecodeSampleIntraFrame(DECODER *decoder, BITSTREAM *input, uint8_t *output, // Decode a sample channel header bool DecodeSampleChannelHeader(DECODER *decoder, BITSTREAM *input) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_ERROR error = CODEC_ERROR_OKAY; @@ -10295,7 +10295,7 @@ bool DecodeSampleChannelHeader(DECODER *decoder, BITSTREAM *input) // Decode the coefficients in a subband bool DecodeSampleSubband(DECODER *decoder, BITSTREAM *input, int subband) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -10579,7 +10579,7 @@ bool DecodeSampleSubband(DECODER *decoder, BITSTREAM *input, int subband) bool DecodeSampleLowPassBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -10961,7 +10961,7 @@ bool DecodeSampleHighPassBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavele { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -11105,7 +11105,7 @@ bool DecodeSampleEmptyBand(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -11261,7 +11261,7 @@ bool DecodeBand16sLossless(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, { //CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif int result = true; @@ -11638,7 +11638,7 @@ void ComputeOutputDimensions(DECODER *decoder, int frame, int *decoded_width_out, int *decoded_height_out) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -11752,7 +11752,7 @@ void ReconstructSampleFrameToBuffer(DECODER *decoder, int frame, uint8_t *output { FRAME_INFO local_info; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif FRAME_INFO *info = 
&local_info; @@ -11809,7 +11809,7 @@ void ReconstructSampleFrameToBuffer(DECODER *decoder, int frame, uint8_t *output // The decoder can decode a video sample without returning a frame if (output == NULL || pitch == 0) return; -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) // Force decoding to 16-bit pixels for debugging info->format = DECODED_FORMAT_YR16; #endif @@ -13359,7 +13359,7 @@ void ReconstructSampleFrameToBuffer(DECODER *decoder, int frame, uint8_t *output GD = BG + bayer_pitch / 4; // Pack the rows of Bayer components into the BYR3 pattern -#if (1 && XMMOPT) +#if (XMMOPT) { __m128i *G_128 = (__m128i *)G; @@ -14904,7 +14904,7 @@ void ReconstructQuarterFrame(DECODER *decoder, int num_channels, int frame_index, uint8_t *output, int output_pitch, FRAME_INFO *info, const SCRATCH *scratch, int precision) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif TRANSFORM **transform_array = decoder->transform; @@ -15208,7 +15208,7 @@ void ReconstructQuarterFrame(DECODER *decoder, int num_channels, break; default: -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, "ReconstructQuarterFrame bad color format: %d\n", format); @@ -15505,7 +15505,7 @@ void DecodeForceMetadataRefresh(DECODER *decoder) void SetDecoderFlags(DECODER *decoder, uint32_t flags) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -16977,7 +16977,7 @@ bool DecodeFastRunsFSM16s(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, { //CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif int result = true; @@ -17195,7 +17195,7 @@ bool SkipFastRunsFSM(DECODER *decoder, BITSTREAM *stream, IMAGE *wavelet, { //CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif int result; @@ -17858,7 +17858,7 @@ void TransformInverseFrameToRow16u(DECODER *decoder, TRANSFORM *transform[], int int channel; int row; -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) PIXEL16U *output_buffer; #endif @@ -17884,7 +17884,7 @@ void TransformInverseFrameToRow16u(DECODER *decoder, TRANSFORM *transform[], int temporal_lowpass = (PIXEL *)&buffer[0]; temporal_highpass = (PIXEL *)&buffer[temporal_row_size]; -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) output_buffer = (PIXEL16U *)&buffer[2 * temporal_row_size]; #endif @@ -17935,7 +17935,7 @@ void TransformInverseFrameToRow16u(DECODER *decoder, TRANSFORM *transform[], int // Process one row at a time from each channel for (row = 0; row < half_height; row++) { -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) PIXEL16U *output_row_ptr = output_buffer; PIXEL16U *planar_output[TRANSFORM_MAX_CHANNELS]; int planar_pitch[TRANSFORM_MAX_CHANNELS]; @@ -17982,7 +17982,7 @@ void TransformInverseFrameToRow16u(DECODER *decoder, TRANSFORM *transform[], int horizontal_highlow[channel] += pitch; horizontal_highhigh[channel] += pitch; -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) // Write the rows of 16-bit pixels to a temporary buffer planar_output[channel] = output_row_ptr; planar_pitch[channel] = output_pitch * sizeof(PIXEL); @@ -18077,7 +18077,7 @@ void TransformInverseFrameSectionToRow16u(DECODER *decoder, int thread_index, in int return_value; -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) PIXEL16U *output_buffer; #endif @@ -18115,7 +18115,7 @@ void TransformInverseFrameSectionToRow16u(DECODER *decoder, int thread_index, in temporal_lowpass = (PIXEL *)&buffer[0]; temporal_highpass = (PIXEL *)&buffer[temporal_row_size]; -#if (1 && DEBUG_ROW16U) +#if (DEBUG_ROW16U) 
output_buffer = (PIXEL16U *)&buffer[2 * temporal_row_size]; #endif @@ -18311,7 +18311,7 @@ void TransformInverseFrameSectionToRow16u(DECODER *decoder, int thread_index, in } } -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, "Finished transform, thread index: %d\n", thread_index); @@ -19026,7 +19026,7 @@ IMAGE *GetWaveletThreadSafe(DECODER *decoder, TRANSFORM *transform, int index, if (decoder != NULL && transform != NULL) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -19061,7 +19061,7 @@ CODEC_ERROR UpdateCodecState(DECODER *decoder, BITSTREAM *input, CODEC_STATE *co { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -19632,7 +19632,7 @@ CODEC_ERROR UpdateCodecState(DECODER *decoder, BITSTREAM *input, CODEC_STATE *co break; -#if (1 && DEBUG) +#if (DEBUG) case CODEC_TAG_SAMPLE_END: // Marks the end of the sample (for debugging only) assert(0); @@ -19703,7 +19703,7 @@ void UpdateWaveletBandValidFlags(DECODER *decoder, IMAGE *wavelet, int band) if (decoder != NULL && wavelet != NULL) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -19851,7 +19851,7 @@ bool DecodedBandsValid(IMAGE *wavelet, int index, int transform_type) void QueueThreadedTransform(DECODER *decoder, int channel, int index) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -19892,7 +19892,7 @@ void QueueThreadedTransform(DECODER *decoder, int channel, int index) decoder->transform_queue.free_entry++; decoder->transform_queue.num_entries++; -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, "Queued transform, channel: %d, index: %d\n", channel, index); @@ -20205,7 +20205,7 @@ CODEC_ERROR UncompressedSampleFrameBayerToBuffer(DECODER *decoder, FRAME_INFO *i { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif //CODEC_STATE *codec = &decoder->codec; @@ -20401,7 +20401,7 @@ CODEC_ERROR UncompressedSampleFrameYUVToBuffer(DECODER *decoder, FRAME_INFO *inf { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif //CODEC_STATE *codec = &decoder->codec; @@ -20781,7 +20781,7 @@ CODEC_ERROR UncompressedSampleFrameRGBToBuffer(DECODER *decoder, FRAME_INFO *inf { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif //CODEC_STATE *codec = &decoder->codec; @@ -21194,7 +21194,7 @@ CODEC_ERROR ReconstructSampleFrameBayerToBuffer(DECODER *decoder, FRAME_INFO *in CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -21252,7 +21252,7 @@ CODEC_ERROR ReconstructSampleFrameBayerFullToBuffer(DECODER *decoder, FRAME_INFO { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -21380,7 +21380,7 @@ CODEC_ERROR ReconstructSampleFrameDeBayerFullToBuffer(DECODER *decoder, FRAME_IN { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif CODEC_STATE *codec = &decoder->codec; @@ -21565,7 +21565,7 @@ CODEC_ERROR ReconstructSampleFrameBayerHalfToBuffer(DECODER *decoder, FRAME_INFO { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif //CODEC_STATE *codec = &decoder->codec; @@ -21651,7 +21651,7 @@ CODEC_ERROR 
ReconstructSampleFrameBayerHalfToBuffer(DECODER *decoder, FRAME_INFO CODEC_ERROR ReconstructSampleFrameBayerQuarterToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch) { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif //FRAME_INFO *info = &decoder->frame; @@ -21678,7 +21678,7 @@ CODEC_ERROR ReconstructSampleFrameYUV422ToBuffer(DECODER *decoder, int frame, ui { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif FRAME_INFO *info = &decoder->frame; @@ -22215,7 +22215,7 @@ CODEC_ERROR ReconstructSampleFrameYUV422ToBuffer(DECODER *decoder, int frame, ui CODEC_ERROR ReconstructSampleFrameRGB444ToBuffer(DECODER *decoder, int frame, uint8_t *output, int pitch) { CODEC_ERROR error = CODEC_ERROR_OKAY; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif FRAME_INFO *info = &decoder->frame; @@ -22635,7 +22635,7 @@ CODEC_ERROR ReconstructSampleFrameRGB444ToBuffer(DECODER *decoder, int frame, ui break; default: -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, "Invalid decoded format: %d\n", info->format); @@ -22799,7 +22799,7 @@ void TransformInverseSpatialThreadedYUV422ToBuffer(DECODER *decoder, int frame_i int chroma_offset, int precision) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -22859,7 +22859,7 @@ void TransformInverseSpatialThreadedYUV422ToBuffer(DECODER *decoder, int frame_i // Wait for all of the worker threads to finish ThreadPoolWaitAllDone(&decoder->worker_thread.pool); -#if (1 && DEBUG) +#if (DEBUG) if (logfile) { fprintf(logfile, "All worker threads signalled done\n"); @@ -22875,7 +22875,7 @@ void TransformInverseSpatialUniversalThreadedToRow16u(DECODER *decoder, int fram int chroma_offset, int precision) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -22933,7 +22933,7 @@ void TransformInverseSpatialUniversalThreadedToOutput( HorizontalInverseFilterOutputProc horizontal_filter_proc) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif @@ -22987,7 +22987,7 @@ void TransformInverseSpatialSectionToOutput(DECODER *decoder, int thread_index, HorizontalInverseFilterOutputProc horizontal_filter_proc) { -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif TRANSFORM **transform = decoder->transform; diff --git a/Codec/encoder.c b/Codec/encoder.c index ecd38c0..af2986a 100644 --- a/Codec/encoder.c +++ b/Codec/encoder.c @@ -1737,7 +1737,7 @@ void EncodeRelease(ENCODER *encoder, TRANSFORM *transform[], int num_transforms, } #endif -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) // Close the trace file CloseTraceFile(); #endif @@ -1975,7 +1975,7 @@ bool EncodeSample(ENCODER *encoder, uint8_t *data, int width, int height, int pi } #endif -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TraceEncodeFrame(encoder->frame_number, encoder->gop_length, width, height); #endif @@ -5070,7 +5070,7 @@ void EncodeQuantLongRuns(ENCODER *encoder, BITSTREAM *stream, PIXEL *image, } #endif -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TraceEncodeBand(width, height); #endif @@ -5144,7 +5144,7 @@ void EncodeQuantLongRuns(ENCODER *encoder, BITSTREAM *stream, PIXEL *image, wBuffer <<= nBits; wBuffer |= (wBits & BITMASK(nBits)); nBitsFree -= nBits; -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBits); #endif } @@ -5159,7 +5159,7 @@ void EncodeQuantLongRuns(ENCODER *encoder, BITSTREAM *stream, PIXEL *image, #endif // Insert as many bits as will 
fit into the buffer wBuffer |= (wBits >> nBits) & BITMASK(nBitsFree); -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBitsFree); #endif // Insert all of the bytes in the buffer into the bitstream @@ -5233,7 +5233,7 @@ void EncodeQuantLongRuns(ENCODER *encoder, BITSTREAM *stream, PIXEL *image, wBuffer <<= nBits; wBuffer |= (wBits & BITMASK(nBits)); nBitsFree -= nBits; -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBits); #endif } @@ -5248,7 +5248,7 @@ void EncodeQuantLongRuns(ENCODER *encoder, BITSTREAM *stream, PIXEL *image, #endif // Insert as many bits as will fit into the buffer wBuffer |= (wBits >> nBits) & BITMASK(nBitsFree); -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TracePutBits(nBitsFree); #endif // Insert all of the bytes in the buffer into the bitstream @@ -5682,7 +5682,7 @@ int SetCodingFlags(ENCODER *encoder, int subband, int *active_codebook_ret, int int difference_coding = 0; int peaks_coding = 0; -#if (1 && CODEC_NUM_CODESETS >= 2) +#if (CODEC_NUM_CODESETS >= 2) if (encoder->progressive) { if (encoder->gop_length == 2 && (subband >= 7 && subband <= 10)) // use the deeper table for temporal difference. @@ -7276,7 +7276,7 @@ void EncodeQuantizedGroup(ENCODER *encoder, TRANSFORM *transform[], int num_tran //int k; int channel_size_in_byte; -#if (1 && TRACE_PUTBITS) +#if (TRACE_PUTBITS) TraceEncodeChannel(channel); #endif // Align start of channel on a bitword boundary @@ -8778,7 +8778,7 @@ bool EncodeSampleThreaded(ENCODER *encoder, uint8_t *data, int width, int height } #endif -#if (1 && _THREADED_ENCODER) +#if (_THREADED_ENCODER) if (format == COLOR_FORMAT_YUYV) { @@ -8860,7 +8860,7 @@ bool EncodeSampleThreaded(ENCODER *encoder, uint8_t *data, int width, int height } #endif -#if (1 && _THREADED_ENCODER) +#if (_THREADED_ENCODER) if (format == COLOR_FORMAT_YUYV) { diff --git a/Codec/entropy_threading.c b/Codec/entropy_threading.c index 2f13fa6..3ea105b 100644 --- a/Codec/entropy_threading.c +++ b/Codec/entropy_threading.c @@ -294,7 +294,7 @@ void DecodeEntropy(DECODER *decoder, int work_index, int thread_index, FSM *fsm, THREAD_PROC(EntropyWorkerThreadProc, lpParam) { DECODER *decoder = (DECODER *)lpParam; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif THREAD_ERROR error = THREAD_ERROR_OKAY; @@ -369,7 +369,7 @@ THREAD_PROC(EntropyWorkerThreadProc, lpParam) THREAD_PROC(ParallelThreadProc, lpParam) { DECODER *decoder = (DECODER *)lpParam; -#if (1 && DEBUG) +#if (DEBUG) FILE *logfile = decoder->logfile; #endif THREAD_ERROR error = THREAD_ERROR_OKAY; diff --git a/Codec/error.h b/Codec/error.h index 42e82d7..83a6af1 100644 --- a/Codec/error.h +++ b/Codec/error.h @@ -1,23 +1,19 @@ -/*! @file error.h - -* @brief -* -* @version 1.0.0 -* -* (C) Copyright 2017 GoPro Inc (http://gopro.com/). -* -* Licensed under either: -* - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 -* - MIT license, http://opensource.org/licenses/MIT -* at your option. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -*/ +/*! + * @file error.h + * + * (C) Copyright 2017 GoPro Inc (http://gopro.com/). 
+ * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef _ERROR_H #define _ERROR_H @@ -144,7 +140,7 @@ typedef enum codec_error } CODEC_ERROR; // Convert a bistream error code into a codec error code -static INLINE CODEC_ERROR CodecErrorBitstream(BITSTREAM *stream) +static inline CODEC_ERROR CodecErrorBitstream(BITSTREAM *stream) { int32_t error_code = (int32_t)CODEC_ERROR_BITSTREAM; @@ -166,4 +162,4 @@ static INLINE CODEC_ERROR CodecErrorBitstream(BITSTREAM *stream) } } -#endif +#endif // _ERROR_H diff --git a/Codec/frame.c b/Codec/frame.c index 4215553..679271d 100644 --- a/Codec/frame.c +++ b/Codec/frame.c @@ -430,7 +430,7 @@ void ConvertRGB32to10bitYUVFrame(uint8_t *rgb, int pitch, FRAME *frame, uint8_t { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 16; int post_column = roi.width - (roi.width % column_step); @@ -5668,7 +5668,7 @@ void ConvertRGBA64ToFrame16s(uint8_t *data, int pitch, FRAME *frame, uint8_t *bu // Start at the leftmost column //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -5905,7 +5905,7 @@ void ConvertRGB48ToFrame16s(uint8_t *data, int pitch, FRAME *frame, uint8_t *buf // Start at the leftmost column //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -6124,7 +6124,7 @@ void ConvertRGBtoRGB48(uint8_t *data, int pitch, FRAME *frame, uint8_t *buffer, int column = 0; //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -6242,7 +6242,7 @@ void ConvertRGBAtoRGB48(uint8_t *data, int pitch, FRAME *frame, uint8_t *buffer, int row = rowp < display_height ? 
rowp : display_height - 1; //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -6374,7 +6374,7 @@ void ConvertRGBAtoRGBA64(uint8_t *data, int pitch, FRAME *frame, uint8_t *buffer int column = 0; //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -6569,7 +6569,7 @@ CODEC_ERROR ConvertBGRA64ToFrame_4444_16s(uint8_t *data, int pitch, FRAME *frame int column = 0; //TODO: Process each row by calling an optimized subroutine -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -6881,7 +6881,7 @@ void ConvertAnyDeep444to422(uint8_t *data, int pitch, FRAME *frame, uint8_t *buf int column = 0; //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -7986,7 +7986,7 @@ void ConvertYUVAFloatToFrame16s(uint8_t *data, int pitch, FRAME *frame, uint8_t int column = 0; //TODO: Add optimized code -#if (1 && XMMOPT) +#if (XMMOPT) #endif @@ -8190,7 +8190,7 @@ void ConvertYUVAFloatToFrame_RGB444_16s(uint8_t *data, int pitch, FRAME *frame, // Start at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) //TODO: Add optimized code #endif // Pointer into the YUVA input row @@ -8388,7 +8388,7 @@ void ConvertYUVAFloatToFrame_RGBA4444_16s(uint8_t *data, int pitch, FRAME *frame // Start at the leftmost column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) //TODO: Add optimized code #endif // Pointer into the YUVA input row diff --git a/Codec/image.h b/Codec/image.h index 6714005..61dad05 100644 --- a/Codec/image.h +++ b/Codec/image.h @@ -96,7 +96,7 @@ typedef unsigned short PIXEL16U; #define PIXEL16U_MIN 0 #define PIXEL16U_MAX USHRT_MAX -static INLINE int Clamp16s(int x) +static inline int Clamp16s(int x) { if (x < SHRT_MIN) x = SHRT_MIN; else if (x > SHRT_MAX) x = SHRT_MAX; diff --git a/Codec/quantize.c b/Codec/quantize.c index f4d766d..786b9d7 100644 --- a/Codec/quantize.c +++ b/Codec/quantize.c @@ -756,7 +756,7 @@ void QuantizeRow16s(PIXEL16S *rowptr, int length, int divisor) short multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *group_ptr = (__m64 *)rowptr; __m64 zero_si64 = _mm_setzero_si64(); __m64 round_pi16 = _mm_set1_pi16(1); @@ -786,7 +786,7 @@ void QuantizeRow16s(PIXEL16S *rowptr, int length, int divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_pi16 = _mm_set1_pi16(multiplier); @@ -858,7 +858,7 @@ void QuantizeRow16s(PIXEL16S *rowptr, int length, int divisor) short multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *group_ptr = (__m128i *)rowptr; __m128i zero_si128 = _mm_setzero_si128(); __m128i quant_epi16; @@ -878,7 +878,7 @@ void QuantizeRow16s(PIXEL16S *rowptr, int length, int divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_epi16 = _mm_set1_epi16(multiplier); @@ -957,7 +957,7 @@ void QuantizeRow8s(PIXEL8S *rowptr, int length, int divisor) short multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *group_ptr = (__m64 *)rowptr; __m64 zero_si64 = _mm_setzero_si64(); __m64 round_pi16 = _mm_set1_pi16(1); @@ -986,7 +986,7 @@ void QuantizeRow8s(PIXEL8S *rowptr, int length, int divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_pi16 = _mm_set1_pi16(multiplier); @@ -1072,7 +1072,7 @@ void QuantizeRow8s(PIXEL8S *rowptr, int length, int divisor) short multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *group_ptr = (__m128i *)rowptr; __m128i zero_si128 = _mm_setzero_si128(); //__m128i round_epi16 = _mm_set1_epi16(1); @@ -1095,7 +1095,7 @@ void QuantizeRow8s(PIXEL8S *rowptr, int length, int 
divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_epi16 = _mm_set1_epi16(multiplier); @@ -1191,7 +1191,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) int column; // MMX version -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *input_ptr = (__m64 *)input; __m64 *output_ptr = (__m64 *)output; __m64 zero_si64 = _mm_setzero_si64(); @@ -1241,7 +1241,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) // Start at the left end of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_pi16 = _mm_set1_pi16(multiplier); @@ -1330,7 +1330,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) uint32_t multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input_ptr = (__m128i *)input; __m128i *output_ptr = (__m128i *)output; @@ -1375,7 +1375,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_epi16 = _mm_set1_epi16(multiplier); @@ -1473,7 +1473,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) } #endif -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input_ptr = (__m128i *)input; __m128i *output_ptr = (__m128i *)output; @@ -1509,7 +1509,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_epi16 = _mm_set1_epi16(multiplier); @@ -1662,7 +1662,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) } #endif -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input_ptr = (__m128i *)input; __m128i *output_ptr = (__m128i *)output; @@ -1698,7 +1698,7 @@ void QuantizeRow16sTo16s(PIXEL *input, PIXEL *output, int length, int divisor) // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_epi16 = _mm_set1_epi16(multiplier); @@ -1853,7 +1853,7 @@ void QuantizeRow16sToCoded(ENCODER *encoder, BITSTREAM *stream, PIXEL *input, in if (divisor > 1) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *input_ptr = (__m64 *)input; __m64 *output_ptr = (__m64 *)output; __m64 zero_si64 = _mm_setzero_si64(); @@ -1877,7 +1877,7 @@ void QuantizeRow16sToCoded(ENCODER *encoder, BITSTREAM *stream, PIXEL *input, in // Start at the left end of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_pi16 = _mm_set1_pi16(multiplier); @@ -1979,7 +1979,7 @@ void QuantizeRow16sTo8s(PIXEL16S *input, PIXEL8S *output, int length, int diviso short multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *input_ptr = (__m64 *)input; __m64 *output_ptr = (__m64 *)output; __m64 zero_si64 = _mm_setzero_si64(); @@ -2031,7 +2031,7 @@ void QuantizeRow16sTo8s(PIXEL16S *input, PIXEL8S *output, int length, int diviso // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_pi16 = _mm_set1_pi16(multiplier); @@ -2150,7 +2150,7 @@ void QuantizeRow16sTo8s(PIXEL16S *input, PIXEL8S *output, int length, int diviso unsigned short multiplier; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input_ptr = (__m128i *)input; __m128i *output_ptr = (__m128i *)output; __m128i zero_si128 = _mm_setzero_si128(); @@ -2197,7 +2197,7 @@ void QuantizeRow16sTo8s(PIXEL16S *input, PIXEL8S *output, int length, int diviso // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) quant_epi16 = _mm_set1_epi16(multiplier); @@ -2342,7 +2342,7 @@ void DequantizeBandRow(PIXEL8S *input, int width, int quantization, PIXEL 
*outpu const int midpoint = 0; #endif -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 8; const int post_column = width - (width % column_step); @@ -2494,7 +2494,7 @@ void DequantizeBandRow(PIXEL8S *input, int width, int quantization, PIXEL *outpu // Address of the cache line for the current block //const char *block_address = (char *)ALIGN(input, prefetch_size); -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 16; const int post_column = width - (width % column_step); @@ -2521,7 +2521,7 @@ void DequantizeBandRow(PIXEL8S *input, int width, int quantization, PIXEL *outpu // Prefetch rows that may be used in the near future //_mm_prefetch(block_address + prefetch_offset, _MM_HINT_T2); -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first sixteen input coefficients input1_epi8 = _mm_load_si128(input_ptr++); @@ -2541,7 +2541,7 @@ void DequantizeBandRow(PIXEL8S *input, int width, int quantization, PIXEL *outpu for (; column < post_column; column += column_step) { -#if (1 && PREFETCH) +#if (PREFETCH) // Prefetch input data that may be used in the near future //if (ISALIGNED(input_ptr, prefetch_size)) { @@ -2659,7 +2659,7 @@ void DequantizeBandRow16s(PIXEL *input, int width, int quantization, PIXEL *outp // Address of the cache line for the current block //const char *block_address = (char *)ALIGN(input, prefetch_size); /* Faulty???? ---------------------------------------------------------------------------------------------------------------- - #if (1 && XMMOPT) + #if (XMMOPT) const int column_step = 16; const int post_column = width - (width % column_step); @@ -2688,7 +2688,7 @@ void DequantizeBandRow16s(PIXEL *input, int width, int quantization, PIXEL *outp // Prefetch rows that may be used in the near future //_mm_prefetch(block_address + prefetch_offset, _MM_HINT_T2); - #if (1 && XMMOPT) + #if (XMMOPT) // Preload the first sixteen input coefficients input1_epi8 = _mm_load_si128(input_ptr++); @@ -2708,7 +2708,7 @@ void DequantizeBandRow16s(PIXEL *input, int width, int quantization, PIXEL *outp for (; column < post_column; column += column_step) { - #if (1 && PREFETCH) + #if (PREFETCH) // Prefetch input data that may be used in the near future //if (ISALIGNED(input_ptr, prefetch_size)) { diff --git a/Codec/recursive.c b/Codec/recursive.c index 4effb5a..835877d 100644 --- a/Codec/recursive.c +++ b/Codec/recursive.c @@ -320,7 +320,7 @@ void FilterVerticalMiddleStrip(PIXEL *input[6], int width, PIXEL *lowpass, PIXEL PIXEL *highpass_ptr = highpass; int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *lowpass_group_ptr = (__m128i *)lowpass_ptr; __m128i *highpass_group_ptr = (__m128i *)highpass_ptr; @@ -331,7 +331,7 @@ void FilterVerticalMiddleStrip(PIXEL *input[6], int width, PIXEL *lowpass, PIXEL // Begin processing at the left hand column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of eight pixels at a time for (; column < post_column; column += column_step) @@ -462,7 +462,7 @@ void FilterVerticalMiddleStripQuantBoth(PIXEL *input[6], int width, } #endif -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *lowpass_group_ptr = (__m128i *)lowpass_ptr; __m128i *highpass_group_ptr = (__m128i *)highpass_ptr; @@ -473,7 +473,7 @@ void FilterVerticalMiddleStripQuantBoth(PIXEL *input[6], int width, // Begin processing at the left hand column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of eight pixels at a time for (; column < post_column; column += column_step) @@ -1605,7 +1605,7 @@ void TransformForwardProgressiveIntraFrameRecursive(ENCODER *encoder, 
IMAGE *ima transform->logfile = encoder->logfile; #endif -#if (1 && DEBUG) +#if (DEBUG) if (transform->logfile) { fprintf(transform->logfile, "Calling recursive transform for channel: %d\n", channel); @@ -1688,7 +1688,7 @@ void TransformForwardProgressiveIntraFrameRecursiveYUV(ENCODER *encoder, BYTE *f TRANSFORM *transform = transform_array[channel]; int channel_width = transform->width; -#if (1 && DEBUG) +#if (DEBUG) if (transform->logfile) { fprintf(transform->logfile, @@ -1774,7 +1774,7 @@ void TransformForwardInterlacedIntraFrameRecursiveYUV(ENCODER *encoder, BYTE *fr TRANSFORM *transform = transform_array[channel]; int channel_width = transform->width; -#if (1 && DEBUG) +#if (DEBUG) if (transform->logfile) { fprintf(transform->logfile, diff --git a/Codec/spatial.c b/Codec/spatial.c index a9a0dc9..50b4d09 100644 --- a/Codec/spatial.c +++ b/Codec/spatial.c @@ -172,7 +172,7 @@ void FilterHorizontalRow16s(PIXEL *input, PIXEL *lowpass, PIXEL *highpass, int w // Intialize the mask used for downsampling the convolution results mask_epi16 = _mm_set1_epi32(0x0000FFFF); -#if (1 && XMMOPT) +#if (XMMOPT) // Process two sets of four input pixels to get one set of four output pixels for (; column < post_column; column += column_step) { @@ -492,7 +492,7 @@ void FilterHorizontalRow16s(PIXEL *input, PIXEL *lowpass, PIXEL *highpass, int w assert(ISALIGNED16(highpass_ptr)); -#if (1 && XMMOPT) +#if (XMMOPT) // Process two sets of four input pixels to get one set of four output pixels for (; column < post_column; column += column_step) { @@ -828,7 +828,7 @@ void FilterHorizontalRow16s(PIXEL *input, PIXEL *lowpass, PIXEL *highpass, int w assert(ISALIGNED16(highpass_ptr)); -#if (1 && XMMOPT) +#if (XMMOPT) // Process two sets of four input pixels to get one set of four output pixels for (; column < post_column; column += column_step) { @@ -3590,7 +3590,7 @@ void FilterHorizontalRow10bit16s(PIXEL *input, PIXEL *lowpass, PIXEL *highpass, // Intialize the mask used for downsampling the convolution results mask_epi16 = _mm_set1_epi32(0x0000FFFF); -#if (1 && XMMOPT) +#if (XMMOPT) // Process two sets of four input pixels to get one set of four output pixels for (; column < post_column; column += column_step) { @@ -5276,7 +5276,7 @@ void FilterHorizontalRowScaled16sDifferenceFiltered(PIXEL *input, PIXEL *lowpass assert(ISALIGNED16(highpass_ptr)); -#if (1 && XMMOPT) // SSE2 +#if (XMMOPT) // SSE2 // Process two sets of four input pixels to get one set of four output pixels for (; column < post_column; column += column_step) { @@ -7736,7 +7736,7 @@ void FilterVertical(PIXEL *input, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = roi.width - (roi.width % column_step); int quad_pitch = (input_pitch * sizeof(PIXEL)) / sizeof(__m64); @@ -7746,7 +7746,7 @@ void FilterVertical(PIXEL *input, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -7856,7 +7856,7 @@ void FilterVertical(PIXEL *input, int input_pitch, // Should have left the loop at the last row assert(row == last_row); -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the row pointer if using SIMD instructions rowptr += 2 * input_pitch; #endif @@ -7907,7 +7907,7 @@ void FilterLowpassVertical(PIXEL *input, int input_pitch, PIXEL *output, int out for (row = 0; row < roi.height; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *quad1_ptr = (__m64 
*)rowptr; __m64 *quad2_ptr = (__m64 *)&rowptr[input_pitch]; __m64 *outptr = (__m64 *)outrow; @@ -7964,7 +7964,7 @@ void FilterLowpassVerticalQuant(PIXEL *input, int input_pitch, PIXEL8S *output, { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *quad1_ptr = (__m64 *)rowptr; __m64 *quad2_ptr = (__m64 *)&rowptr[input_pitch]; @@ -8090,7 +8090,7 @@ void FilterHighpassVertical(PIXEL *input, int input_pitch, PIXEL *output, int ou for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = roi.width - (roi.width % column_step); int quad_pitch = (input_pitch * sizeof(PIXEL)) / sizeof(__m64); @@ -8099,7 +8099,7 @@ void FilterHighpassVertical(PIXEL *input, int input_pitch, PIXEL *output, int ou // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -8191,7 +8191,7 @@ void FilterHighpassVertical(PIXEL *input, int input_pitch, PIXEL *output, int ou // Should have left the loop at the last row assert(row == last_row); -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the row pointer if using SIMD instructions rowptr += 2 * input_pitch; #endif @@ -8268,7 +8268,7 @@ void FilterHighpassVertical(PIXEL *input, int input_pitch, PIXEL *output, int ou for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 8; const int post_column = roi.width - (roi.width % column_step); int group_pitch = (input_pitch * sizeof(PIXEL)) / sizeof(__m128i); @@ -8280,7 +8280,7 @@ void FilterHighpassVertical(PIXEL *input, int input_pitch, PIXEL *output, int ou // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of eight pixels at a time for (; column < post_column; column += column_step) { @@ -8507,7 +8507,7 @@ void FilterHighpassVerticalQuant(PIXEL *input, int input_pitch, PIXEL8S *output, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = roi.width - (roi.width % column_step); int quad_pitch = (input_pitch * sizeof(PIXEL)) / sizeof(__m64); @@ -8516,7 +8516,7 @@ void FilterHighpassVerticalQuant(PIXEL *input, int input_pitch, PIXEL8S *output, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) { @@ -8633,7 +8633,7 @@ void FilterHighpassVerticalQuant(PIXEL *input, int input_pitch, PIXEL8S *output, // Should have left the loop at the last row assert(row == last_row); -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the row pointer if using SIMD instructions rowptr += 2 * input_pitch; #endif @@ -8894,7 +8894,7 @@ void InvertVertical16s(PIXEL *lowpass, int lowpass_pitch, // Process the middle rows using the ordinary reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = roi.width - (roi.width % column_step); @@ -8906,7 +8906,7 @@ void InvertVertical16s(PIXEL *lowpass, int lowpass_pitch, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -9587,7 +9587,7 @@ void FilterSpatialQuant16s(PIXEL *input_image, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *lowlow_ptr; __m64 *highlow_ptr = (__m64 *)highlow_buffer; __m64 *lowhigh_ptr = (__m64 *)lowhigh_buffer; @@ -9599,7 +9599,7 @@ void 
FilterSpatialQuant16s(PIXEL *input_image, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) if (lowlow_quantization > 1) { @@ -10189,7 +10189,7 @@ void FilterSpatialQuant16s(PIXEL *input_image, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *lowlow_ptr; __m128i *highlow_ptr = (__m128i *)highlow_buffer; __m128i *lowhigh_ptr = (__m128i *)lowhigh_buffer; @@ -10201,7 +10201,7 @@ void FilterSpatialQuant16s(PIXEL *input_image, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) if (lowlow_quantization > 1) { @@ -10812,7 +10812,7 @@ void FilterSpatialQuantDifferenceLL16s(PIXEL *input_image, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *lowlow_ptr; __m64 *highlow_ptr = (__m64 *)highlow_buffer; __m64 *lowhigh_ptr = (__m64 *)lowhigh_buffer; @@ -10824,7 +10824,7 @@ void FilterSpatialQuantDifferenceLL16s(PIXEL *input_image, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) if (lowlow_quantization > 1) { @@ -11391,7 +11391,7 @@ void FilterSpatialQuantDifferenceLL16s(PIXEL *input_image, int input_pitch, { int lastsumvalue = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *lowlow_ptr; __m128i *highlow_ptr = (__m128i *)highlow_buffer; __m128i *lowhigh_ptr = (__m128i *)lowhigh_buffer; @@ -11403,7 +11403,7 @@ void FilterSpatialQuantDifferenceLL16s(PIXEL *input_image, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) lowlow_ptr = (__m128i *)lowlow_row_ptr; // Process a group of eight pixels at a time @@ -12040,7 +12040,7 @@ void FilterSpatialPrescaleQuant16s(PIXEL *input_image, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) #if _QUANTIZE_SPATIAL_LOWPASS __m64 *lowlow_ptr = (__m64 *)lowlow_buffer; #else @@ -12056,7 +12056,7 @@ void FilterSpatialPrescaleQuant16s(PIXEL *input_image, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -12602,7 +12602,7 @@ void FilterSpatialPrescaleQuant16s(PIXEL *input_image, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) #if _QUANTIZE_SPATIAL_LOWPASS __m128i *lowlow_ptr = (__m128i *)lowlow_buffer; #else @@ -12629,7 +12629,7 @@ void FilterSpatialPrescaleQuant16s(PIXEL *input_image, int input_pitch, assert(ISALIGNED16(highlow_ptr)); assert(ISALIGNED16(highhigh_ptr)); -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -13193,7 +13193,7 @@ void FilterSpatialV210Quant16s(PIXEL *input_image, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) #if _QUANTIZE_SPATIAL_LOWPASS __m128i *lowlow_ptr = (__m128i *)lowlow_buffer; #else @@ -13220,7 +13220,7 @@ void FilterSpatialV210Quant16s(PIXEL *input_image, int input_pitch, assert(ISALIGNED16(highlow_ptr)); assert(ISALIGNED16(highhigh_ptr)); -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -13805,7 +13805,7 @@ void FilterSpatialQuant16sToCoded(ENCODER *encoder, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) #if _QUANTIZE_SPATIAL_LOWPASS __m64 *lowlow_ptr = (__m64 *)lowlow_buffer; #else @@ -13821,7 +13821,7 @@ void FilterSpatialQuant16sToCoded(ENCODER *encoder, // Start at 
the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -14426,7 +14426,7 @@ void FilterSpatialYUVQuant16s(uint8_t *input_data, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) #if _QUANTIZE_SPATIAL_LOWPASS __m64 *lowlow_ptr = (__m64 *)lowlow_buffer; #else @@ -14442,7 +14442,7 @@ void FilterSpatialYUVQuant16s(uint8_t *input_data, int input_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of four pixels at a time for (; column < post_column; column += column_step) @@ -15007,7 +15007,7 @@ void FilterSpatialYUVQuant16s(uint8_t *input_data, int input_pitch, for (; row < last_row; row += 2) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *lowlow_ptr = (__m128i *)lowlow_row_ptr; __m128i *highlow_ptr = (__m128i *)highlow_buffer; __m128i *lowhigh_ptr = (__m128i *)lowhigh_buffer; @@ -15109,7 +15109,7 @@ void FilterSpatialYUVQuant16s(uint8_t *input_data, int input_pitch, dummy1 += dummy2; #endif -#if (1 && XMMOPT) +#if (XMMOPT) // Process a group of eight pixels at a time for (; column < post_column; column += column_step) @@ -15592,7 +15592,7 @@ void InvertSpatialMiddleRow16s(PIXEL *lowlow_band, int lowlow_pitch, int buffer_pitch; int column; -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 8; int post_column = width - (width % column_step); @@ -15647,7 +15647,7 @@ void InvertSpatialMiddleRow16s(PIXEL *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) even_lowpass_ptr = (__m128i *)even_lowpass; even_highpass_ptr = (__m128i *)even_highpass; @@ -17090,7 +17090,7 @@ void InvertRGB444MiddleRow16sToB64A(PIXEL *lowlow_band, int lowlow_pitch, int buffer_pitch; int column; -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 4; int post_column = width - (width % column_step); @@ -17146,7 +17146,7 @@ void InvertRGB444MiddleRow16sToB64A(PIXEL *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) even_lowpass_ptr = (__m64 *)even_lowpass; even_highpass_ptr = (__m64 *)even_highpass; @@ -18566,7 +18566,7 @@ void InvertHorizontalRow16s8sTo16sBuffered(PIXEL *lowpass, // Row of horizon // Place the odd result in the odd column output[1] = SATURATE(odd); -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass coefficients low1_pi16 = *((__m64 *)&lowpass[column]); @@ -18933,7 +18933,7 @@ void InvertHorizontalRow16s8sTo16sBuffered(PIXEL *lowpass, // Row of horizon } */ -#if (1 && _FASTLOOP && XMMOPT) +#if (_FASTLOOP && XMMOPT) // Preload the first eight lowpass coefficients low1_epi16 = _mm_load_si128((__m128i *)&lowpass[column]); @@ -19365,7 +19365,7 @@ void InvertHorizontalRow16s(PIXEL *lowpass, // Row of horizontal lowpass coe //even = 100; //***DEBUG*** //odd = 100; //***DEBUG*** -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass coefficients low1_epi16 = _mm_load_si128((__m128i *)&lowpass[column]); @@ -19690,7 +19690,7 @@ void BypassHorizontalRow16s(PIXEL *lowpass, // Row of horizontal lowpass coe column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) if (ISALIGNED16(lowpass) && ISALIGNED16(colptr)) { @@ -19839,7 +19839,7 @@ void InvertHorizontalRow8sBuffered(PIXEL8S *lowpass_data, // Row of horizontal // Place the odd result in the odd column output[1] = SATURATE(odd); -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass coefficients low1_pi16 = *((__m64 *)&lowline[column]); @@ 
-20182,7 +20182,7 @@ void InvertHorizontalRow8sBuffered(PIXEL8S *lowpass_data, // Row of horizontal //remainder[1] = SATURATE(odd); #endif -#if (1 && _FASTLOOP && XMMOPT) +#if (_FASTLOOP && XMMOPT) // Preload the first four lowpass coefficients low1_epi16 = _mm_load_si128((__m128i *)&lowline[column]); @@ -20474,7 +20474,7 @@ void InvertHorizontalRowDuplicated16s(PIXEL *lowpass, // Row of horizontal lo // Start processing at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass coefficients low1_epi16 = _mm_load_si128((__m128i *)&lowpass[column]); @@ -21230,7 +21230,7 @@ void InvertSpatialQuant16s(PIXEL *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = width - (width % column_step); __m128i *even_lowpass_ptr = (__m128i *)even_lowpass; @@ -21252,7 +21252,7 @@ void InvertSpatialQuant16s(PIXEL *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -21466,7 +21466,7 @@ void InvertSpatialQuant16s(PIXEL *lowlow_band, int lowlow_pitch, } } -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -21772,7 +21772,7 @@ void InvertSpatialQuantDescale16s(PIXEL *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = width - (width % column_step); __m128i *even_lowpass_ptr = (__m128i *)even_lowpass; @@ -21794,7 +21794,7 @@ void InvertSpatialQuantDescale16s(PIXEL *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -22205,7 +22205,7 @@ void InvertSpatialQuantDescale16s(PIXEL *lowlow_band, int lowlow_pitch, } -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -22517,7 +22517,7 @@ void InvertSpatialPrescaledQuant16s(PIXEL *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = width - (width % column_step); __m64 *even_lowpass_ptr = (__m64 *)even_lowpass; @@ -22539,7 +22539,7 @@ void InvertSpatialPrescaledQuant16s(PIXEL *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -22768,7 +22768,7 @@ void InvertSpatialPrescaledQuant16s(PIXEL *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -23203,7 +23203,7 @@ void InvertSpatialQuant1x16s(PIXEL *lowlow_band, int lowlow_pitch, } } -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -23533,7 +23533,7 @@ void 
InvertSpatialPrescaledQuant8s(PIXEL8S *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = width - (width % column_step); __m64 *even_lowpass_ptr = (__m64 *)even_lowpass; @@ -23550,7 +23550,7 @@ void InvertSpatialPrescaledQuant8s(PIXEL8S *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -23785,7 +23785,7 @@ void InvertSpatialPrescaledQuant8s(PIXEL8S *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -24097,7 +24097,7 @@ void InvertSpatialPrescaledQuant8s(PIXEL8S *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = width - (width % column_step); __m128i *even_lowpass_ptr = (__m128i *)even_lowpass; @@ -24114,7 +24114,7 @@ void InvertSpatialPrescaledQuant8s(PIXEL8S *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -24351,7 +24351,7 @@ void InvertSpatialPrescaledQuant8s(PIXEL8S *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -24679,7 +24679,7 @@ void InvertSpatial8sTo16s(PIXEL8S *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = roi.width - (roi.width % column_step); __m64 *even_lowpass_ptr = (__m64 *)even_lowpass; @@ -24696,7 +24696,7 @@ void InvertSpatial8sTo16s(PIXEL8S *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -24926,7 +24926,7 @@ void InvertSpatial8sTo16s(PIXEL8S *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -25227,7 +25227,7 @@ void InvertSpatial8sTo16s(PIXEL8S *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = roi.width - (roi.width % column_step); __m128i *even_lowpass_ptr = (__m128i *)even_lowpass; @@ -25244,7 +25244,7 @@ void InvertSpatial8sTo16s(PIXEL8S *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -25476,7 +25476,7 @@ void InvertSpatial8sTo16s(PIXEL8S *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the 
lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -25808,7 +25808,7 @@ void InvertSpatial16sTo16s(PIXEL16S *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 4; int post_column = roi.width - (roi.width % column_step); __m64 *even_lowpass_ptr = (__m64 *)even_lowpass; @@ -25830,7 +25830,7 @@ void InvertSpatial16sTo16s(PIXEL16S *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -26068,7 +26068,7 @@ void InvertSpatial16sTo16s(PIXEL16S *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -26384,7 +26384,7 @@ void InvertSpatial16sTo16s(PIXEL16S *lowlow_band, int lowlow_pitch, // Process the middle rows using the interior reconstruction filters for (; row < last_row; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = roi.width - (roi.width % column_step); __m128i *even_lowpass_ptr = (__m128i *)even_lowpass; @@ -26406,7 +26406,7 @@ void InvertSpatial16sTo16s(PIXEL16S *lowlow_band, int lowlow_pitch, // Start at the first column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process groups of four coefficients along the row for (; column < post_column; column += column_step) @@ -26646,7 +26646,7 @@ void InvertSpatial16sTo16s(PIXEL16S *lowlow_band, int lowlow_pitch, //_mm_empty(); // Clear the mmx register state -#if (1 && XMMOPT) +#if (XMMOPT) // Need to advance the lowpass pointer if using SIMD instructions lowlow += lowlow_pitch; lowhigh += lowhigh_pitch; @@ -27360,7 +27360,7 @@ void InvertHorizontalStripRGB16sToPackedYUV8u(PIXEL *lowpass_band[], // Horizont // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i gg_low1_epi16; // Lowpass coefficients __m128i gg_low2_epi16; __m128i bg_low1_epi16; @@ -27544,7 +27544,7 @@ void InvertHorizontalStripRGB16sToPackedYUV8u(PIXEL *lowpass_band[], // Horizont // Save the value for use in the fast loop rg_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients gg_low1_epi16 = _mm_load_si128((__m128i *)&gg_lowpass_ptr[0]); @@ -28750,7 +28750,7 @@ InvertHorizontalStripYUV16sToPackedRGB32(HorizontalFilterParams) // Target pix // Process each row of the strip for (row = 0; row < height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i y_low1_epi16; // Lowpass coefficients __m128i y_low2_epi16; __m128i u_low1_epi16; @@ -28931,7 +28931,7 @@ InvertHorizontalStripYUV16sToPackedRGB32(HorizontalFilterParams) // Target pix // Save the value for use in the fast loop v_odd_value = odd; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first eight lowpass luma coefficients y_low1_epi16 = _mm_load_si128((__m128i *)&y_lowpass_ptr[0]); @@ -30681,7 +30681,7 @@ InvertSpatialMiddleRow16sToOutput(DECODER *decoder, int thread_index, PIXEL *low // Start at the first column int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) const int column_step = 8; int post_column = width - (width % column_step); diff --git a/Codec/temporal.c b/Codec/temporal.c index 9db8e38..432a831 100644 --- a/Codec/temporal.c +++ b/Codec/temporal.c @@ -115,7 +115,7 @@ 
void FilterTemporal(PIXEL *field1, int pitch1, PIXEL *field2, int pitch2, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the input values (which may be overwritten) input1_pi16 = *(input1_ptr++); @@ -225,7 +225,7 @@ void FilterTemporal(PIXEL *field1, int pitch1, PIXEL *field2, int pitch2, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -354,7 +354,7 @@ void FilterTemporal16s(PIXEL *field1, int pitch1, PIXEL *field2, int pitch2, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process column elements in parallel until end of row processing is required for (; column < post_column; column += column_step) @@ -525,7 +525,7 @@ void FilterTemporal16s(PIXEL *field1, int pitch1, PIXEL *field2, int pitch2, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process column elements in parallel until end of row processing is required for (; column < post_column; column += column_step) @@ -681,7 +681,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, int post_column = length - (length % column_step); int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *input1_ptr = (__m64 *)row1; __m64 *input2_ptr = (__m64 *)row2; @@ -703,7 +703,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned //assert(ISALIGNED16(input1_ptr)); @@ -817,7 +817,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, int post_column = length - (length % column_step); int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input1_ptr = (__m128i *)row1; __m128i *input2_ptr = (__m128i *)row2; @@ -839,7 +839,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -941,7 +941,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, int post_column = length - (length % column_step); int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input1_ptr = (__m128i *)row1; __m128i *input2_ptr = (__m128i *)row2; @@ -964,7 +964,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -1024,7 +1024,7 @@ void FilterTemporalRow8uTo16s(PIXEL8U *row1, PIXEL8U *row2, int length, // Start at the first column of the second row of pixels column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Reset the lowpass pointer to the begininning of the buffer lowpass_ptr = (__m128i *)lowpass; @@ -1116,7 +1116,7 @@ void FilterTemporalRow16s(PIXEL *row1, PIXEL *row2, int length, int post_column = length - (length % column_step); int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input1_ptr = (__m128i *)row1; __m128i *input2_ptr = (__m128i *)row2; @@ -1140,7 +1140,7 @@ void FilterTemporalRow16s(PIXEL *row1, PIXEL *row2, int length, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -1216,7 +1216,7 @@ void FilterTemporalRowYUVTo16s(BYTE *row1, BYTE *row2, int frame_width, int post_column = length - (length % column_step); int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input1_ptr = (__m128i *)row1; __m128i *input2_ptr = 
(__m128i *)row2; @@ -1239,7 +1239,7 @@ void FilterTemporalRowYUVTo16s(BYTE *row1, BYTE *row2, int frame_width, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -1455,7 +1455,7 @@ void FilterTemporalRowYUYVChannelTo16s(BYTE *row1, BYTE *row2, int frame_width, int length = frame_width * 2; int column; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 16; int post_column = length - (length % column_step); @@ -1479,7 +1479,7 @@ void FilterTemporalRowYUYVChannelTo16s(BYTE *row1, BYTE *row2, int frame_width, column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned //assert(ISALIGNED16(input1_ptr)); @@ -1805,7 +1805,7 @@ void FilterTemporalRowYUYVChannelTo16s(uint8_t *row1, uint8_t *row2, int frame_w int column; int shift = precision - 8; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input1_ptr = (__m128i *)row1; __m128i *input2_ptr = (__m128i *)row2; @@ -1834,7 +1834,7 @@ void FilterTemporalRowYUYVChannelTo16s(uint8_t *row1, uint8_t *row2, int frame_w // Start processing at the first (leftmost) column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -2340,7 +2340,7 @@ void FilterTemporalRowUYVYChannelTo16s(uint8_t *row1, uint8_t *row2, int frame_w int post_column = length - (length % column_step); int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *input1_ptr = (__m64 *)row1; __m64 *input2_ptr = (__m64 *)row2; @@ -2362,7 +2362,7 @@ void FilterTemporalRowUYVYChannelTo16s(uint8_t *row1, uint8_t *row2, int frame_w column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned //assert(ISALIGNED16(input1_ptr)); @@ -2689,7 +2689,7 @@ void FilterTemporalRowUYVYChannelTo16s(uint8_t *row1, uint8_t *row2, int frame_w int column; int shift = precision - 8; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *input1_ptr = (__m128i *)row1; __m128i *input2_ptr = (__m128i *)row2; @@ -2719,7 +2719,7 @@ void FilterTemporalRowUYVYChannelTo16s(uint8_t *row1, uint8_t *row2, int frame_w // Start processing at the first (leftmost) column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(input1_ptr)); @@ -3358,7 +3358,7 @@ void FilterInterlaced(PIXEL *frame, int frame_pitch, for (row = 0; row < roi.height; row += 2) { -#if (1 && MMXOPT) +#if (MMXOPT) __m64 *evenptr = (__m64 *)frame; __m64 *oddptr = (__m64 *)(frame + frame_pitch); @@ -3555,7 +3555,7 @@ void InvertInterlaced16s(PIXEL *lowpass, int lowpass_pitch, // Process each pair of lowpass and highpass rows for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *lowptr = (__m64 *)lowpass; __m64 *highptr = (__m64 *)highpass; __m64 *evenptr = (__m64 *)even; @@ -3566,7 +3566,7 @@ void InvertInterlaced16s(PIXEL *lowpass, int lowpass_pitch, // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // MMX optimization for (; column < post_column; column += column_step) { @@ -3644,7 +3644,7 @@ void InvertInterlaced16s(PIXEL *lowpass, int lowpass_pitch, // Process each pair of lowpass and highpass rows for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) // SSE2 optimization __m128i *lowptr = (__m128i *)lowpass; __m128i *highptr = (__m128i *)highpass; @@ -3656,7 +3656,7 @@ void InvertInterlaced16s(PIXEL *lowpass, int 
lowpass_pitch, // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // SSE2 optimization for (; column < post_column; column += column_step) { @@ -3750,7 +3750,7 @@ void InvertInterlaced16sTo8u(PIXEL16S *lowpass, int lowpass_pitch, // Process each pair of lowpass and highpass rows for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = roi.width - (roi.width % column_step); int preload_column = post_column - column_step; @@ -3775,7 +3775,7 @@ void InvertInterlaced16sTo8u(PIXEL16S *lowpass, int lowpass_pitch, // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload four lowpass and four highpass coefficients low1_pi16 = *(low_ptr++); @@ -3893,7 +3893,7 @@ void InvertInterlaced16sTo8u(PIXEL16S *lowpass, int lowpass_pitch, // Process each pair of lowpass and highpass rows for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 16; int post_column = roi.width - (roi.width % column_step); int preload_column = post_column - column_step; @@ -3916,7 +3916,7 @@ void InvertInterlaced16sTo8u(PIXEL16S *lowpass, int lowpass_pitch, // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload four lowpass and four highpass coefficients low1_epi16 = _mm_load_si128(low_ptr++); @@ -4038,7 +4038,7 @@ void InvertInterlaced16sToYUV(PIXEL16S *y_lowpass, int y_lowpass_pitch, // Process each pair of lowpass and highpass rows for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = roi.width / 2; int column_step = 16; int post_column = chroma_width - (chroma_width % column_step); @@ -4064,7 +4064,7 @@ void InvertInterlaced16sToYUV(PIXEL16S *y_lowpass, int y_lowpass_pitch, // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { __m128i low1_epi16; @@ -4362,7 +4362,7 @@ void InvertInterlacedRow16sToYUV(PIXEL *lowpass[], PIXEL *highpass[], int num_ch uint8_t *odd_field = even_field + pitch; int row, column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -4404,7 +4404,7 @@ void InvertInterlacedRow16sToYUV(PIXEL *lowpass[], PIXEL *highpass[], int num_ch // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { @@ -4851,7 +4851,7 @@ void InvertInterlacedRow16sToYUV(PIXEL *lowpass[], PIXEL *highpass[], int num_ch uint8_t *odd_field = output + pitch; int column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -5854,7 +5854,7 @@ void InvertInterlacedRow16s10bitToYUV(PIXEL *lowpass[], PIXEL *highpass[], int n uint8_t *odd_field = even_field + pitch; int column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -5916,7 +5916,7 @@ void InvertInterlacedRow16s10bitToYUV(PIXEL *lowpass[], PIXEL *highpass[], int n // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Shorter loop that processes 16 output pixels per iteration assert(column_step == 16); @@ -6392,7 +6392,7 @@ void InvertInterlacedRow16s10bitToUYVY(PIXEL *lowpass[], PIXEL *highpass[], int uint8_t *odd_field = even_field + 
pitch; int column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -6454,7 +6454,7 @@ void InvertInterlacedRow16s10bitToUYVY(PIXEL *lowpass[], PIXEL *highpass[], int // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Shorter loop that processes 16 output pixels per iteration assert(column_step == 16); @@ -6984,7 +6984,7 @@ void InvertInterlacedRow16sToRow16u(PIXEL *lowpass, PIXEL *highpass, // Compute the shift required to scale the results to sixteen bits int scale = (precision == CODEC_PRECISION_8BIT) ? 8 : 6; -#if (1 && XMMOPT) +#if (XMMOPT) int column_step = 8; int post_column = output_width - (output_width % column_step); @@ -7005,7 +7005,7 @@ void InvertInterlacedRow16sToRow16u(PIXEL *lowpass, PIXEL *highpass, // Start at the beginning of the row int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the pointers to the groups of pixels are properly aligned assert(ISALIGNED16(lowpass_ptr)); @@ -7187,7 +7187,7 @@ void InvertInterlacedRow16s(PIXEL *lowpass[], PIXEL *highpass[], int num_channel int column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -7229,7 +7229,7 @@ void InvertInterlacedRow16s(PIXEL *lowpass[], PIXEL *highpass[], int num_channel // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { @@ -7630,7 +7630,7 @@ void InvertInterlacedRow16s(PIXEL *lowpass[], PIXEL *highpass[], int num_channel int column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -7671,7 +7671,7 @@ void InvertInterlacedRow16s(PIXEL *lowpass[], PIXEL *highpass[], int num_channel // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Shorter loop that processes 16 output pixels per iteration assert(column_step == 16); @@ -8222,7 +8222,7 @@ void InvertInterlacedRow16sToV210(PIXEL *lowpass[], PIXEL *highpass[], int num_c int column; -#if (1 && XMMOPT) +#if (XMMOPT) int chroma_width = output_width / 2; int chroma_step = 8; int post_column = 2 * (chroma_width - (chroma_width % chroma_step)); @@ -8265,7 +8265,7 @@ void InvertInterlacedRow16sToV210(PIXEL *lowpass[], PIXEL *highpass[], int num_c // Start at the beginning of the row column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Shorter loop that processes 16 output pixels per iteration assert(column_step == 16); @@ -9153,7 +9153,7 @@ void InvertTemporalQuant16s(PIXEL *lowpass, int lowpass_quantization, int lowpas // Process a pair of rows from each field for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *lowpass_ptr = (__m64 *)lowpass; __m64 *highpass_ptr; __m64 *even_ptr = (__m64 *)field1; @@ -9183,7 +9183,7 @@ void InvertTemporalQuant16s(PIXEL *lowpass, int lowpass_quantization, int lowpas // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass and highpass coefficients low1_pi16 = *(lowpass_ptr++); @@ -9339,7 +9339,7 @@ void InvertTemporalQuant16s(PIXEL *lowpass, int lowpass_quantization, int lowpas // Process a pair of rows from each field for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *lowpass_ptr = (__m128i *)lowpass; __m128i *highpass_ptr; __m128i 
*even_ptr = (__m128i *)field1; @@ -9376,7 +9376,7 @@ void InvertTemporalQuant16s(PIXEL *lowpass, int lowpass_quantization, int lowpas // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the input and output addresses are properly aligned assert(ISALIGNED16(lowpass_ptr)); @@ -9421,7 +9421,7 @@ void InvertTemporalQuant16s(PIXEL *lowpass, int lowpass_quantization, int lowpas //assert(ISALIGNED16(lowpass_ptr)); //assert(ISALIGNED16(highpass_ptr)); -#if (1 && PREFETCH) +#if (PREFETCH) // Prefetch input data that may be used in the near future _mm_prefetch((const char *)lowpass_ptr + prefetch_offset, _MM_HINT_T2); _mm_prefetch((const char *)highpass_ptr + prefetch_offset, _MM_HINT_T2); @@ -9670,7 +9670,7 @@ void InvertTemporalQuant16s8sTo16s(PIXEL *lowpass, int lowpass_quantization, int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass and highpass coefficients low1_pi16 = *(lowpass_ptr++); @@ -9802,7 +9802,7 @@ void InvertTemporalQuant16s8sTo16s(PIXEL *lowpass, int lowpass_quantization, int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process column elements in parallel until end of row processing is required for (; column < post_column; column += column_step) @@ -9976,7 +9976,7 @@ void InvertTemporalRow16s(PIXEL *lowpass, PIXEL *highpass, int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Process column elements in parallel until end of row processing is required for (; column < post_column; column += column_step) @@ -10078,7 +10078,7 @@ void InvertTemporalQuarterRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *even, P // Process a pair of rows from each field for (row = 0; row < roi.height; row++) { -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *lowpass_ptr = (__m64 *)lowpass; __m64 *highpass_ptr; __m64 *even_ptr = (__m64 *)field1; @@ -10092,7 +10092,7 @@ void InvertTemporalQuarterRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *even, P // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Preload the first four lowpass and highpass coefficients low1_pi16 = *(lowpass_ptr++); @@ -10189,7 +10189,7 @@ void InvertTemporalQuarterRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *even, P int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *low_ptr = (__m128i *)lowpass; __m128i *high_ptr = (__m128i *)highpass; __m128i *even_ptr = (__m128i *)even; @@ -10211,7 +10211,7 @@ void InvertTemporalQuarterRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *even, P // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the input and output addresses are properly aligned assert(ISALIGNED16(low_ptr)); @@ -10439,7 +10439,7 @@ void InvertTemporalQuarterEvenRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *out int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *low_ptr = (__m64 *)lowpass; __m64 *high_ptr = (__m64 *)highpass; __m64 *even_ptr = (__m64 *)output; @@ -10463,7 +10463,7 @@ void InvertTemporalQuarterEvenRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *out // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the input and output addresses are properly aligned //assert(ISALIGNED16(low_ptr)); @@ -10649,7 +10649,7 @@ void InvertTemporalQuarterEvenRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *out int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *low_ptr = (__m128i *)lowpass; __m128i *high_ptr = (__m128i *)highpass; __m128i *even_ptr = (__m128i *)output; @@ -10673,7 +10673,7 @@ void InvertTemporalQuarterEvenRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *out // Start at the left 
column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the input and output addresses are properly aligned assert(ISALIGNED16(low_ptr)); @@ -10868,7 +10868,7 @@ void InvertTemporalQuarterOddRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *outp int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m64 *low_ptr = (__m64 *)lowpass; __m64 *high_ptr = (__m64 *)highpass; __m64 *odd_ptr = (__m64 *)output; @@ -10892,7 +10892,7 @@ void InvertTemporalQuarterOddRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *outp // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the input and output addresses are properly aligned //assert(ISALIGNED16(low_ptr)); @@ -11078,7 +11078,7 @@ void InvertTemporalQuarterOddRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *outp int column; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *low_ptr = (__m128i *)lowpass; __m128i *high_ptr = (__m128i *)highpass; __m128i *odd_ptr = (__m128i *)output; @@ -11102,7 +11102,7 @@ void InvertTemporalQuarterOddRow16s(PIXEL *lowpass, PIXEL *highpass, PIXEL *outp // Start at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) // Check that the input and output addresses are properly aligned assert(ISALIGNED16(low_ptr)); @@ -11288,7 +11288,7 @@ void CopyQuarterRowToBuffer(PIXEL **input, int column; -#if (1 && XMMOPT) +#if (XMMOPT) // Process sixteen values of luma and chroma per loop iteration const int column_step = 16; @@ -11309,7 +11309,7 @@ void CopyQuarterRowToBuffer(PIXEL **input, // Start procesing at the left column column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) for (; column < post_column; column += column_step) { diff --git a/Codec/vlc.c b/Codec/vlc.c index d7eacc2..34097af 100644 --- a/Codec/vlc.c +++ b/Codec/vlc.c @@ -1,23 +1,21 @@ -/*! @file vlc.c - -* @brief Variable Length Coding tools -* -* @version 1.0.0 -* -* (C) Copyright 2017 GoPro Inc (http://gopro.com/). -* -* Licensed under either: -* - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 -* - MIT license, http://opensource.org/licenses/MIT -* at your option. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -*/ +/*! + * @file vlc.c + * @brief Variable Length Coding tools + * + * (C) Copyright 2017 GoPro Inc (http://gopro.com/). + * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + #include "config.h" #include "timing.h" @@ -27,7 +25,6 @@ #define TIMING (1 && _TIMING) #define XMMOPT (1 && _XMMOPT) - #include "vlc.h" #include "codec.h" #include "bitstream.h" @@ -259,7 +256,7 @@ void PutVlcByte(BITSTREAM *stream, int value, VALBOOK *codebook) } #endif -#if (1 && TIMING) +#if (TIMING) putvlcbyte_count++; #endif } @@ -287,7 +284,7 @@ int32_t GetVlc(BITSTREAM *stream, VLCBOOK *codebook) size = code->size; } -#if (1 && DEBUG) +#if (DEBUG) // Check that the codeword does not contain extra bits if (size < BITSTREAM_LONG_SIZE) { @@ -403,7 +400,7 @@ void PutZeroRun(BITSTREAM *stream, int count, RLCBOOK *codebook) // Should have output enough runs to cover the run of zeros assert(count == 0); -#if (1 && TIMING) +#if (TIMING) putzerorun_count++; #endif } @@ -438,7 +435,7 @@ void PutFastRun(BITSTREAM *stream, int count, RLCBOOK *codebook) // Should have output enough runs to cover the run of zeros //assert(count == 0); -#if (1 && TIMING) +#if (TIMING) putzerorun_count++; #endif } diff --git a/Codec/wavelet.c b/Codec/wavelet.c index 87191fe..040680b 100644 --- a/Codec/wavelet.c +++ b/Codec/wavelet.c @@ -2266,7 +2266,7 @@ void TransformInverseTemporal(IMAGE *temporal, IMAGE *frame0, IMAGE *frame1) { int column = 0; -#if (1 && XMMOPT) +#if (XMMOPT) __m128i *low_ptr = (__m128i *)lowpass; __m128i *high_ptr = (__m128i *)highpass; diff --git a/EncoderSDK/MetadataWriter.cpp b/EncoderSDK/MetadataWriter.cpp index cc79190..bd05984 100644 --- a/EncoderSDK/MetadataWriter.cpp +++ b/EncoderSDK/MetadataWriter.cpp @@ -1,22 +1,19 @@ -/*! @file MetadataWriter.cpp - -* @brief -* -* @version 1.0.0 -* -* (C) Copyright 2017 GoPro Inc (http://gopro.com/). -* -* Licensed under either: -* - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 -* - MIT license, http://opensource.org/licenses/MIT -* at your option. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ +/*! + * @file MetadataWriter.cpp + * + * (C) Copyright 2017 GoPro Inc (http://gopro.com/). + * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "StdAfx.h" @@ -40,13 +37,13 @@ #include "SampleEncoder.h" -/* Table of CRCs of all 8-bit messages. */ +//! Table of CRCs of all 8-bit messages. static unsigned long look_crc_table[256]; -/* Flag: has the table been computed? Initially false. */ +//! Flag: has the table been computed? Initially false. static int look_crc_table_computed = 0; -/* Make the table for a fast CRC. */ +//! Make the table for a fast CRC. 
void look_make_crc_table(void) { unsigned long c; @@ -93,8 +90,7 @@ unsigned long look_calc_crc(unsigned char *buf, int len) return look_update_crc(0xffffffffL, buf, len) ^ 0xffffffffL; } - -#define OUTPUT 0 +#define OUTPUT 0 #define BUFSIZE 1024 uint32_t ValidateLookGenCRCEnc(char *path) @@ -148,7 +144,6 @@ uint32_t ValidateLookGenCRCEnc(char *path) pos += 5; LUTfound = true; break; - } pos++; } while (pos < len - 5); @@ -196,7 +191,6 @@ SIZEfound = true; break; - } pos++; } while (pos < len - 10); @@ -209,13 +203,11 @@ { int j = 0; pos += 6; - while ( !( (buf[pos + j] >= '0' && buf[pos + j] <= '9') || - (buf[pos + j] >= 'a' && buf[pos + j] <= 'f') || - (buf[pos + j] >= 'A' && buf[pos + j] <= 'F') )) + while (!( (buf[pos + j] >= '0' && buf[pos + j] <= '9') || + (buf[pos + j] >= 'a' && buf[pos + j] <= 'f') || + (buf[pos + j] >= 'A' && buf[pos + j] <= 'F') )) pos++; - //printf("%s\n",&buf[pos+j]); - DATAfound = true; break; @@ -228,9 +220,9 @@ char hexstring[12] = "00000000"; do { - while ( !( (buf[pos] >= '0' && buf[pos] <= '9') || - (buf[pos] >= 'a' && buf[pos] <= 'f') || - (buf[pos] >= 'A' && buf[pos] <= 'F') )) + while (!( (buf[pos] >= '0' && buf[pos] <= '9') || + (buf[pos] >= 'a' && buf[pos] <= 'f') || + (buf[pos] >= 'A' && buf[pos] <= 'F') )) { if (buf[pos] == '"' || buf[pos] == '<') { @@ -255,8 +247,7 @@ uint32_t ValidateLookGenCRCEnc(char *path) hexstring[6] = buf[pos + 0]; hexstring[7] = buf[pos + 1]; - //printf("%s",hexstring); -#if defined(_WIN32) && !defined(__GNUC__) +#ifdef _MSC_VER sscanf_s(hexstring, "%08x", (int *)&val); #else sscanf(hexstring, "%08x", (int *)&val); @@ -286,7 +277,6 @@ uint32_t ValidateLookGenCRCEnc(char *path) } while (pos < len - 16 && !finished); } - // printf("len = %d\n", len); } while (len > 0 && !finished); fclose(fp); @@ -300,6 +290,7 @@ uint32_t ValidateLookGenCRCEnc(char *path) free(LUT); free(iLUT); } + return crc; } @@ -319,8 +310,8 @@ CFHD_Error CSampleEncodeMetadata::AddGUID() } CFHD_Error CSampleEncodeMetadata::AddLookFile(METADATA_TYPE ctype, - METADATA_SIZE size, - uint32_t *data) + METADATA_SIZE size, + uint32_t *data) { //TODO: Replace uses of unsigned int with size_t assert(size <= METADATA_SIZE_MAX); @@ -381,6 +372,7 @@ CFHD_Error CSampleEncodeMetadata::AddTimeStamp(const char *date, const char *tim { return CFHD_ERROR_OKAY; } + return CFHD_ERROR_UNEXPECTED; } @@ -413,17 +405,14 @@ CFHD_Error CSampleEncodeMetadata::AddFrameNumber(uint32_t framenum, bool local_m return CFHD_ERROR_OKAY; } - - - /*! - @brief Free a metadata buffer (local or global) - - The low level API in the codec library takes pointers to the buffer - pointer and size. This routine intentionally allows the low level API - to clear the local values passed as arguments. The caller should set - the buffer pointer to null and the buffer size to zero. -*/ + * @brief Free a metadata buffer (local or global) + * + * The low level API in the codec library takes pointers to the buffer + * pointer and size. This routine intentionally allows the low level API - to clear the local values passed as arguments. The caller should set + * the buffer pointer to null and the buffer size to zero. + */ void CSampleEncodeMetadata::ReleaseMetadata(METADATA *metadata) { FreeMetadata(metadata);
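
A small sketch, not from the patch, of how the compiler guard in the MetadataWriter.cpp hunk above is meant to behave: Microsoft compilers predefine _MSC_VER, and sscanf_s is MSVC's "secure" variant of sscanf, which for a plain %08x numeric conversion takes the same arguments as sscanf. The eight-digit string below is hypothetical, standing in for the hex text that ValidateLookGenCRCEnc extracts from the look file.

#include <stdio.h>

int main(void)
{
    unsigned int val = 0;
    const char hexstring[] = "00c0ffee";   /* hypothetical stand-in for the scanned CRC text */

#ifdef _MSC_VER                            /* predefined by Microsoft Visual C++ */
    sscanf_s(hexstring, "%08x", &val);     /* MSVC secure variant; %x conversions take no size argument */
#else
    sscanf(hexstring, "%08x", &val);       /* standard C everywhere else */
#endif

    printf("parsed CRC word: 0x%08x\n", val);
    return 0;
}

The sketch reads into an unsigned int because that is the type %x formally requires; the source instead casts through (int *), which works in practice since int and unsigned int share a representation on the targeted platforms.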
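
Most of the hunks in this patch rewrite guards of the form #if (1 && XMMOPT) as #if (XMMOPT). A minimal sketch of why that is behavior-preserving, assuming _XMMOPT is a build-system switch (hard-coded to 1 here only for illustration): the 1 && prefix existed as a per-site kill switch, letting a developer flip the leading 1 to 0 to disable a single SIMD block while debugging, and since 1 && x evaluates to x, dropping it cannot change which code is compiled.

#include <stdio.h>

#define _XMMOPT 1                /* assumption: in the real build this comes from the compiler flags */
#define XMMOPT  (1 && _XMMOPT)   /* the project convention, as defined near the top of vlc.c */

int main(void)
{
#if (1 && XMMOPT)                /* old guard: the leading 1 was a per-site kill switch */
    puts("SIMD path compiled in (old guard)");
#endif

#if (XMMOPT)                     /* new guard: identical truth value, since 1 && x == x */
    puts("SIMD path compiled in (new guard)");
#endif

    return 0;
}

The same reasoning covers the (1 && DEBUG), (1 && TIMING), (1 && PREFETCH), (1 && MMXOPT), and (1 && _FASTLOOP && XMMOPT) guards touched above: the preprocessor substitutes 0 for any identifier left undefined inside an #if expression, so the old and new forms agree even when a flag is never defined.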