Skip to content

Commit

Permalink
add iOS and armv8 build (#1014)
Browse files Browse the repository at this point in the history
add iOS and armv8 build (#1014)
Co-authored-by: Nikolay Bogoychev <[email protected]>
Co-authored-by: Varun Mathur <[email protected]>
  • Loading branch information
vrnmthr authored Feb 16, 2024
1 parent ba5df66 commit 2d067af
Show file tree
Hide file tree
Showing 20 changed files with 1,435 additions and 71 deletions.
43 changes: 43 additions & 0 deletions .github/workflows/ios.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# CI job that cross-compiles the CPU-only build for iOS.
# Runs on every push to master and on every pull request that targets master.
name: iOS

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build-macos:
    name: iOS CPU-only
    runs-on: macos-12

    steps:
    - name: Checkout
      uses: actions/checkout@v2
      with:
        # third-party dependencies live in git submodules
        submodules: recursive

    - name: Install dependencies
      run: brew install boost openblas openssl protobuf

    - name: Configure CMake
      run: |
        export LDFLAGS="-L/usr/local/opt/openblas/lib"
        export CPPFLAGS="-I/usr/local/opt/openblas/include"
        mkdir -p build
        cd build
        cmake .. \
          -DCOMPILE_CPU=on \
          -DCOMPILE_CUDA=off \
          -DCOMPILE_EXAMPLES=on \
          -DCOMPILE_SERVER=off \
          -DCOMPILE_TESTS=on \
          -DUSE_SENTENCEPIECE=on \
          -DCMAKE_TOOLCHAIN_FILE=../cmake/ios.toolchain.cmake \
          -DPLATFORM=OS64 \
          -DDEPLOYMENT_TARGET=13.0

    - name: Compile
      working-directory: build
      run: cmake --build . --config Release
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,9 @@
[submodule "src/3rd_party/simple-websocket-server"]
path = src/3rd_party/simple-websocket-server
url = https://github.com/marian-nmt/Simple-WebSocket-Server
# ruy: GEMM library the ARM build falls back to when Apple Accelerate is not available
# (see the ARM section of CMakeLists.txt).
[submodule "src/3rd_party/ruy"]
path = src/3rd_party/ruy
url = https://github.com/google/ruy.git
# simd_utils: the ARM build defines ARM, FMA and SSE as this library requires
# (see the ARM section of CMakeLists.txt).
[submodule "src/3rd_party/simd_utils"]
path = src/3rd_party/simd_utils
url = https://github.com/JishinMaster/simd_utils.git
75 changes: 59 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ if (POLICY CMP0074)
endif ()

project(marian CXX C)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")
Expand Down Expand Up @@ -80,6 +81,40 @@ if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif()

# iOS support: an iOS target is always built through the ARM code path.
if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
  # Xcode needs the 64-to-32-bit shortening warning suppressed.
  list(APPEND ALL_WARNINGS -Wno-shorten-64-to-32)
  set(ARM ON)
endif()

# ARM support. Only armv8 is handled at the moment; armv8 includes NEON by default.
# Automatic architecture detection (also for cross-compilation) is not well
# supported yet and is planned for future PRs, so ARM has to be switched on
# explicitly (the iOS section does this automatically).
if(ARM)

  # SGEMM backend: Apple platforms come with Apple Accelerate, every other ARM
  # target falls back to Ruy.
  if(NOT APPLE)
    option(USE_RUY_SGEMM "Compile with Ruy SGEMM" ON)
    # NOTE(review): ruy is linked even when the user turns USE_RUY_SGEMM off --
    # confirm that this is intended.
    list(APPEND EXT_LIBS ruy)
    message(STATUS "Using Ruy SGEMM")
  else()
    option(USE_RUY_SGEMM "Compile with Ruy SGEMM" OFF)
    message(STATUS "Using Apple Accelerate SGEMM")
  endif()

  # simd_utils requires these definitions when building for ARM; see its README.
  add_compile_definitions(ARM FMA SSE)

  # Extra flags that are later merged into DISABLE_GLOBALLY. Strict aliasing is
  # switched off as a precaution, and -Wcomment is silenced.
  set(ARM_WARNINGS "-fno-strict-aliasing -Wno-comment")

  # Same option for both compiler families, spelled with the matching prefix.
  if(NOT MSVC)
    add_compile_options(-flax-vector-conversions)
  else()
    add_compile_options(/flax-vector-conversions)
  endif()
endif()

########
# pThreads: consider it as EXT_LIBS for a more portable binary
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
Expand All @@ -88,7 +123,6 @@ find_package(Threads REQUIRED)
set(EXT_LIBS ${EXT_LIBS} Threads::Threads)
########


###############################################################################
# Set compilation flags
if(MSVC)
Expand Down Expand Up @@ -148,13 +182,16 @@ else(MSVC)
set(INTRINSICS "")
list(APPEND INTRINSICS_NVCC)

# x86 SIMD switches. None of them is available on ARM, so they are declared
# only for non-ARM targets; on ARM they stay undefined and evaluate to false.
# They must not also be declared unconditionally: option() is a no-op for a
# variable that already exists, which would make this guard ineffective.
if(NOT ARM)
  option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON)
  option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON)
  option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON)
  option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON)
  option(COMPILE_AVX "Compile CPU code with AVX support" ON)
  option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON)
  option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON)
endif()

if(BUILD_ARCH STREQUAL "native")
message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.")
Expand Down Expand Up @@ -230,7 +267,7 @@ else(MSVC)
# Clang-10.0.0 complains when CUDA is newer than 10.1
set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-warning-option -Wno-unknown-cuda-version")
endif()
# Flags that go into both CMAKE_CXX_FLAGS and CMAKE_C_FLAGS.
# ARM_WARNINGS is only set by the ARM section and expands to nothing otherwise.
set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA} ${ARM_WARNINGS}")

# These are used in src/CMakeLists.txt on a per-target basis
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated;
Expand All @@ -249,24 +286,30 @@ else(MSVC)
set(CMAKE_RDYNAMIC_FLAG "-rdynamic")
endif(CMAKE_COMPILER_IS_GNUCC)

# Global C++ flags for all build types.
# -march is deliberately not part of the base flags: it is appended further
# down for every build except the iOS cross-compilation. -m64 is not used so
# that the flag set is not tied to x86 toolchains.
# NOTE(review): -std=c++11 is passed here although CMAKE_CXX_STANDARD is set
# to 17 at the top of the file -- confirm which one is meant to win.
set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG")
# The remaining build types are derived from the release flags.
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg")
set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction")

# The C flags need to be set separately; they mirror the C++ flags without -std.
set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} ${INTRINSICS}")
set(CMAKE_C_FLAGS_RELEASE "-O3 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS_SLIM "-O3 -funroll-loops -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}")
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction")

# Append -march=${BUILD_ARCH} to the C++ and C flags for every build except
# the iOS cross-compilation.
if(NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
  foreach(lang CXX C)
    string(APPEND CMAKE_${lang}_FLAGS " -march=${BUILD_ARCH}")
  endforeach()
endif()
endif(MSVC)

# with gcc 7.0 and above we need to mark fallthrough in switch case statements
Expand Down Expand Up @@ -520,7 +563,7 @@ endif()
###############################################################################
# Find BLAS library
if(COMPILE_CPU)
if(NOT GENERATE_MARIAN_INSTALL_TARGETS)
if(NOT GENERATE_MARIAN_INSTALL_TARGETS AND NOT ARM)
set(EXT_LIBS ${EXT_LIBS} intgemm) # Enable intgemm when compiling CPU
add_definitions(-DCOMPILE_CPU=1)
endif()
Expand Down
Loading

0 comments on commit 2d067af

Please sign in to comment.