0% found this document useful (0 votes)

87 views

CMake Lists

This document contains CMake configuration code for a project called "llama.cpp". It sets CMake options, finds dependencies, configures compiler flags, and enables Metal and other backend options depending on the platform.

Uploaded by

Attiogbe Samuel

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

87 views

CMake Lists

Uploaded by

Attiogbe Samuel

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 24

cmake_minimum_required(VERSION 3.

14) # for add_link_options and implicit target

directories.
project("llama.cpp" C CXX)
include(CheckIncludeFileCXX)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)

set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
"MinSizeRel" "RelWithDebInfo")
endif()

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)

set(LLAMA_STANDALONE ON)

# configure project version

# TODO
else()
set(LLAMA_STANDALONE OFF)
endif()

if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)

option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js"

ON)
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
else()
set(BUILD_SHARED_LIBS_DEFAULT ON)
endif()
endif()

#
# Option list
#

if (APPLE)
set(LLAMA_METAL_DEFAULT ON)
else()
set(LLAMA_METAL_DEFAULT OFF)
endif()

# general
option(BUILD_SHARED_LIBS "build shared libraries"
OFF)
option(LLAMA_STATIC "llama: static link libraries"
OFF)
option(LLAMA_NATIVE "llama: enable -march=native flag"
ON)
option(LLAMA_LTO "llama: enable link time optimization"
OFF)
option(LLAMA_CCACHE "llama: use ccache if available"
ON)
# debug
option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings"
ON)
option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd
party libs" OFF)
option(LLAMA_GPROF "llama: enable gprof"
OFF)

# build
option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag"
OFF)

# sanitizers
option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer"
OFF)
option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer"
OFF)
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer"
OFF)

# instruction set specific

if (LLAMA_NATIVE)
set(INS_ENB OFF)
else()
set(INS_ENB ON)
endif()

option(LLAMA_AVX "llama: enable AVX"

${INS_ENB})
option(LLAMA_AVX2 "llama: enable AVX2"
${INS_ENB})
option(LLAMA_AVX512 "llama: enable AVX512"
OFF)
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI"
OFF)
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI"
OFF)
option(LLAMA_FMA "llama: enable FMA"
${INS_ENB})
# in MSVC F16C is implied with AVX2/AVX512
if (NOT MSVC)
option(LLAMA_F16C "llama: enable F16C"
${INS_ENB})
endif()

if (WIN32)
set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version")
endif()

# 3rd party libs

option(LLAMA_ACCELERATE "llama: enable Accelerate framework"
ON)
option(LLAMA_BLAS "llama: use BLAS"
OFF)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA"
OFF)
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt
processing" OFF)
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA
kernels" OFF)
option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of
cuBLAS" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA
kernels")
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA
kernels")
option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some
calculations" OFF)
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for
Q2_K/Q6_K")
set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
"llama: max. batch size for using peer
access")
option(LLAMA_HIPBLAS "llama: use hipBLAS"
OFF)
option(LLAMA_HIP_UMA "llama: use HIP unified memory
architecture" OFF)
option(LLAMA_CLBLAST "llama: use CLBlast"
OFF)
option(LLAMA_VULKAN "llama: use Vulkan"
OFF)
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks"
OFF)
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output"
OFF)
option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation"
OFF)
option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests"
OFF)
option(LLAMA_METAL "llama: use Metal"
${LLAMA_METAL_DEFAULT})
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging"
OFF)
option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-
math" OFF)
option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library"
OFF)
option(LLAMA_KOMPUTE "llama: use Kompute"
OFF)
option(LLAMA_MPI "llama: use MPI"
OFF)
option(LLAMA_QKK_64 "llama: use super-block size of 64 for
k-quants" OFF)
option(LLAMA_SYCL "llama: use SYCL"
OFF)
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl
calculations" OFF)
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM"
OFF)

option(LLAMA_BUILD_TESTS "llama: build tests" $

{LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" $
{LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example"
ON)
# add perf arguments
option(LLAMA_PERF "llama: enable perf"
OFF)

# Required for relocatable CMake package

include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)

#
# Compile flags
#

if (LLAMA_SYCL)
set(CMAKE_CXX_STANDARD 17)
else()
set(CMAKE_CXX_STANDARD 11)
endif()

set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag)

# enable libstdc++ assertions for debug builds

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
endif()

if (NOT MSVC)
if (LLAMA_SANITIZE_THREAD)
add_compile_options(-fsanitize=thread)
link_libraries (-fsanitize=thread)
endif()

if (LLAMA_SANITIZE_ADDRESS)
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
link_libraries (-fsanitize=address)
endif()

if (LLAMA_SANITIZE_UNDEFINED)
add_compile_options(-fsanitize=undefined)
link_libraries (-fsanitize=undefined)
endif()
endif()

if (APPLE AND LLAMA_ACCELERATE)

find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
message(STATUS "Accelerate framework found")

add_compile_definitions(GGML_USE_ACCELERATE)
add_compile_definitions(ACCELERATE_NEW_LAPACK)
add_compile_definitions(ACCELERATE_LAPACK_ILP64)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
else()
message(WARNING "Accelerate framework not found")
endif()
endif()

if (LLAMA_METAL)
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)

message(STATUS "Metal framework found")

set(GGML_HEADERS_METAL ggml-metal.h)
set(GGML_SOURCES_METAL ggml-metal.m)

add_compile_definitions(GGML_USE_METAL)
if (LLAMA_METAL_NDEBUG)
add_compile_definitions(GGML_METAL_NDEBUG)
endif()

# get full path to the file

#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")

# copy ggml-metal.metal to bin directory

configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-
metal.metal COPYONLY)

if (LLAMA_METAL_EMBED_LIBRARY)
enable_language(ASM)
add_compile_definitions(GGML_METAL_EMBED_LIBRARY)

set(METALLIB_SOURCE "${CMAKE_SOURCE_DIR}/ggml-metal.metal")
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-
metallib.s")

add_custom_command(
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".section __DATA,__ggml_metallib" > $
{EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_start" >> $
{EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> $
{EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
DEPENDS ${METALLIB_SOURCE}
COMMENT "Generate assembly for embedded Metal library"
)

set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})

endif()

if (LLAMA_METAL_SHADER_DEBUG)
# custom command to do the following:
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-
metal.air
# xcrun -sdk macosx metallib ggml-metal.air -o
default.metallib
#
# note: this is the only way I found to disable fast-math in Metal. it's
ugly, but at least it works
# disabling fast math is needed in order to pass tests/test-backend-
ops
# note: adding -fno-inline fixes the tests when using
MTL_SHADER_VALIDATION=1
# note: unfortunately, we have to call it default.metallib instead of
ggml.metallib
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
set(XC_FLAGS -fno-fast-math -fno-inline -g)
if (LLAMA_QKK_64)
set(XC_FLAGS ${XC_FLAGS} -DQK_K=64)
endif()

add_custom_command(
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c $
{CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o $
{CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
COMMAND xcrun -sdk macosx metallib $
{CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o $
{CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
DEPENDS ggml-metal.metal
COMMENT "Compiling Metal kernels"
)

add_custom_target(
ggml-metal ALL
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
)
endif()

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK}
${METALKIT_FRAMEWORK}
)
endif()
if (LLAMA_BLAS)
if (LLAMA_STATIC)
set(BLA_STATIC ON)
endif()
if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22)
set(BLA_SIZEOF_INTEGER 8)
endif()

set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")

# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
find_package(PkgConfig REQUIRED)
if (${LLAMA_BLAS_VENDOR} MATCHES "Generic")
pkg_check_modules(DepBLAS REQUIRED blas)
elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS")
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
pkg_check_modules(DepBLAS openblas64)
if (NOT DepBLAS_FOUND)
pkg_check_modules(DepBLAS REQUIRED openblas)
endif()
elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME")
pkg_check_modules(DepBLAS REQUIRED blis)
elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS")
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS")
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel")
# all Intel* libraries share the same include path
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC")
# this doesn't provide pkg-config
# suggest to assign BLAS_INCLUDE_DIRS on your own
if ("${NVHPC_VERSION}" STREQUAL "")
message(WARNING "Better to set NVHPC_VERSION")
else()
set(DepBLAS_FOUND ON)
set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/$
{CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
endif()
endif()
if (DepBLAS_FOUND)
set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been
automatically"
" detected by pkgconfig, trying to find cblas.h from possible
paths...")
find_path(BLAS_INCLUDE_DIRS
NAMES cblas.h
HINTS
/usr/include
/usr/local/include
/usr/include/openblas
/opt/homebrew/opt/openblas/include
/usr/local/opt/openblas/include
/usr/include/x86_64-linux-gnu/openblas/include
)
endif()
endif()

message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

add_compile_options(${BLAS_LINKER_FLAGS})

add_compile_definitions(GGML_USE_OPENBLAS)

if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES

"Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL)
endif()

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})

set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-
vendors"
" to set correct LLAMA_BLAS_VENDOR")
endif()
endif()

if (LLAMA_QKK_64)
add_compile_definitions(GGML_QKK_64)
endif()

if (LLAMA_CUBLAS)
cmake_minimum_required(VERSION 3.17)

find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "cuBLAS found")

enable_language(CUDA)

set(GGML_HEADERS_CUDA ggml-cuda.h)
set(GGML_SOURCES_CUDA ggml-cuda.cu)

add_compile_definitions(GGML_USE_CUBLAS)
if (LLAMA_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif()
if (LLAMA_CUDA_FORCE_MMQ)
add_compile_definitions(GGML_CUDA_FORCE_MMQ)
endif()
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
if (DEFINED LLAMA_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for
backwards compatibility
endif()
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
add_compile_definitions(GGML_CUDA_F16)
endif()
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=$
{LLAMA_CUDA_PEER_MAX_BATCH_SIZE})

if (LLAMA_STATIC)
if (WIN32)
# As of 12.3.1 CUDA Tookit for Windows does not offer a static
cublas library
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static
CUDA::cublas CUDA::cublasLt)
else ()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static
CUDA::cublas_static CUDA::cublasLt_static)
endif()
else()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas
CUDA::cublasLt)
endif()

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver)

if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels
is faster
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA
intrinsics
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard +
lowest for integer intrinsics
#set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster,
but only F16 models work
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

else()
message(WARNING "cuBLAS not found")
endif()
endif()

if (LLAMA_MPI)
cmake_minimum_required(VERSION 3.10)
find_package(MPI)
if (MPI_C_FOUND)
message(STATUS "MPI found")

set(GGML_HEADERS_MPI ggml-mpi.h)
set(GGML_SOURCES_MPI ggml-mpi.c)

add_compile_definitions(GGML_USE_MPI)
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})

if (NOT MSVC)
add_compile_options(-Wno-cast-qual)
endif()

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})

set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})

# Even if you're only using the C header, C++ programs may bring in MPI
# C++ functions, so more linkage is needed
if (MPI_CXX_FOUND)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
endif()
else()
message(WARNING "MPI not found")
endif()
endif()

if (LLAMA_CLBLAST)
find_package(CLBlast)
if (CLBlast_FOUND)
message(STATUS "CLBlast found")

set(GGML_HEADERS_OPENCL ggml-opencl.h)
set(GGML_SOURCES_OPENCL ggml-opencl.cpp)
add_compile_definitions(GGML_USE_CLBLAST)

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)

else()
message(WARNING "CLBlast not found")
endif()
endif()

if (LLAMA_VULKAN)
find_package(Vulkan)
if (Vulkan_FOUND)
message(STATUS "Vulkan found")

set(GGML_HEADERS_VULKAN ggml-vulkan.h)
set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)

add_compile_definitions(GGML_USE_VULKAN)

if (LLAMA_VULKAN_CHECK_RESULTS)
add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
endif()

if (LLAMA_VULKAN_DEBUG)
add_compile_definitions(GGML_VULKAN_DEBUG)
endif()

if (LLAMA_VULKAN_VALIDATE)
add_compile_definitions(GGML_VULKAN_VALIDATE)
endif()

if (LLAMA_VULKAN_RUN_TESTS)
add_compile_definitions(GGML_VULKAN_RUN_TESTS)
endif()

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan)

else()
message(WARNING "Vulkan not found")
endif()
endif()

if (LLAMA_HIPBLAS)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")

message(WARNING "Only LLVM is supported for HIP, hint:
CC=/opt/rocm/llvm/bin/clang")
endif()

if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")

message(WARNING "Only LLVM is supported for HIP, hint:
CXX=/opt/rocm/llvm/bin/clang++")
endif()

find_package(hip REQUIRED)
find_package(hipblas REQUIRED)
find_package(rocblas REQUIRED)

message(STATUS "HIP and hipBLAS found")

set(GGML_HEADERS_ROCM ggml-cuda.h)
set(GGML_SOURCES_ROCM ggml-cuda.cu)

add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)

if (LLAMA_HIP_UMA)
add_compile_definitions(GGML_HIP_UMA)
endif()

if (LLAMA_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif()

if (LLAMA_CUDA_FORCE_MMQ)
add_compile_definitions(GGML_CUDA_FORCE_MMQ)
endif()

add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})

set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)

if (LLAMA_STATIC)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
endif()

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host

roc::rocblas roc::hipblas)
endif()

if (LLAMA_SYCL)
if ( NOT DEFINED ENV{ONEAPI_ROOT})
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI &
source it, like: source /opt/intel/oneapi/setvars.sh")
endif()
#todo: AOT

find_package(IntelSYCL REQUIRED)

message(STATUS "SYCL found")

add_compile_definitions(GGML_USE_SYCL)

if (LLAMA_SYCL_F16)
add_compile_definitions(GGML_SYCL_F16)
endif()

add_compile_options(-I./) #include DPCT

add_compile_options(-I/${SYCL_INCLUDE_DIR})

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")

set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL
mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib
mkl_core_dll.lib)
else()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m
dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
endif()
endif()

if (LLAMA_KOMPUTE)
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
find_package(Vulkan COMPONENTS glslc REQUIRED)
find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
if (NOT glslc_executable)
message(FATAL_ERROR "glslc not found")
endif()

function(compile_shader)
set(options)
set(oneValueArgs)
set(multiValueArgs SOURCES)
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "$
{multiValueArgs}" ${ARGN})
foreach(source ${compile_shader_SOURCES})
get_filename_component(filename ${source} NAME)
set(spv_file ${filename}.spv)
add_custom_command(
OUTPUT ${spv_file}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} $
{CMAKE_CURRENT_SOURCE_DIR}/${source}
COMMENT "Compiling ${source} to ${spv_file}"
)

get_filename_component(RAW_FILE_NAME ${spv_file} NAME)

set(FILE_NAME "shader${RAW_FILE_NAME}")
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
if(CMAKE_GENERATOR MATCHES "Visual Studio")
add_custom_command(
OUTPUT ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN
AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef $
{HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#define $
{HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> $
{OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> $
{OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i $
{RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define $
{HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
DEPENDS ${spv_file} xxd
COMMENT "Converting to hpp: ${FILE_NAME}
${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
)
else()
add_custom_command(
OUTPUT ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN
AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef $
{HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#define $
{HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> $
{OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> $
{OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> $
{OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define $
{HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
DEPENDS ${spv_file} xxd
COMMENT "Converting to hpp: ${FILE_NAME}
${CMAKE_BINARY_DIR}/bin/xxd"
)
endif()
endforeach()
endfunction()

if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
message(STATUS "Kompute found")
set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
add_subdirectory(kompute)

# Compile our shaders

compile_shader(SOURCES
kompute-shaders/op_scale.comp
kompute-shaders/op_scale_8.comp
kompute-shaders/op_add.comp
kompute-shaders/op_addrow.comp
kompute-shaders/op_mul.comp
kompute-shaders/op_silu.comp
kompute-shaders/op_relu.comp
kompute-shaders/op_gelu.comp
kompute-shaders/op_softmax.comp
kompute-shaders/op_norm.comp
kompute-shaders/op_rmsnorm.comp
kompute-shaders/op_diagmask.comp
kompute-shaders/op_mul_mat_mat_f32.comp
kompute-shaders/op_mul_mat_f16.comp
kompute-shaders/op_mul_mat_q8_0.comp
kompute-shaders/op_mul_mat_q4_0.comp
kompute-shaders/op_mul_mat_q4_1.comp
kompute-shaders/op_mul_mat_q6_k.comp
kompute-shaders/op_getrows_f16.comp
kompute-shaders/op_getrows_q4_0.comp
kompute-shaders/op_getrows_q4_1.comp
kompute-shaders/op_getrows_q6_k.comp
kompute-shaders/op_rope_f16.comp
kompute-shaders/op_rope_f32.comp
kompute-shaders/op_cpy_f16_f16.comp
kompute-shaders/op_cpy_f16_f32.comp
kompute-shaders/op_cpy_f32_f16.comp
kompute-shaders/op_cpy_f32_f32.comp
)

# Create a custom target for our generated shaders

add_custom_target(generated_shaders DEPENDS
shaderop_scale.h
shaderop_scale_8.h
shaderop_add.h
shaderop_addrow.h
shaderop_mul.h
shaderop_silu.h
shaderop_relu.h
shaderop_gelu.h
shaderop_softmax.h
shaderop_norm.h
shaderop_rmsnorm.h
shaderop_diagmask.h
shaderop_mul_mat_mat_f32.h
shaderop_mul_mat_f16.h
shaderop_mul_mat_q8_0.h
shaderop_mul_mat_q4_0.h
shaderop_mul_mat_q4_1.h
shaderop_mul_mat_q6_k.h
shaderop_getrows_f16.h
shaderop_getrows_q4_0.h
shaderop_getrows_q4_1.h
shaderop_getrows_q6_k.h
shaderop_rope_f16.h
shaderop_rope_f32.h
shaderop_cpy_f16_f16.h
shaderop_cpy_f16_f32.h
shaderop_cpy_f32_f16.h
shaderop_cpy_f32_f32.h
)

# Create a custom command that depends on the generated_shaders

add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-
kompute.stamp
DEPENDS generated_shaders
COMMENT "Ensuring shaders are generated before compiling ggml-
kompute.cpp"
)

# Add the stamp to the main sources to ensure dependency tracking

set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-
kompute.stamp)
set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-
kompute.stamp)

add_compile_definitions(GGML_USE_KOMPUTE)

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)

set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
else()
message(WARNING "Kompute not found")
endif()
endif()

if (LLAMA_CPU_HBM)
find_library(memkind memkind REQUIRED)

add_compile_definitions(GGML_USE_CPU_HBM)

target_link_libraries(ggml PUBLIC memkind)

endif()

if (LLAMA_PERF)
add_compile_definitions(GGML_PERF)
endif()

function(get_flags CCID CCVER)

set(C_FLAGS "")
set(CXX_FLAGS "")

if (CCID MATCHES "Clang")

set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return)
set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-
prototypes -Wextra-semi)

if (
(CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR
(CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0)
)
list(APPEND C_FLAGS -Wdouble-promotion)
endif()
elseif (CCID STREQUAL "GNU")
set(C_FLAGS -Wdouble-promotion)
set(CXX_FLAGS -Wno-array-bounds)

if (CCVER VERSION_GREATER_EQUAL 7.1.0)

list(APPEND CXX_FLAGS -Wno-format-truncation)
endif()
if (CCVER VERSION_GREATER_EQUAL 8.1.0)
list(APPEND CXX_FLAGS -Wextra-semi)
endif()
endif()

set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)

set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
endfunction()

if (LLAMA_FATAL_WARNINGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES
"Clang")
list(APPEND C_FLAGS -Werror)
list(APPEND CXX_FLAGS -Werror)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
add_compile_options(/WX)
endif()
endif()

if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-
function)
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -
Wmissing-prototypes
-Werror=implicit-int -Werror=implicit-function-
declaration)
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)

list(APPEND C_FLAGS ${WARNING_FLAGS})

list(APPEND CXX_FLAGS ${WARNING_FLAGS})

get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})

add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};$
{GF_CXX_FLAGS}>")
else()
# todo : msvc
set(C_FLAGS "")
set(CXX_FLAGS "")
endif()
endif()

set(CUDA_CXX_FLAGS "")

if (LLAMA_CUBLAS)
set(CUDA_FLAGS -use_fast_math)

if (LLAMA_FATAL_WARNINGS)
list(APPEND CUDA_FLAGS -Werror all-warnings)
endif()

if (LLAMA_ALL_WARNINGS AND NOT MSVC)

set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
endif()

execute_process(
COMMAND ${NVCC_CMD} -Xcompiler --version
OUTPUT_VARIABLE CUDA_CCFULLVER
ERROR_QUIET
)

if (NOT CUDA_CCFULLVER MATCHES clang)

set(CUDA_CCID "GNU")
execute_process(
COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
OUTPUT_VARIABLE CUDA_CCVER
ERROR_QUIET
)
else()
if (CUDA_CCFULLVER MATCHES Apple)
set(CUDA_CCID "AppleClang")
else()
set(CUDA_CCID "Clang")
endif()
string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER $
{CUDA_CCFULLVER})
endif()

message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")

get_flags(${CUDA_CCID} ${CUDA_CCVER})
list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed
to -Xcompiler later
endif()

if (NOT MSVC)
list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
endif()
endif()

if (WIN32)
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)

if (BUILD_SHARED_LIBS)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
endif()

if (LLAMA_LTO)
include(CheckIPOSupported)
check_ipo_supported(RESULT result OUTPUT output)
if (result)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(WARNING "IPO is not supported: ${output}")
endif()
endif()

if (LLAMA_CCACHE)
find_program(LLAMA_CCACHE_FOUND ccache)
if (LLAMA_CCACHE_FOUND)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set(ENV{CCACHE_SLOPPINESS} time_macros)
message(STATUS "ccache found, compilation results will be cached. Disable
with LLAMA_CCACHE=OFF.")
else()
message(STATUS "Warning: ccache not found - consider installing it for
faster compilation or disable this warning with LLAMA_CCACHE=OFF")
endif ()
endif()

# this version of Apple ld64 is buggy

execute_process(
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
ERROR_VARIABLE output
OUTPUT_QUIET
)
if (output MATCHES "dyld-1015\.7")
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
endif()

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull
request or issue
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
if (MSVC)
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
else ()
set(CMAKE_GENERATOR_PLATFORM_LWR "")
endif ()

if (NOT MSVC)
if (LLAMA_STATIC)
add_link_options(-static)
if (MINGW)
add_link_options(-static-libgcc -static-libstdc++)
endif()
endif()
if (LLAMA_GPROF)
add_compile_options(-pg)
endif()
endif()

set(ARCH_FLAGS "")

if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR

STREQUAL "arm64" OR
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
message(STATUS "ARM detected")
if (MSVC)
add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
add_compile_definitions(__ARM_NEON)
add_compile_definitions(__ARM_FEATURE_FMA)

set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}
"/arch:armv8.2")
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t
_a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }"
GGML_COMPILER_SUPPORT_DOTPROD)
if (GGML_COMPILER_SUPPORT_DOTPROD)
add_compile_definitions(__ARM_FEATURE_DOTPROD)
endif ()
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t
_a; float16x8_t _s = vdupq_n_f16(_a); return 0; }"
GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
endif ()
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
else()
check_cxx_compiler_flag(-mfp16-format=ieee
COMPILER_SUPPORTS_FP16_FORMAT_I3E)
if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
list(APPEND ARCH_FLAGS -mfp16-format=ieee)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
# Raspberry Pi 2
list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -
funsafe-math-optimizations)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
# Raspberry Pi 3, 4, Zero 2 (32-bit)
list(APPEND ARCH_FLAGS -mno-unaligned-access)
endif()
endif()
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR
MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
message(STATUS "x86 detected")
if (MSVC)
# instruction set detection for MSVC only
if (LLAMA_NATIVE)
include(cmake/FindSIMD.cmake)
endif ()
if (LLAMA_AVX512)
list(APPEND ARCH_FLAGS /arch:AVX512)
# MSVC has no compile-time flags enabling specific
# AVX512 extensions, neither it defines the
# macros corresponding to the extensions.
# Do it manually.
if (LLAMA_AVX512_VBMI)
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
endif()
if (LLAMA_AVX512_VNNI)
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
endif()
elseif (LLAMA_AVX2)
list(APPEND ARCH_FLAGS /arch:AVX2)
elseif (LLAMA_AVX)
list(APPEND ARCH_FLAGS /arch:AVX)
endif()
else()
if (LLAMA_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
endif()
if (LLAMA_F16C)
list(APPEND ARCH_FLAGS -mf16c)
endif()
if (LLAMA_FMA)
list(APPEND ARCH_FLAGS -mfma)
endif()
if (LLAMA_AVX)
list(APPEND ARCH_FLAGS -mavx)
endif()
if (LLAMA_AVX2)
list(APPEND ARCH_FLAGS -mavx2)
endif()
if (LLAMA_AVX512)
list(APPEND ARCH_FLAGS -mavx512f)
list(APPEND ARCH_FLAGS -mavx512bw)
endif()
if (LLAMA_AVX512_VBMI)
list(APPEND ARCH_FLAGS -mavx512vbmi)
endif()
if (LLAMA_AVX512_VNNI)
list(APPEND ARCH_FLAGS -mavx512vnni)
endif()
endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
message(STATUS "PowerPC detected")
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
else()
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and
query for big endian systems (ppc64/le/be)
endif()
else()
message(STATUS "Unknown architecture")
endif()

add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")

if (LLAMA_CUBLAS)
list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags
as a single argument
if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
endif()
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()

if (MINGW)
# Target Windows 8 for PrefetchVirtualMemory
add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER})
endif()

#
# POSIX conformance
#

# clock_gettime came in POSIX.1b (1993)

# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
# posix_memalign came in POSIX.1-2001 / SUSv3
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
add_compile_definitions(_XOPEN_SOURCE=600)

# Somehow in OpenBSD whenever POSIX conformance is specified

# some string functions rely on locale_t availability,
# which was introduced in POSIX.1-2008, forcing us to go higher
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
remove_definitions(-D_XOPEN_SOURCE=600)
add_compile_definitions(_XOPEN_SOURCE=700)
endif()

# Data types, macros and functions related to controlling CPU affinity and
# some memory allocation are available on Linux through GNU extensions in libc
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions(_GNU_SOURCE)
endif()

# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,

# and on macOS its availability depends on enabling Darwin extensions
# similarly on DragonFly, enabling BSD extensions is necessary
if (
CMAKE_SYSTEM_NAME MATCHES "Darwin" OR
CMAKE_SYSTEM_NAME MATCHES "iOS" OR
CMAKE_SYSTEM_NAME MATCHES "tvOS" OR
CMAKE_SYSTEM_NAME MATCHES "DragonFly"
)
add_compile_definitions(_DARWIN_C_SOURCE)
endif()

# alloca is a non-standard interface that is not visible on BSDs when

# POSIX conformance is specified, but not all of them provide a clean way
# to enable it in such cases
if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
add_compile_definitions(__BSD_VISIBLE)
endif()
if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
add_compile_definitions(_NETBSD_SOURCE)
endif()
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
add_compile_definitions(_BSD_SOURCE)
endif()

#
# libraries
#

# ggml

add_library(ggml OBJECT
ggml.c
ggml.h
ggml-alloc.c
ggml-alloc.h
ggml-backend.c
ggml-backend.h
ggml-quants.c
ggml-quants.h
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
)

target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})

target_compile_features (ggml PUBLIC c_std_11) # don't bump

target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})

add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)

if (BUILD_SHARED_LIBS)
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
install(TARGETS ggml_shared LIBRARY)
endif()

# llama

add_library(llama
llama.cpp
llama.h
)

target_include_directories(llama PUBLIC .)
target_compile_features (llama PUBLIC cxx_std_11) # don't bump

target_link_libraries(llama PRIVATE
ggml
${LLAMA_EXTRA_LIBS}
)

if (BUILD_SHARED_LIBS)
set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
if (LLAMA_METAL)
set_target_properties(llama PROPERTIES RESOURCE "$
{CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
endif()
endif()

#
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}
CACHE PATH "Location of header files")
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}
CACHE PATH "Location of library files")
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR}
CACHE PATH "Location of binary files")
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
configure_package_config_file(
${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama
PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
LLAMA_LIB_INSTALL_DIR
LLAMA_BIN_INSTALL_DIR )

write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
VERSION ${LLAMA_INSTALL_VERSION}
COMPATIBILITY SameMajorVersion)

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)

set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"

"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

install(TARGETS ggml PUBLIC_HEADER)

set_target_properties(llama PROPERTIES PUBLIC_HEADER

${CMAKE_CURRENT_SOURCE_DIR}/llama.h)
install(TARGETS llama LIBRARY PUBLIC_HEADER)

install(
FILES convert.py
PERMISSIONS
OWNER_READ
OWNER_WRITE
OWNER_EXECUTE
GROUP_READ
GROUP_EXECUTE
WORLD_READ
WORLD_EXECUTE
DESTINATION ${CMAKE_INSTALL_BINDIR})
install(
FILES convert-lora-to-ggml.py
PERMISSIONS
OWNER_READ
OWNER_WRITE
OWNER_EXECUTE
GROUP_READ
GROUP_EXECUTE
WORLD_READ
WORLD_EXECUTE
DESTINATION ${CMAKE_INSTALL_BINDIR})
if (LLAMA_METAL)
install(
FILES ggml-metal.metal
PERMISSIONS
OWNER_READ
OWNER_WRITE
GROUP_READ
WORLD_READ
DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()

#
# programs, examples and tests
#

add_subdirectory(common)

if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)

include(CTest)
add_subdirectory(tests)
endif ()

if (LLAMA_BUILD_EXAMPLES)
add_subdirectory(examples)
add_subdirectory(pocs)
endif()

CMake Lists
No ratings yet
CMake Lists
15 pages
C Make Lists
No ratings yet
C Make Lists
8 pages
C Make Lists
No ratings yet
C Make Lists
10 pages
C Make Lists
No ratings yet
C Make Lists
17 pages
CMake Lists
No ratings yet
CMake Lists
16 pages
CMake Lists
No ratings yet
CMake Lists
9 pages
CMakeLists
No ratings yet
CMakeLists
9 pages
CMake Lists
No ratings yet
CMake Lists
14 pages
Llamacpp
No ratings yet
Llamacpp
2 pages
CMake Lists
No ratings yet
CMake Lists
7 pages
CMakeLists
No ratings yet
CMakeLists
14 pages
CMakeLists
No ratings yet
CMakeLists
7 pages
CMakeLists
No ratings yet
CMakeLists
14 pages
C Make Lists
No ratings yet
C Make Lists
17 pages
C Make Lists
No ratings yet
C Make Lists
34 pages
CMakeLists
No ratings yet
CMakeLists
8 pages
CMake Lists
No ratings yet
CMake Lists
7 pages
CMakeOptions
No ratings yet
CMakeOptions
4 pages
CMake Lists
No ratings yet
CMake Lists
12 pages
CMake Lists
No ratings yet
CMake Lists
7 pages
CMakeLists
No ratings yet
CMakeLists
5 pages
meson_options
No ratings yet
meson_options
7 pages
CMake Lists
No ratings yet
CMake Lists
12 pages
c Make Lists
No ratings yet
c Make Lists
3 pages
CMake Lists
No ratings yet
CMake Lists
6 pages
CMakeLists
No ratings yet
CMakeLists
15 pages
CMake Lists
No ratings yet
CMake Lists
9 pages
CMake Lists
No ratings yet
CMake Lists
7 pages
CMake Lists
No ratings yet
CMake Lists
6 pages
CMake Lists
No ratings yet
CMake Lists
6 pages
C Make Lists
No ratings yet
C Make Lists
6 pages
At A Aaaaaaaaaaaa
No ratings yet
At A Aaaaaaaaaaaa
15 pages
CMakeLists
No ratings yet
CMakeLists
6 pages
CMake Lists
No ratings yet
CMake Lists
3 pages
CMakeLists
No ratings yet
CMakeLists
12 pages
CMakeLists
No ratings yet
CMakeLists
3 pages
CMakeLists
No ratings yet
CMakeLists
5 pages
CMake Lists
No ratings yet
CMake Lists
42 pages
CMakeLists
No ratings yet
CMakeLists
7 pages
C Make Lists
No ratings yet
C Make Lists
6 pages
1
No ratings yet
1
53 pages
Cmake List For Cmangos Zero
No ratings yet
Cmake List For Cmangos Zero
9 pages
Building OpenCV On Ubuntu Hardfloat Rootfs For i.MX6Q
No ratings yet
Building OpenCV On Ubuntu Hardfloat Rootfs For i.MX6Q
5 pages
CMake Lists
No ratings yet
CMake Lists
4 pages
Cross Compile QT5 Sources
No ratings yet
Cross Compile QT5 Sources
13 pages
CMake Lists
No ratings yet
CMake Lists
39 pages
CMakeLists
No ratings yet
CMakeLists
5 pages
CMakeLists
No ratings yet
CMakeLists
28 pages
CMakeLists
No ratings yet
CMakeLists
14 pages
C Make Lists
No ratings yet
C Make Lists
4 pages
CMake Lists
No ratings yet
CMake Lists
22 pages
CMake Lists
No ratings yet
CMake Lists
14 pages
CMake Lists
No ratings yet
CMake Lists
4 pages
CMake Lists
No ratings yet
CMake Lists
6 pages
vasp_5.4.4.pl2-with-intel-2018
No ratings yet
vasp_5.4.4.pl2-with-intel-2018
3 pages
Meson Options
No ratings yet
Meson Options
4 pages
CMakeLists
No ratings yet
CMakeLists
6 pages
Introduction to PHP, Part 5, Second Edition
From Everand
Introduction to PHP, Part 5, Second Edition
Adam Majczak
No ratings yet
Configuration of a Simple Samba File Server, Quota and Schedule Backup
From Everand
Configuration of a Simple Samba File Server, Quota and Schedule Backup
Dr. Hidaia Mahmood Alassouli
No ratings yet
Build your own Blockchain: Make your own blockchain and trading bot on your pc
From Everand
Build your own Blockchain: Make your own blockchain and trading bot on your pc
Magelan Cybersecurity
No ratings yet
Difference Between IPO OFS FPO
No ratings yet
Difference Between IPO OFS FPO
2 pages
ES2D7 System and Software Engineering Principles - Object Orientated Approaches
No ratings yet
ES2D7 System and Software Engineering Principles - Object Orientated Approaches
9 pages
Hermite
No ratings yet
Hermite
2 pages
Touch Screen Technology: Let'S Touch The Future
No ratings yet
Touch Screen Technology: Let'S Touch The Future
45 pages
How To Flush Gearboxes and Bearing Housings: Noria Corporation
No ratings yet
How To Flush Gearboxes and Bearing Housings: Noria Corporation
6 pages
Tle-Te 9 q1 w2 Mod2 Ict Css
No ratings yet
Tle-Te 9 q1 w2 Mod2 Ict Css
27 pages
Resolution Approving The DTP
No ratings yet
Resolution Approving The DTP
3 pages
Indicative and Annualized Rates On Deposits W.E .F - 01 .02.2024 To 31 .03.2024 - Rate Sheet
No ratings yet
Indicative and Annualized Rates On Deposits W.E .F - 01 .02.2024 To 31 .03.2024 - Rate Sheet
4 pages
Lamina Papryacea Variation
No ratings yet
Lamina Papryacea Variation
8 pages
Takehome 5 (Due Jan. 9, 2024) : 1000-Mwe LWR Thermal Efficiency 0.33 Residence Time 36 Months Capacity Factor 0.87
No ratings yet
Takehome 5 (Due Jan. 9, 2024) : 1000-Mwe LWR Thermal Efficiency 0.33 Residence Time 36 Months Capacity Factor 0.87
1 page
Dell™ Multifunction Color Laser Printer 3115cn User's Guide Manual
100% (2)
Dell™ Multifunction Color Laser Printer 3115cn User's Guide Manual
604 pages
Castillo, Kristofer Karlo C.: Objective
No ratings yet
Castillo, Kristofer Karlo C.: Objective
2 pages
AutoCAD Course Outline
50% (2)
AutoCAD Course Outline
10 pages
Data Analyst Interview Questions
No ratings yet
Data Analyst Interview Questions
7 pages
General Arrangement Up-Series (80 Gal.) 22431829 G: R.Redmond W.Sorah
No ratings yet
General Arrangement Up-Series (80 Gal.) 22431829 G: R.Redmond W.Sorah
3 pages
Investment Analysis and Portfolio Management: Lecture Presentation Software
No ratings yet
Investment Analysis and Portfolio Management: Lecture Presentation Software
77 pages
LDR
No ratings yet
LDR
8 pages
Grand Matrix-1
No ratings yet
Grand Matrix-1
16 pages
27h Radiography Permit
No ratings yet
27h Radiography Permit
1 page
2_Service_Manual_8M0105569_1C453840 & Up
100% (1)
2_Service_Manual_8M0105569_1C453840 & Up
509 pages
Isabel King Resume 7-14-21
No ratings yet
Isabel King Resume 7-14-21
2 pages
3 HR Frame Worksheet
No ratings yet
3 HR Frame Worksheet
3 pages
Hydrocortisone, Vitamin C, and Thiamine For The Treatment of Severe Sepsis and Septic Shock
No ratings yet
Hydrocortisone, Vitamin C, and Thiamine For The Treatment of Severe Sepsis and Septic Shock
10 pages
Laws and Ethics
No ratings yet
Laws and Ethics
6 pages
Uis Copy - Training-Proposal-Gsp-Camp
No ratings yet
Uis Copy - Training-Proposal-Gsp-Camp
10 pages
Chapter 1. Introduction and Theoretical Issues in Archaeological Gis. Chapter 2.
100% (1)
Chapter 1. Introduction and Theoretical Issues in Archaeological Gis. Chapter 2.
22 pages
Revision of Gramin Utsav Yojona
No ratings yet
Revision of Gramin Utsav Yojona
2 pages
BJT Amplifier Design
No ratings yet
BJT Amplifier Design
14 pages
B2B Pricing Models
No ratings yet
B2B Pricing Models
5 pages
Analog-Circuit-EC-405-UNIT-IV (1)
No ratings yet
Analog-Circuit-EC-405-UNIT-IV (1)
49 pages