# Umbrella target for the GPU math benchmarks; the benchmarks registered in
# this file name it as their SUITE.
add_custom_target(libc-gpu-math-benchmarks)

# Extra compile options shared by the math benchmarks. Starts out empty and is
# extended below when a vendor math bitcode library is located.
set(math_benchmark_flags "")
# NVPTX: if the CUDA toolkit was found and its libdevice bitcode file exists,
# link that bitcode into the benchmarks and define NVPTX_MATH_FOUND.
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX AND CUDAToolkit_FOUND)
  # libdevice is looked up relative to the toolkit's bin directory.
  set(libdevice_path
      "${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc")
  if(EXISTS "${libdevice_path}")
    # The SHELL: prefix keeps the repeated -Xclang options from being
    # de-duplicated. The NVPTX_MATH_FOUND definition lets the benchmark
    # sources know they should register the NVPTX benchmarks.
    list(APPEND math_benchmark_flags
      "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${libdevice_path}"
      "-DNVPTX_MATH_FOUND=1")
  endif()
endif()

# AMDGPU: if the ROCm device libraries package is found and its `ocml` target
# points at an existing bitcode file, link that bitcode into the benchmarks
# and define AMDGPU_MATH_FOUND.
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
  find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
  # get_target_property() is a hard error on a non-existent target, so make
  # sure the package actually exported `ocml` before querying it.
  if(AMDDeviceLibs_FOUND AND TARGET ocml)
    get_target_property(ocml_path ocml IMPORTED_LOCATION)
    # When the property is unset the result is "ocml_path-NOTFOUND", which
    # if() treats as false. Only advertise the vendor math library when the
    # bitcode file is really there, mirroring the EXISTS check used for NVPTX.
    if(ocml_path AND EXISTS "${ocml_path}")
      # The SHELL: prefix keeps the repeated -Xclang options from being
      # de-duplicated.
      list(APPEND math_benchmark_flags
        "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
      # AMDGPU counterpart of NVPTX_MATH_FOUND.
      list(APPEND math_benchmark_flags "-DAMDGPU_MATH_FOUND=1")
    endif()
  endif()
endif()

# sin_benchmark: built from sin_benchmark.cpp as part of the
# libc-gpu-math-benchmarks suite. The vendor math flags collected above are
# passed through as compile options.
add_benchmark(
  sin_benchmark
  SUITE libc-gpu-math-benchmarks
  SRCS sin_benchmark.cpp
  DEPENDS
    libc.src.math.sin
    libc.src.math.sinf
    libc.src.stdlib.srand
    libc.src.stdlib.rand
    libc.src.__support.FPUtil.fp_bits
    libc.src.__support.CPP.bit
    libc.src.__support.CPP.array
  COMPILE_OPTIONS ${math_benchmark_flags}
  LOADER_ARGS --threads 64
)

# atan2_benchmark: built from atan2_benchmark.cpp as part of the
# libc-gpu-math-benchmarks suite. The vendor math flags collected above are
# passed through as compile options.
add_benchmark(
  atan2_benchmark
  SUITE libc-gpu-math-benchmarks
  SRCS atan2_benchmark.cpp
  DEPENDS
    libc.src.math.atan2
    libc.src.stdlib.srand
    libc.src.stdlib.rand
    libc.src.__support.FPUtil.fp_bits
    libc.src.__support.CPP.bit
    libc.src.__support.CPP.array
  COMPILE_OPTIONS ${math_benchmark_flags}
  LOADER_ARGS --threads 64
)