From 4d0a1606232e2a50707a78354e4e8b4d4d7d79ce Mon Sep 17 00:00:00 2001 From: Kun Ran Date: Mon, 15 Dec 2025 15:58:59 +1100 Subject: [PATCH 1/3] feat: add conda build env and script --- Dockerfile | 18 +-- Dockerfile.rhel8 | 71 ++++++++++++ environment.yaml | 25 ++++ scripts/build_conda.sh | 256 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 361 insertions(+), 9 deletions(-) create mode 100644 Dockerfile.rhel8 create mode 100644 environment.yaml create mode 100755 scripts/build_conda.sh diff --git a/Dockerfile b/Dockerfile index ea1979f3f..d2b580358 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,17 @@ #Copyright(c) Microsoft Corporation.All rights reserved. #Licensed under the MIT license. +# Kun: updated to install DiskANN to system directly, and update base image -FROM ubuntu:jammy +FROM ubuntu:24.04 RUN apt update -RUN apt install -y software-properties-common -RUN add-apt-repository -y ppa:git-core/ppa -RUN apt update -RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 +RUN apt install -y software-properties-common git make cmake g++ libaio-dev \ + libgoogle-perftools-dev libunwind-dev clang-format libboost-dev \ + libboost-program-options-dev libmkl-full-dev libcpprest-dev WORKDIR /app -RUN git clone https://github.com/microsoft/DiskANN.git +RUN cd /app && git clone https://github.com/rmit-ir/DiskANN WORKDIR /app/DiskANN -RUN mkdir build -RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -RUN cmake --build build -- -j +RUN cd /app/DiskANN && mkdir build +RUN cd /app/DiskANN/build && cmake -DCMAKE_INSTALL_PREFIX=/usr/local \ + -DCMAKE_BUILD_TYPE=Release .. && make -j && make install diff --git a/Dockerfile.rhel8 b/Dockerfile.rhel8 new file mode 100644 index 000000000..03fe90fe7 --- /dev/null +++ b/Dockerfile.rhel8 @@ -0,0 +1,71 @@ +# docker build -f Dockerfile.rhel8 -t diskann:latest-rhel8 . +# Rocky Linux 8 provides glibc 2.28 compatibility (RHEL 8 compatible) +FROM rockylinux:8 + +# Enable PowerTools (CodeReady Builder) for additional development packages +RUN dnf install -y dnf-plugins-core && \ + dnf config-manager --set-enabled powertools && \ + dnf install -y epel-release + +# Install build essentials and dependencies +RUN dnf groupinstall -y "Development Tools" && \ + dnf install -y \ + git \ + make \ + cmake \ + gcc \ + gcc-c++ \ + libaio-devel \ + gperftools-devel \ + libunwind-devel \ + clang-tools-extra \ + boost-devel \ + openssl-devel \ + bzip2-devel \ + libffi-devel \ + zlib-devel \ + wget + +# Install Python 3.11 from source +RUN cd /tmp && \ + wget https://www.python.org/ftp/python/3.11.10/Python-3.11.10.tgz && \ + tar xzf Python-3.11.10.tgz && \ + cd Python-3.11.10 && \ + ./configure --enable-optimizations --with-ensurepip=install && \ + make -j$(nproc) && \ + make altinstall && \ + cd / && \ + rm -rf /tmp/Python-3.11.10* && \ + ln -sf /usr/local/bin/python3.11 /usr/local/bin/python3 && \ + ln -sf /usr/local/bin/pip3.11 /usr/local/bin/pip3 + +# Install Intel MKL +RUN dnf install -y yum-utils && \ + yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo && \ + rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \ + dnf install -y intel-mkl-2020.4-912 + +# Build and install cpprestsdk from source (not available in standard RHEL 8 repos) +RUN cd /tmp && \ + git clone https://github.com/microsoft/cpprestsdk.git && \ + cd cpprestsdk && \ + git checkout 2.10.18 && \ + git submodule update --init && \ + mkdir build && \ + cd build && \ + cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF -DBUILD_SAMPLES=OFF && \ + make -j$(nproc) && \ + make install && \ + ldconfig && \ + cd / && \ + rm -rf /tmp/cpprestsdk + +WORKDIR /app +RUN git clone https://github.com/microsoft/DiskANN.git +WORKDIR /app/DiskANN +RUN mkdir build +RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release +RUN cmake --build build -- -j$(nproc) + +# # Set library path for Intel MKL +# ENV LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:${LD_LIBRARY_PATH} diff --git a/environment.yaml b/environment.yaml new file mode 100644 index 000000000..17847b625 --- /dev/null +++ b/environment.yaml @@ -0,0 +1,25 @@ +name: diskann-conda +channels: + - conda-forge + - anaconda +dependencies: + - bzip2 + - cmake + - curl + - make + - git + - gcc_linux-64 + - gxx_linux-64 + - binutils_linux-64 + - boost-cpp + - gperftools + - libunwind + - libaio + - mkl-devel + - intel-openmp + - pkg-config + - tar + - wget + - xz + - zstd + - pkg-config diff --git a/scripts/build_conda.sh b/scripts/build_conda.sh new file mode 100755 index 000000000..4aa2d8f64 --- /dev/null +++ b/scripts/build_conda.sh @@ -0,0 +1,256 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ -z "${CONDA_PREFIX:-}" ]]; then + echo "Activate the conda environment first (e.g., conda activate diskann-conda)." >&2 + exit 1 +fi + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +DEPS_DIR="${ROOT_DIR}/.deps" +SRC_DIR="${DEPS_DIR}/src" +PREFIX_DIR="${DEPS_DIR}/prefix" +BUILD_DIR="${ROOT_DIR}/build" + +mkdir -p "${SRC_DIR}" "${PREFIX_DIR}" + +JOBS="${JOBS:-}" +if [[ -z "${JOBS}" ]]; then + if command -v nproc >/dev/null 2>&1; then + JOBS="$(nproc)" + else + JOBS="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 4)" + fi +fi + +if command -v mamba >/dev/null 2>&1; then + INSTALLER="mamba" +else + INSTALLER="conda" +fi + +conda_install() { + local pkgs=("$@") + "${INSTALLER}" install -y -c conda-forge -c anaconda -c https://software.repos.intel.com/python/conda/ "${pkgs[@]}" +} + +download() { + local url="$1" + local out="$2" + if command -v curl >/dev/null 2>&1; then + curl -L -o "${out}" "${url}" + else + wget -O "${out}" "${url}" + fi +} + +ensure_conda_deps() { + if ! conda_install boost-cpp gperftools libaio libunwind mkl-devel intel-openmp cmake make pkg-config binutils_linux-64 gcc_linux-64 gxx_linux-64; then + echo "Conda install failed; will attempt source builds for Boost, gperftools, and libaio." >&2 + fi +} + +ensure_mkl() { + local mkl_lib="${MKL_PATH:-${CONDA_PREFIX}/lib}/libmkl_core.so" + local mkl_inc="${MKL_INCLUDE_PATH:-${CONDA_PREFIX}/include}/mkl.h" + local omp_lib="${OMP_PATH:-${CONDA_PREFIX}/lib}/libiomp5.so" + + if [[ ! -f "${mkl_lib}" || ! -f "${mkl_inc}" || ! -f "${omp_lib}" ]]; then + echo "MKL or Intel OpenMP not found; retrying conda install from Intel channel." >&2 + conda_install mkl-devel intel-openmp || true + fi + + if [[ ! -f "${mkl_lib}" || ! -f "${mkl_inc}" || ! -f "${omp_lib}" ]]; then + echo "MKL/Intel OpenMP still missing. Install them via conda (mkl-devel, intel-openmp) and retry." >&2 + exit 1 + fi +} + +ensure_boost() { + local have_boost=0 + if ls "${CONDA_PREFIX}"/lib/libboost_program_options* >/dev/null 2>&1; then + have_boost=1 + elif ls "${PREFIX_DIR}"/lib/libboost_program_options* >/dev/null 2>&1; then + have_boost=1 + fi + + if [[ "${have_boost}" -eq 1 ]]; then + return 0 + fi + + local boost_version="1.85.0" + local boost_version_underscore="1_85_0" + local boost_tar="boost_${boost_version_underscore}.tar.gz" + local boost_url="https://boostorg.jfrog.io/artifactory/main/release/${boost_version}/source/${boost_tar}" + local boost_src="${SRC_DIR}/boost_${boost_version_underscore}" + + if [[ ! -d "${boost_src}" ]]; then + echo "Building Boost from source (${boost_version})..." >&2 + download "${boost_url}" "${SRC_DIR}/${boost_tar}" + tar -xzf "${SRC_DIR}/${boost_tar}" -C "${SRC_DIR}" + fi + + pushd "${boost_src}" >/dev/null + ./bootstrap.sh --with-libraries=program_options --prefix="${PREFIX_DIR}" + ./b2 install -j "${JOBS}" + popd >/dev/null +} + +ensure_gperftools() { + if ls "${CONDA_PREFIX}"/lib/libtcmalloc* >/dev/null 2>&1; then + return 0 + fi + if ls "${PREFIX_DIR}"/lib/libtcmalloc* >/dev/null 2>&1; then + return 0 + fi + + local gpt_version="2.17.2" + local gpt_tar="gperftools-${gpt_version}.tar.gz" + local gpt_url="https://github.com/gperftools/gperftools/releases/download/gperftools-${gpt_version}/${gpt_tar}" + local gpt_src="${SRC_DIR}/gperftools-${gpt_version}" + + if [[ ! -d "${gpt_src}" ]]; then + echo "Building gperftools from source (${gpt_version})..." >&2 + download "${gpt_url}" "${SRC_DIR}/${gpt_tar}" + tar -xzf "${SRC_DIR}/${gpt_tar}" -C "${SRC_DIR}" + fi + + pushd "${gpt_src}" >/dev/null + ./configure --prefix="${PREFIX_DIR}" + make -j "${JOBS}" + make install + popd >/dev/null +} + +ensure_libaio() { + if [[ -f "${CONDA_PREFIX}/lib/libaio.so" || -f "${PREFIX_DIR}/lib/libaio.so" ]]; then + return 0 + fi + + local libaio_version="0.3.113" + local libaio_tar="libaio-${libaio_version}.tar.gz" + local libaio_url="https://pagure.io/libaio/archive/libaio-${libaio_version}/${libaio_tar}" + local libaio_src="${SRC_DIR}/libaio-${libaio_version}" + + if [[ ! -d "${libaio_src}" ]]; then + echo "Building libaio from source (${libaio_version})..." >&2 + download "${libaio_url}" "${SRC_DIR}/${libaio_tar}" + tar -xzf "${SRC_DIR}/${libaio_tar}" -C "${SRC_DIR}" + fi + + pushd "${libaio_src}" >/dev/null + make -j "${JOBS}" + make install prefix="${PREFIX_DIR}" libdir="${PREFIX_DIR}/lib" + popd >/dev/null +} + +ensure_conda_deps +ensure_mkl +ensure_boost +ensure_gperftools +ensure_libaio + +export CMAKE_PREFIX_PATH="${PREFIX_DIR}:${CONDA_PREFIX}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}" +export CPATH="${PREFIX_DIR}/include:${CONDA_PREFIX}/include${CPATH:+:${CPATH}}" +export LIBRARY_PATH="${PREFIX_DIR}/lib:${CONDA_PREFIX}/lib${LIBRARY_PATH:+:${LIBRARY_PATH}}" +export LD_LIBRARY_PATH="${PREFIX_DIR}/lib:${CONDA_PREFIX}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" +export PKG_CONFIG_PATH="${PREFIX_DIR}/lib/pkgconfig:${CONDA_PREFIX}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" + +MKL_PATH="${MKL_PATH:-${CONDA_PREFIX}/lib}" +MKL_INCLUDE_PATH="${MKL_INCLUDE_PATH:-${CONDA_PREFIX}/include}" +OMP_PATH="${OMP_PATH:-${CONDA_PREFIX}/lib}" + +REAL_CC_BIN="${CONDA_PREFIX}/bin/x86_64-conda-linux-gnu-gcc" +REAL_CXX_BIN="${CONDA_PREFIX}/bin/x86_64-conda-linux-gnu-g++" +LD_BIN="${CONDA_PREFIX}/bin/x86_64-conda-linux-gnu-ld" +AR_BIN="${CONDA_PREFIX}/bin/x86_64-conda-linux-gnu-ar" +RANLIB_BIN="${CONDA_PREFIX}/bin/x86_64-conda-linux-gnu-ranlib" + +if [[ ! -x "${REAL_CC_BIN}" ]]; then + REAL_CC_BIN="${CONDA_PREFIX}/bin/gcc" +fi +if [[ ! -x "${REAL_CXX_BIN}" ]]; then + REAL_CXX_BIN="${CONDA_PREFIX}/bin/g++" +fi +if [[ ! -x "${LD_BIN}" ]]; then + LD_BIN="${CONDA_PREFIX}/bin/ld" +fi +if [[ ! -x "${AR_BIN}" ]]; then + AR_BIN="${CONDA_PREFIX}/bin/ar" +fi +if [[ ! -x "${RANLIB_BIN}" ]]; then + RANLIB_BIN="${CONDA_PREFIX}/bin/ranlib" +fi + +if [[ -f "${BUILD_DIR}/CMakeCache.txt" ]]; then + rm -rf "${BUILD_DIR}/CMakeCache.txt" "${BUILD_DIR}/CMakeFiles" +fi + +TOOLCHAIN_BIN="${DEPS_DIR}/toolchain/bin" +mkdir -p "${TOOLCHAIN_BIN}" + +if [[ -x "${LD_BIN}" ]]; then + cat > "${TOOLCHAIN_BIN}/ld" < "${TOOLCHAIN_BIN}/gcc" < "${TOOLCHAIN_BIN}/cc" < "${TOOLCHAIN_BIN}/g++" < "${TOOLCHAIN_BIN}/c++" < Date: Sun, 1 Feb 2026 12:55:18 +0000 Subject: [PATCH 2/3] docs: build diskann with conda --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index a20a1d671..4f312e23f 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,21 @@ sudo sh l_BaseKit_p_2022.1.2.146.sh -a --components intel.oneapi.lin.mkl.devel - mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && make -j ``` +### Conda build (no sudo/apt) + +Use the provided conda environment and build script from the repo root: +```bash +conda env create -f environment.yaml +conda activate diskann-conda +./scripts/build_conda.sh +``` + +To use the built binaries anywhere in the current terminal session: +```bash +export PATH="$PWD/build/apps:$PWD/build/apps/utils:$PATH" +export LD_LIBRARY_PATH="$PWD/build/src:$PWD/.deps/prefix/lib:$CONDA_PREFIX/lib:${LD_LIBRARY_PATH:-}" +``` + ## Windows build: The Windows version has been tested with Enterprise editions of Visual Studio 2022, 2019 and 2017. It should work with the Community and Professional editions as well without any changes. From 7708f545d24210302d3748eb7450ba02747ebe6a Mon Sep 17 00:00:00 2001 From: Kun Ran Date: Sun, 1 Feb 2026 13:01:26 +0000 Subject: [PATCH 3/3] fix: conda build failure on a particular machine fix Dockerfile build repo remove unused Dockerfile revert Dockerfile revert Dockerfile --- Dockerfile | 18 ++++++------ Dockerfile.rhel8 | 71 ---------------------------------------------- include/distance.h | 2 ++ 3 files changed, 11 insertions(+), 80 deletions(-) delete mode 100644 Dockerfile.rhel8 diff --git a/Dockerfile b/Dockerfile index d2b580358..ea1979f3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,17 @@ #Copyright(c) Microsoft Corporation.All rights reserved. #Licensed under the MIT license. -# Kun: updated to install DiskANN to system directly, and update base image -FROM ubuntu:24.04 +FROM ubuntu:jammy RUN apt update -RUN apt install -y software-properties-common git make cmake g++ libaio-dev \ - libgoogle-perftools-dev libunwind-dev clang-format libboost-dev \ - libboost-program-options-dev libmkl-full-dev libcpprest-dev +RUN apt install -y software-properties-common +RUN add-apt-repository -y ppa:git-core/ppa +RUN apt update +RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 WORKDIR /app -RUN cd /app && git clone https://github.com/rmit-ir/DiskANN +RUN git clone https://github.com/microsoft/DiskANN.git WORKDIR /app/DiskANN -RUN cd /app/DiskANN && mkdir build -RUN cd /app/DiskANN/build && cmake -DCMAKE_INSTALL_PREFIX=/usr/local \ - -DCMAKE_BUILD_TYPE=Release .. && make -j && make install +RUN mkdir build +RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release +RUN cmake --build build -- -j diff --git a/Dockerfile.rhel8 b/Dockerfile.rhel8 deleted file mode 100644 index 03fe90fe7..000000000 --- a/Dockerfile.rhel8 +++ /dev/null @@ -1,71 +0,0 @@ -# docker build -f Dockerfile.rhel8 -t diskann:latest-rhel8 . -# Rocky Linux 8 provides glibc 2.28 compatibility (RHEL 8 compatible) -FROM rockylinux:8 - -# Enable PowerTools (CodeReady Builder) for additional development packages -RUN dnf install -y dnf-plugins-core && \ - dnf config-manager --set-enabled powertools && \ - dnf install -y epel-release - -# Install build essentials and dependencies -RUN dnf groupinstall -y "Development Tools" && \ - dnf install -y \ - git \ - make \ - cmake \ - gcc \ - gcc-c++ \ - libaio-devel \ - gperftools-devel \ - libunwind-devel \ - clang-tools-extra \ - boost-devel \ - openssl-devel \ - bzip2-devel \ - libffi-devel \ - zlib-devel \ - wget - -# Install Python 3.11 from source -RUN cd /tmp && \ - wget https://www.python.org/ftp/python/3.11.10/Python-3.11.10.tgz && \ - tar xzf Python-3.11.10.tgz && \ - cd Python-3.11.10 && \ - ./configure --enable-optimizations --with-ensurepip=install && \ - make -j$(nproc) && \ - make altinstall && \ - cd / && \ - rm -rf /tmp/Python-3.11.10* && \ - ln -sf /usr/local/bin/python3.11 /usr/local/bin/python3 && \ - ln -sf /usr/local/bin/pip3.11 /usr/local/bin/pip3 - -# Install Intel MKL -RUN dnf install -y yum-utils && \ - yum-config-manager --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo && \ - rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && \ - dnf install -y intel-mkl-2020.4-912 - -# Build and install cpprestsdk from source (not available in standard RHEL 8 repos) -RUN cd /tmp && \ - git clone https://github.com/microsoft/cpprestsdk.git && \ - cd cpprestsdk && \ - git checkout 2.10.18 && \ - git submodule update --init && \ - mkdir build && \ - cd build && \ - cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF -DBUILD_SAMPLES=OFF && \ - make -j$(nproc) && \ - make install && \ - ldconfig && \ - cd / && \ - rm -rf /tmp/cpprestsdk - -WORKDIR /app -RUN git clone https://github.com/microsoft/DiskANN.git -WORKDIR /app/DiskANN -RUN mkdir build -RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -RUN cmake --build build -- -j$(nproc) - -# # Set library path for Intel MKL -# ENV LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:${LD_LIBRARY_PATH} diff --git a/include/distance.h b/include/distance.h index f3b1de25a..9cd22f20c 100644 --- a/include/distance.h +++ b/include/distance.h @@ -1,4 +1,6 @@ #pragma once + +#include #include "windows_customizations.h" #include