update

aa47c1c5 · xuxzh1 · 0cb78a2f · 0cb78a2f · 0cb78a2f · 0cb78a2f
Commit aa47c1c5 authored Dec 06, 2024 by xuxzh1 🎱
20 changed files
--- a/llm/generate/generate_darwin.go
+++ b/llm/generate/generate_darwin.go
-package generate
-//go:generate bash ./gen_darwin.sh
--- a/llm/generate/generate_linux.go
+++ b/llm/generate/generate_linux.go
-package generate
-//go:generate bash ./gen_linux.sh
--- a/llm/generate/generate_windows.go
+++ b/llm/generate/generate_windows.go
-package generate
-//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
--- a/llm/llama.cpp/.clang-tidy
+++ b/llm/llama.cpp/.clang-tidy
---
-Checks: >
-    bugprone-*,
-    -bugprone-easily-swappable-parameters,
-    -bugprone-implicit-widening-of-multiplication-result,
-    -bugprone-misplaced-widening-cast,
-    -bugprone-narrowing-conversions,
-    readability-*,
-    -readability-avoid-unconditional-preprocessor-if,
-    -readability-function-cognitive-complexity,
-    -readability-identifier-length,
-    -readability-implicit-bool-conversion,
-    -readability-magic-numbers,
-    -readability-uppercase-literal-suffix,
-    -readability-simplify-boolean-expr,
-    clang-analyzer-*,
-    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
-    performance-*,
-    portability-*,
-    misc-*,
-    -misc-const-correctness,
-    -misc-non-private-member-variables-in-classes,
-    -misc-no-recursion,
-FormatStyle: none
--- a/llm/llama.cpp/.devops/cloud-v-pipeline
+++ b/llm/llama.cpp/.devops/cloud-v-pipeline
-node('x86_runner1'){            // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
-    stage('Cleanup'){
-        cleanWs()               // Cleaning previous CI build in workspace
-    }
-    stage('checkout repo'){
-        retry(5){               // Retry if the cloning fails due to some reason
-            checkout scm        // Clone the repo on Runner
-        }
-    }
-    stage('Compiling llama.cpp'){
-        sh'''#!/bin/bash
-            make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
-        '''
-    }
-    stage('Running llama.cpp'){
-        sh'''#!/bin/bash
-            module load gnu-bin2/0.1            # loading latest versions of vector qemu and vector gcc
-            qemu-riscv64 -L /softwares/gnu-bin2/sysroot  -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt            # Running llama.cpp on vector qemu-riscv64
-            cat llama_log.txt                   # Printing results
-        '''
-    }
-}
--- a/llm/llama.cpp/.devops/full-cuda.Dockerfile
+++ b/llm/llama.cpp/.devops/full-cuda.Dockerfile
-ARG UBUNTU_VERSION=22.04
-# This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
-# Target the CUDA build image
-ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-FROM ${BASE_CUDA_DEV_CONTAINER} AS build
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
-RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
-COPY requirements.txt   requirements.txt
-COPY requirements       requirements
-RUN pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt
-WORKDIR /app
-COPY . .
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-# Enable cURL
-ENV LLAMA_CURL=1
-RUN make -j$(nproc)
-ENTRYPOINT ["/app/.devops/tools.sh"]
--- a/llm/llama.cpp/.devops/full-rocm.Dockerfile
+++ b/llm/llama.cpp/.devops/full-rocm.Dockerfile
-ARG UBUNTU_VERSION=22.04
-# This needs to generally match the container host's environment.
-ARG ROCM_VERSION=5.6
-# Target the CUDA build image
-ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
-FROM ${BASE_ROCM_DEV_CONTAINER} AS build
-# Unless otherwise specified, we make a fat build.
-# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
-# This is mostly tied to rocBLAS supported archs.
-ARG ROCM_DOCKER_ARCH=\
-    gfx803 \
-    gfx900 \
-    gfx906 \
-    gfx908 \
-    gfx90a \
-    gfx1010 \
-    gfx1030 \
-    gfx1100 \
-    gfx1101 \
-    gfx1102
-COPY requirements.txt   requirements.txt
-COPY requirements       requirements
-RUN pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt
-WORKDIR /app
-COPY . .
-# Set nvcc architecture
-ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
-# Enable ROCm
-ENV GGML_HIPBLAS=1
-ENV CC=/opt/rocm/llvm/bin/clang
-ENV CXX=/opt/rocm/llvm/bin/clang++
-# Enable cURL
-ENV LLAMA_CURL=1
-RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev
-RUN make -j$(nproc)
-ENTRYPOINT ["/app/.devops/tools.sh"]
--- a/llm/llama.cpp/.devops/full.Dockerfile
+++ b/llm/llama.cpp/.devops/full.Dockerfile
-ARG UBUNTU_VERSION=22.04
-FROM ubuntu:$UBUNTU_VERSION AS build
-RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
-COPY requirements.txt   requirements.txt
-COPY requirements       requirements
-RUN pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt
-WORKDIR /app
-COPY . .
-ENV LLAMA_CURL=1
-RUN make -j$(nproc)
-ENV LC_ALL=C.utf8
-ENTRYPOINT ["/app/.devops/tools.sh"]
--- a/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-cli-cuda.Dockerfile
-ARG UBUNTU_VERSION=22.04
-# This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
-# Target the CUDA build image
-ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-# Target the CUDA runtime image
-ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
-FROM ${BASE_CUDA_DEV_CONTAINER} AS build
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
-RUN apt-get update && \
-    apt-get install -y build-essential git
-WORKDIR /app
-COPY . .
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-RUN make -j$(nproc) llama-cli
-FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
-RUN apt-get update && \
-    apt-get install -y libgomp1
-COPY --from=build /app/llama-cli /llama-cli
-ENTRYPOINT [ "/llama-cli" ]
--- a/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-cli-intel.Dockerfile
--- a/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-cli-rocm.Dockerfile
--- a/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-cli-vulkan.Dockerfile
--- a/llm/llama.cpp/.devops/llama-cli.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-cli.Dockerfile
--- a/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec
+++ b/llm/llama.cpp/.devops/llama-cpp-cuda.srpm.spec
--- a/llm/llama.cpp/.devops/llama-cpp.srpm.spec
+++ b/llm/llama.cpp/.devops/llama-cpp.srpm.spec
--- a/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-server-cuda.Dockerfile
--- a/llm/llama.cpp/.devops/llama-server-intel.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-server-intel.Dockerfile
--- a/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-server-rocm.Dockerfile
--- a/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-server-vulkan.Dockerfile
--- a/llm/llama.cpp/.devops/llama-server.Dockerfile
+++ b/llm/llama.cpp/.devops/llama-server.Dockerfile