matthoffner committed on
Commit 326d176 · verified · 1 Parent(s): bbfd261

Update Dockerfile

Files changed (1)
  1. Dockerfile +50 -56
Dockerfile CHANGED
@@ -1,63 +1,57 @@
- # Using the specified base image that's suited for llama-cpp-python
- # Define the image argument and provide a default value
- ARG IMAGE=python:3-slim-bullseye
-
- # Use the image as specified
- FROM ${IMAGE}
-
- # Re-declare the ARG after FROM
- ARG IMAGE
-
- # Update and upgrade the existing packages
- RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
-     python3 \
-     python3-pip \
-     ninja-build \
-     build-essential
-
- RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
-
- # Perform the conditional installations based on the image
- RUN echo "Image: ${IMAGE}" && \
-     if [ "${IMAGE}" = "python:3-slim-bullseye" ] ; then \
-     echo "OpenBLAS install:" && \
-     apt-get install -y --no-install-recommends libopenblas-dev && \
-     LLAMA_OPENBLAS=1 pip install llama-cpp-python --verbose; \
-     else \
-     echo "CuBLAS install:" && \
-     LLAMA_CUBLAS=1 pip install llama-cpp-python --verbose; \
-     fi
-
- VOLUME ["/models"]
-
- # Environment variables for model details
- ENV MODEL_NAME="llava-1.6-mistral-7b-gguf"
- ENV DEFAULT_MODEL_FILE="llava-v1.6-mistral-7b.Q3_K_XS.gguf"
- ENV MODEL_USER="cjpais"
- ENV DEFAULT_MODEL_BRANCH="main"
- ENV DEFAULT_CLIP_MODEL_FILE="mmproj-model-f16.gguf"
- ENV MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${DEFAULT_MODEL_FILE}"
- ENV CLIP_MODEL_URL="https://huggingface.co/${MODEL_USER}/${MODEL_NAME}/resolve/${DEFAULT_MODEL_BRANCH}/${DEFAULT_CLIP_MODEL_FILE}"
-
- # Set up the working directory
  WORKDIR /app

- # Ensure curl is available for downloading the models
- RUN apt-get update && apt-get install -y curl && \
-     apt-get clean && rm -rf /var/lib/apt/lists/*

- # Create a directory for the models
- RUN mkdir -p /models

- # Download the models
- RUN curl -L "${MODEL_URL}" -o /models/${DEFAULT_MODEL_FILE} && \
-     curl -L "${CLIP_MODEL_URL}" -o /models/${DEFAULT_CLIP_MODEL_FILE}

- ENV HOST=0.0.0.0
- ENV PORT=8000
- # Expose the port the server will run on
- EXPOSE 8000

- # Command to run the server, using environment variables for model paths
- CMD ["python3", "-m", "llama_cpp.server", "--model", "/models/llava-v1.6-mistral-7b.Q3_K_XS.gguf", "--clip_model_path", "/models/mmproj-model-f16.gguf", "--chat_format", "llava-1-5"]
+ # Base image
+ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
+
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ # Update and install necessary dependencies
+ RUN apt update && \
+     apt install --no-install-recommends -y \
+     build-essential \
+     python3 \
+     python3-pip \
+     wget \
+     curl \
+     git \
+     cmake \
+     zlib1g-dev \
+     libblas-dev && \
+     apt clean && \
+     rm -rf /var/lib/apt/lists/*
+
+ # Setting up CUDA environment variables (this may not be necessary since you're using the official nvidia/cuda image, but it's good to be explicit)
+ ENV PATH="/usr/local/cuda/bin:$PATH" \
+     LD_LIBRARY_PATH="/usr/local/cuda/lib64:$LD_LIBRARY_PATH" \
+     CUDA_HOME="/usr/local/cuda"
+
  WORKDIR /app

+ # Download ggml and mmproj models from HuggingFace
+ RUN wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf && \
+     wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf
+
+ # Clone and build llava-server with CUDA support
+ RUN git clone https://github.com/ggerganov/llama.cpp.git && \
+     cd llama.cpp && \
+     git submodule init && \
+     git submodule update && \
+     make LLAMA_CUBLAS=1
+
+ # Create a non-root user for security reasons
+ RUN useradd -m -u 1000 user && \
+     mkdir -p /home/user/app && \
+     cp /app/ggml-model-q4_k.gguf /home/user/app && \
+     cp /app/mmproj-model-f16.gguf /home/user/app

+ RUN chown user:user /home/user/app/ggml-model-q4_k.gguf && \
+     chown user:user /home/user/app/mmproj-model-f16.gguf

+ USER user
+ ENV HOME=/home/user

+ WORKDIR $HOME/app

+ # Expose the port
+ EXPOSE 8080

+ # Start the llava-server with models
+ CMD ["/app/llama.cpp/server", "--model", "ggml-model-q4_k.gguf", "--mmproj", "mmproj-model-f16.gguf", "--threads", "4"]
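
For reference, a typical build-and-run sequence for the updated image could look like the sketch below. The image tag, the port mapping, and the use of the NVIDIA Container Toolkit (--gpus all) are illustrative assumptions, not part of this commit:

    # build the image from this Dockerfile (tag "llava-server" is arbitrary)
    $ docker build -t llava-server .
    # run with GPU access and publish the exposed port
    $ docker run --gpus all -p 8080:8080 llava-server

Depending on the llama.cpp server's default bind address, it may also be necessary to append "--host" "0.0.0.0" to the CMD so the published port is reachable from outside the container.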
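Once the container is running, the llama.cpp example server can be queried over HTTP. The following is a rough sketch of a multimodal request against its /completion endpoint; the exact request shape varies across llama.cpp versions, and <BASE64_IMAGE> is a placeholder for a base64-encoded image:

    # send a prompt plus an image to the server; [img-10] refers to the image with id 10
    $ curl http://localhost:8080/completion \
        -H "Content-Type: application/json" \
        -d '{
              "prompt": "USER: [img-10] Describe the image.\nASSISTANT:",
              "n_predict": 128,
              "image_data": [{"data": "<BASE64_IMAGE>", "id": 10}]
            }'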