# ARGs declared before the first FROM are global and may be used in FROM lines.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=11.7.1
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

# ---- Build stage -------------------------------------------------------------
# NOTE(review): nothing from this stage is consumed by the runtime stage below
# (the file contains no `COPY --from=build` and no build command runs here).
# Either drop this stage or build the llama-cpp-python wheel here and copy the
# artifact into the runtime stage.
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# One RUN layer: keeping `apt-get update` and every install together prevents a
# stale package index when an earlier layer is served from cache; the list
# cleanup keeps the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        git \
        python3 \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the whole build context (use a .dockerignore to keep this minimal).
COPY . .

# Unprivileged user; uid 1000 matches the user created in the runtime stage.
RUN useradd -m -u 1000 user

USER user

# CUDA_DOCKER_ARCH=all targets every supported compute capability;
# LLAMA_CUBLAS=1 selects the cuBLAS backend when llama.cpp gets compiled.
ENV CUDA_DOCKER_ARCH=all \
    LLAMA_CUBLAS=1
# ---- Runtime stage -----------------------------------------------------------
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# Single root-level RUN: create the unprivileged user and install every apt
# package this stage needs. The original installed python3/python3-pip in a
# later `USER root` detour without re-running `apt-get update` in that layer,
# which breaks if the earlier layer's index is cache-stale; consolidating here
# fixes that and drops two layers.
RUN useradd -m -u 1000 user && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev \
        ninja-build \
        build-essential \
        pkg-config \
        curl \
        python3 \
        python3-pip && \
    rm -rf /var/lib/apt/lists/*

USER user

# ~/.local/bin is where per-user pip installs place console scripts (uvicorn).
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# llama-cpp-python[server] provides the OpenAI-compatible HTTP server;
# --verbose surfaces the native build log if a wheel has to be compiled.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --verbose llama-cpp-python[server]

# Bake the GGUF model into the image; -L follows Hugging Face's CDN redirect.
RUN mkdir $HOME/model && \
    curl -L https://huggingface.co/matthoffner/Magicoder-S-DS-6.7B-GGUF/resolve/main/Magicoder-S-DS-6.7B_Q4_K_M.gguf -o $HOME/model/gguf-model.gguf

COPY --chown=user ./main.py $HOME/app/

# 7860 is the conventional Hugging Face Spaces port.
ENV HOST=0.0.0.0 \
    PORT=7860

EXPOSE ${PORT}

# Sanity check: surfaces the model file in the build log so a failed download
# is caught at build time rather than at container start.
RUN ls -la $HOME/model

# NOTE(review): exec-form CMD does not expand environment variables, so the
# host/port are hard-coded here — keep them in sync with HOST/PORT above.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "debug"]