# Ollama Configuration
# Docs: https://github.com/ollama/ollama/blob/main/docs/faq.md

# Timezone
TZ=America/Los_Angeles

# Model Storage Location
# OLLAMA_MODELS=/root/.ollama/models

# Max Loaded Models (default: 1)
# OLLAMA_MAX_LOADED_MODELS=1

# Max Queue (default: 512)
# OLLAMA_MAX_QUEUE=512

# Number of parallel requests (default: auto)
# OLLAMA_NUM_PARALLEL=4

# Context window size (default: 2048)
# OLLAMA_CONTEXT_LENGTH=4096

# Keep models loaded in memory (default: 5m)
# OLLAMA_KEEP_ALIVE=5m

# Debug logging
# OLLAMA_DEBUG=1

# GPU Configuration (for GTX 1070)
# OLLAMA_GPU_LAYERS=33   # Number of layers to offload to GPU (adjust based on VRAM)
# OLLAMA_GPU_MEMORY=6GB  # Max GPU memory to use (GTX 1070 has 8GB)
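
# Usage (a sketch, not part of the official docs): these variables can be
# loaded by a docker-compose service via `env_file: .env`, or exported in a
# shell before starting the server. The service name "ollama" below is an
# assumption about the surrounding compose setup, not defined in this file.
#   docker compose --env-file .env up -d ollama
#   # or, on a bare-metal install:
#   export OLLAMA_CONTEXT_LENGTH=4096 OLLAMA_KEEP_ALIVE=5m
#   ollama serve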