3.1 Qdrant Configuration for Production

Source: Qdrant Benchmarks

from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Connect to the Qdrant instance listening on localhost:6333.
client = QdrantClient(host="localhost", port=6333)

# Vector storage settings: `size` must equal the embedding model's output
# dimension; similarity is cosine and raw vectors are kept in RAM for speed.
vector_settings = VectorParams(
    size=768,
    distance=Distance.COSINE,
    on_disk=False,
)

# HNSW index build parameters.
hnsw_settings = {
    "m": 16,  # connections per layer
    "ef_construct": 100,  # construction-time accuracy
}

# NOTE(review): memmap_threshold presumably moves segments above this size to
# memory-mapped storage, which may work against on_disk=False above -- confirm
# the unit and the intended value against the Qdrant optimizer docs.
optimizer_settings = {"memmap_threshold": 20000}

# Create the "documents" collection with the settings assembled above.
client.create_collection(
    collection_name="documents",
    vectors_config=vector_settings,
    hnsw_config=hnsw_settings,
    optimizers_config=optimizer_settings,
)

# Query-time HNSW tuning for the search below.
# NOTE(review): recent qdrant-client releases appear to deprecate `search` in
# favour of `query_points` -- confirm against the installed client version.
hnsw_search_params = {
    "hnsw_ef": 128,  # query-time accuracy (higher = more accurate)
    "exact": False,  # approximate (index-based) search, not an exact scan
}

# Fetch the 10 nearest neighbours of `query_embedding` from "documents".
results = client.search(
    collection_name="documents",
    query_vector=query_embedding,
    limit=10,
    search_params=hnsw_search_params,
)
Key Insight

According to Qdrant's own published benchmarks (2024), Qdrant achieves the highest RPS and the lowest latencies in most scenarios, while Milvus excels at indexing speed.

3.2 ChromaDB for Rapid Prototyping

Source: ChromaDB Documentation

import chromadb
from chromadb.config import Settings

# Production configuration: connect to a separately running Chroma server.
# The storage backend and persistence path are configured on the server, not
# on the HTTP client, so they are not passed here. The legacy
# `chroma_db_impl="duckdb+parquet"` setting in particular is rejected by
# current Chroma releases (0.4 and later) as a deprecated configuration.
client = chromadb.HttpClient(
    host="localhost",
    port=8000,
    settings=Settings(
        anonymized_telemetry=False,  # opt out of anonymized usage telemetry
    ),
)

# Collection-level metadata: the "hnsw:space" key is set to cosine.
collection_metadata = {"hnsw:space": "cosine"}

# Create the "documents" collection; `embedding_fn` is passed as the
# collection's embedding function.
collection = client.create_collection(
    name="documents",
    embedding_function=embedding_fn,
    metadata=collection_metadata,
)

# Build one metadata dict and one sequential id ("id_0", "id_1", ...) per text.
# A fresh dict is created for every document so the entries are not shared.
doc_metadatas = [{"source": "doc1"} for _ in texts]
doc_ids = [f"id_{index}" for index, _ in enumerate(texts)]

# Insert the documents together with their metadata and ids.
collection.add(
    documents=texts,
    metadatas=doc_metadatas,
    ids=doc_ids,
)
Best For

Datasets with fewer than 1M vectors, fast prototyping, and development environments.