3.1 Qdrant Configuration for Production
Source: Qdrant Benchmarks
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
# Client for the Qdrant instance at localhost:6333.
client = QdrantClient(host="localhost", port=6333)

# Dense-vector settings for the collection.
vector_params = VectorParams(
    size=768,  # must equal the embedding model's output dimension
    distance=Distance.COSINE,
    on_disk=False,  # keep vectors in RAM for speed
)

# HNSW index parameters: "m" is the number of connections per layer,
# "ef_construct" controls construction-time accuracy.
hnsw_settings = {"m": 16, "ef_construct": 100}

# NOTE(review): memmap_threshold presumably makes segments above this size
# memory-mapped, which may work against on_disk=False -- confirm the unit
# and the intended interaction against the Qdrant storage docs.
optimizer_settings = {"memmap_threshold": 20000}

# Create the collection with an HNSW index.
client.create_collection(
    collection_name="documents",
    vectors_config=vector_params,
    hnsw_config=hnsw_settings,
    optimizers_config=optimizer_settings,
)
# Query-time HNSW parameters: a higher "hnsw_ef" is more accurate;
# "exact": False keeps the search approximate.
# NOTE(review): newer qdrant-client releases appear to deprecate `search`
# in favour of `query_points` -- confirm against the installed version.
hnsw_search_params = {"hnsw_ef": 128, "exact": False}

# Fetch the 10 nearest neighbours of `query_embedding` (defined by the caller).
results = client.search(
    collection_name="documents",
    query_vector=query_embedding,
    limit=10,
    search_params=hnsw_search_params,
)
Key Insight
Qdrant achieves the highest RPS and the lowest latencies in most benchmarks (2024), while Milvus excels at indexing speed.
3.2 ChromaDB for Rapid Prototyping
Source: ChromaDB Documentation
import chromadb
from chromadb.config import Settings
# Client/server configuration: connect to a running Chroma server over HTTP.
# The legacy `chroma_db_impl="duckdb+parquet"` setting is rejected by
# Chroma 0.4+ (the releases that provide HttpClient), and `persist_directory`
# has no effect on a remote client -- persistence is configured on the server.
client = chromadb.HttpClient(
    host="localhost",
    port=8000,
    settings=Settings(anonymized_telemetry=False),  # opt out of telemetry
)
# Create the "documents" collection using cosine distance for its HNSW index.
# `embedding_fn` must be defined by the caller before this point.
collection_metadata = {"hnsw:space": "cosine"}
collection = client.create_collection(
    name="documents",
    metadata=collection_metadata,
    embedding_function=embedding_fn,
)
# Add every text with the same source metadata and a positional id
# ("id_0", "id_1", ...); all three lists have one entry per text.
doc_metadatas = [{"source": "doc1"} for _ in texts]
doc_ids = [f"id_{position}" for position in range(len(texts))]
collection.add(
    documents=texts,
    metadatas=doc_metadatas,
    ids=doc_ids,
)
Best For
Datasets with fewer than 1M vectors, fast prototyping, and development environments.