Instead, we offer examples that you can implement yourself. This approach gives you more flexibility and control over your project's dependencies and functionality.
Below is a table of the popular vector search solutions:
FAISS is primarily a C++ library with Python bindings, optimized for performance. It's typically used for in-memory indexing or self-hosted scenarios. Direct usage from Node.js/TypeScript is less common without a dedicated server or WASM compilation.
# Requires: pip install faiss-cpu  # or faiss-gpu
import faiss
import numpy as np
from typing import List, Tuple, Any  # Added for type hints


def create_faiss_index(dimension: int) -> faiss.Index:
    """Create an empty flat L2 FAISS index.

    Args:
        dimension: Length of every vector that will be stored in the index.

    Returns:
        A new ``faiss.IndexFlatL2`` containing no vectors.
    """
    # Example: Flat L2 index
    return faiss.IndexFlatL2(dimension)


def add_to_faiss_index(index: faiss.Index, vectors: np.ndarray):
    """Add vectors to the FAISS index (the index is modified in place).

    Args:
        index: Index to extend.
        vectors: 2D array with one row per vector; converted to float32
            if it has another dtype.
    """
    # FAISS consumes C-contiguous float32 buffers. This is a no-op (no copy)
    # when the input already satisfies both requirements.
    vectors = np.ascontiguousarray(vectors, dtype=np.float32)
    index.add(vectors)


def search_faiss_index(
    index: faiss.Index,
    query_vector: np.ndarray,
    top_k: int = 5,
) -> Tuple[np.ndarray, np.ndarray]:
    """Search the FAISS index for the nearest neighbours of the query.

    Args:
        index: Index to search.
        query_vector: A single 1D query, or a 2D array with one query per row.
        top_k: Number of neighbours to return per query.

    Returns:
        ``(distances, indices)`` exactly as returned by ``index.search``.
    """
    query_vector = np.ascontiguousarray(query_vector, dtype=np.float32)
    # Ensure query is a 2D array: a lone vector becomes a batch of one.
    if query_vector.ndim == 1:
        query_vector = query_vector.reshape(1, -1)
    distances, indices = index.search(query_vector, top_k)
    return distances, indices


# Example Usage:
# d = 128  # Dimensionality of embeddings
# index = create_faiss_index(d)
# print(f"Index created. Is trained: {index.is_trained}, Total vectors: {index.ntotal}")
# data_vectors = np.random.random((1000, d)).astype('float32')
# add_to_faiss_index(index, data_vectors)
# print(f"Added {data_vectors.shape[0]} vectors. Total vectors: {index.ntotal}")
# query = np.random.random((1, d)).astype('float32')
# D, I = search_faiss_index(index, query, top_k=5)
# print("Distances:", D)
# print("Neighbors:", I)

# In production, you would also add functions to:
# - save_index(index, filename) -> faiss.write_index(index, filename)
# - load_index(filename) -> faiss.read_index(filename)
// FAISS is primarily a C++/Python library.
// Direct usage in TypeScript often involves:
// 1. Calling a Python backend service that uses FAISS.
// 2. Using community-maintained WASM ports (may have limitations).
// 3. Using alternative JS-native vector search libraries like hnswlib-node.
// Example using hnswlib-node (conceptual - requires installation)
// npm install hnswlib-node
/*
import { HierarchicalNSW } from 'hnswlib-node';
async function exampleHNSW() {
const dim = 128;
const maxElements = 1000;
// Initialize index
const index = new HierarchicalNSW('l2', dim); // 'l2' for Euclidean distance
index.initIndex(maxElements);
// Add vectors (example data)
for (let i = 0; i < maxElements; i++) {
const vector = Array.from({ length: dim }, () => Math.random());
index.addPoint(vector, i); // Add vector with its ID (index i)
}
// Query
const queryVector = Array.from({ length: dim }, () => Math.random());
const numNeighbors = 5;
const result = index.searchKnn(queryVector, numNeighbors);
console.log("HNSW Neighbors:", result.neighbors); // Indices
console.log("HNSW Distances:", result.distances);
}
// exampleHNSW();
*/
// Runtime reminder that this snippet does not call FAISS itself.
for (const notice of [
  'FAISS is typically used via Python or a dedicated service.',
  'Consider using a JS-native library like hnswlib-node or a managed vector DB for TypeScript projects.',
]) {
  console.log(notice)
}
# Requires: pip install "pinecone-client>=3"  (newer releases are published as `pinecone`)
import os
from pinecone import Pinecone, PodSpec
def init_pinecone() -> Pinecone | None:
    """Build a Pinecone client from the PINECONE_API_KEY environment variable.

    Returns:
        The client, or None when the key is not set or the constructor raises
        (a message is printed in both cases).
    """
    # Only the API key is needed; the legacy `environment` argument
    # (PINECONE_ENVIRONMENT) is no longer passed to the constructor.
    key = os.environ.get("PINECONE_API_KEY")
    if not key:
        print("Error: PINECONE_API_KEY not set.")
        return None

    try:
        client = Pinecone(api_key=key)
        print("Pinecone initialized.")
        return client
    except Exception as exc:
        print(f"Error initializing Pinecone: {exc}")
        return None
def create_pinecone_index_if_not_exists(pc: Pinecone, index_name: str, dimension: int, metric: str = 'cosine', environment: str = 'gcp-starter') -> None:
    """Create a pod-based Pinecone index unless one with that name exists.

    Args:
        pc: Initialized Pinecone client.
        index_name: Name of the index to look up / create.
        dimension: Vector dimensionality of the new index.
        metric: Similarity metric passed to ``create_index``.
        environment: Pod environment passed to ``PodSpec``.

    Errors raised by ``create_index`` are printed, not propagated.
    """
    # `names` is a method on the list_indexes() result; it has to be called.
    # Testing membership against the bound method itself raises TypeError.
    if index_name in pc.list_indexes().names():
        print(f"Index '{index_name}' already exists.")
        return

    print(f"Creating index '{index_name}'...")
    try:
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=PodSpec(environment=environment),  # Specify environment here
        )
        print(f"Index '{index_name}' created.")
    except Exception as e:
        print(f"Error creating Pinecone index: {e}")
# Example Usage:
# pc = init_pinecone()
# if pc:
# index_name = "my-caskada-index"
# dimension = 128
# create_pinecone_index_if_not_exists(pc, index_name, dimension)
# # Connect to the index
# try:
# index = pc.Index(index_name)
# # Upsert vectors
# vectors_to_upsert = [
# ("vec_id1", [0.1] * dimension, {"genre": "fiction"}), # With metadata
# ("vec_id2", [0.2] * dimension, {"year": 2023})
# ]
# print(f"Upserting {len(vectors_to_upsert)} vectors...")
# index.upsert(vectors=vectors_to_upsert)
# print("Upsert complete.")
# # Query
# query_vector = [0.15] * dimension
# print("Querying index...")
# response = index.query(vector=query_vector, top_k=3, include_metadata=True)
# print("Query response:", response)
# except Exception as e:
# print(f"Error interacting with Pinecone index: {e}")
// Requires: npm install @pinecone-database/pinecone
import { Pinecone, PodSpec } from '@pinecone-database/pinecone'
/**
 * Builds a Pinecone client from the PINECONE_API_KEY environment variable.
 *
 * Resolves to the client, or to null when the key is missing or the
 * constructor throws (the problem is logged in both cases).
 */
async function initPinecone(): Promise<Pinecone | null> {
  const { PINECONE_API_KEY: apiKey } = process.env
  if (!apiKey) {
    console.error('Error: PINECONE_API_KEY not set.')
    return null
  }

  try {
    // Current init style: the API key is the only required option.
    const client = new Pinecone({ apiKey })
    console.log('Pinecone initialized.')
    return client
  } catch (err) {
    console.error('Error initializing Pinecone:', err)
    return null
  }
}
/**
 * Creates a pod-based Pinecone index unless one with that name already exists.
 *
 * @param pc          Initialized Pinecone client.
 * @param indexName   Name of the index to look up / create.
 * @param dimension   Vector dimensionality of the new index.
 * @param metric      Similarity metric for the new index.
 * @param environment Pod environment for the new index.
 *
 * Errors from listing or creating are logged, not rethrown.
 */
async function createPineconeIndexIfNotExists(
  pc: Pinecone,
  indexName: string,
  dimension: number,
  metric: 'cosine' | 'euclidean' | 'dotproduct' = 'cosine',
  environment: string = 'gcp-starter', // Or your specific environment
): Promise<void> {
  try {
    // listIndexes() resolves to an object whose `indexes` array holds one
    // model per index; there is no `names` field on it, so match by name.
    const { indexes } = await pc.listIndexes()
    const alreadyExists = indexes?.some((existing) => existing.name === indexName) ?? false

    if (alreadyExists) {
      console.log(`Index '${indexName}' already exists.`)
      return
    }

    console.log(`Creating index '${indexName}'...`)
    await pc.createIndex({
      name: indexName,
      dimension: dimension,
      metric: metric,
      spec: {
        pod: {
          // Use PodSpec structure
          environment: environment,
          podType: 'p1.x1', // Example pod type, adjust as needed
        },
      },
    })
    console.log(`Index '${indexName}' created.`)
    // Add a small delay for index readiness (optional but sometimes helpful)
    await new Promise((resolve) => setTimeout(resolve, 5000))
  } catch (error) {
    console.error(`Error creating or checking Pinecone index:`, error)
  }
}
// Example Usage:
/*
async function pineconeExample() {
const pc = await initPinecone();
if (!pc) return;
const indexName = "my-caskada-index-ts";
const dimension = 128;
await createPineconeIndexIfNotExists(pc, indexName, dimension);
try {
const index = pc.index(indexName);
// Upsert vectors
const vectorsToUpsert = [
{ id: "ts_vec_id1", values: Array(dimension).fill(0.1), metadata: { genre: "fiction" } },
{ id: "ts_vec_id2", values: Array(dimension).fill(0.2), metadata: { year: 2023 } }
];
console.log(`Upserting ${vectorsToUpsert.length} vectors...`);
await index.upsert(vectorsToUpsert);
console.log("Upsert complete.");
// Query
const queryVector = Array(dimension).fill(0.15);
console.log("Querying index...");
const response = await index.query({ vector: queryVector, topK: 3, includeMetadata: true });
console.log("Query response:", response);
} catch (error) {
console.error("Error interacting with Pinecone index:", error);
}
}
pineconeExample();
*/
# Requires: pip install qdrant-client
import os
import qdrant_client
from qdrant_client.http.models import Distance, VectorParams, PointStruct, CollectionStatus
def init_qdrant_client() -> qdrant_client.QdrantClient | None:
    """Build a Qdrant client from environment variables.

    Reads QDRANT_URL (required, e.g. "http://localhost:6333" or a cloud URL)
    and QDRANT_API_KEY (optional, for cloud).

    Returns:
        The client, or None when QDRANT_URL is not set or the constructor
        raises (a message is printed in both cases).
    """
    url = os.environ.get("QDRANT_URL")
    key = os.environ.get("QDRANT_API_KEY")
    if not url:
        print("Error: QDRANT_URL not set.")
        return None

    try:
        qdrant = qdrant_client.QdrantClient(url=url, api_key=key)
        print("Qdrant client initialized.")
        return qdrant
    except Exception as exc:
        print(f"Error initializing Qdrant client: {exc}")
        return None
def create_qdrant_collection_if_not_exists(client: qdrant_client.QdrantClient, collection_name: str, dimension: int, distance_metric: Distance = Distance.COSINE) -> None:
    """Create a Qdrant collection unless one with that name already exists.

    Args:
        client: Initialized Qdrant client.
        collection_name: Name of the collection to look up / create.
        dimension: Size of the vectors stored in the collection.
        distance_metric: Distance function for the collection's vectors.

    Errors from listing or creating are printed, not propagated.
    """
    try:
        existing = client.get_collections().collections
        if any(c.name == collection_name for c in existing):
            print(f"Collection '{collection_name}' already exists.")
            return

        print(f"Creating collection '{collection_name}'...")
        # create_collection, not recreate_collection: the latter deletes an
        # existing collection first, which would silently drop data if the
        # collection appeared between the check above and this call.
        client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=dimension, distance=distance_metric),
        )
        print(f"Collection '{collection_name}' created.")
    except Exception as e:
        print(f"Error creating or checking Qdrant collection: {e}")
# Example Usage:
# client = init_qdrant_client()
# if client:
# collection_name = "caskada_qdrant_demo"
# dimension = 128
# create_qdrant_collection_if_not_exists(client, collection_name, dimension)
# try:
# # Upsert points
# points_to_upsert = [
# # Use UUIDs or sequential integers for IDs
# PointStruct(id=1, vector=[0.1] * dimension, payload={"type": "doc1", "source": "fileA.txt"}),
# PointStruct(id=2, vector=[0.2] * dimension, payload={"type": "doc2", "source": "fileB.txt"}),
# ]
# print(f"Upserting {len(points_to_upsert)} points...")
# # Use wait=True for confirmation, especially in scripts
# client.upsert(collection_name=collection_name, points=points_to_upsert, wait=True)
# print("Upsert complete.")
# # Search
# query_vector = [0.15] * dimension
# print("Searching collection...")
# search_result = client.search(
# collection_name=collection_name,
# query_vector=query_vector,
# limit=2 # Number of results to return
# )
# print("Search results:", search_result)
# except Exception as e:
# print(f"Error interacting with Qdrant collection: {e}")