Embedding

Below you will find an overview table of various text embedding APIs, along with example Python code.

| API | Free Tier | Pricing Model | Docs |
| --- | --- | --- | --- |
| OpenAI | ~$5 credit | ~$0.0001/1K tokens | — |
| Azure OpenAI | $200 credit | Same as OpenAI (~$0.0001/1K tokens) | — |
| Google Vertex AI | $300 credit | ~$0.025 / million chars | — |
| AWS Bedrock | No free tier, but AWS credits may apply | ~$0.00002/1K tokens (Titan V2) | — |
| Cohere | Limited free tier | ~$0.0001/1K tokens | — |
| Hugging Face | ~$0.10 free compute monthly | Pay per second of compute | — |
| Jina | 1M tokens free | Pay per token after | — |

*(The original "Docs" column links were not preserved in this export.)*

Example Code

1. OpenAI

# Requires: pip install openai numpy
import os
import numpy as np
from openai import OpenAI

def get_openai_embedding(text: str, model: str = "text-embedding-3-small") -> np.ndarray | None:
    """Return the embedding vector for *text* via the OpenAI API.

    Reads the API key from the OPENAI_API_KEY environment variable.

    Args:
        text: Input text to embed.
        model: OpenAI embedding model name.

    Returns:
        A float32 NumPy vector on success, or None if the key is missing
        or the API call fails.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        print("Error: OPENAI_API_KEY not set.")
        return None
    try:
        client = OpenAI(api_key=key)
        reply = client.embeddings.create(model=model, input=text)
        vector = reply.data[0].embedding
        return np.array(vector, dtype=np.float32)
    except Exception as exc:
        print(f"Error calling OpenAI embedding API: {exc}")
        return None

# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_openai_embedding(text_to_embed)
# if embedding_vector is not None:
#     print(embedding_vector)
#     print(f"Dimension: {len(embedding_vector)}")

2. Azure OpenAI

# Requires: pip install openai numpy
import os
import numpy as np
from openai import AzureOpenAI

def get_azure_openai_embedding(text: str, deployment_name: str = "your-embedding-deployment") -> np.ndarray | None:
    """Embed *text* through an Azure OpenAI embeddings deployment.

    Reads AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT from the
    environment.

    Args:
        text: Input text to embed.
        deployment_name: Name of your Azure embedding deployment
            (Azure routes by deployment name, not raw model name).

    Returns:
        A float32 NumPy vector, or None on missing config or API error.
    """
    key = os.environ.get("AZURE_OPENAI_API_KEY")
    endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
    # Pinned REST API version; adjust when Azure retires it.
    version = "2023-05-15"

    if key and endpoint:
        try:
            client = AzureOpenAI(
                api_key=key,
                azure_endpoint=endpoint,
                api_version=version,
            )
            reply = client.embeddings.create(model=deployment_name, input=text)
            return np.array(reply.data[0].embedding, dtype=np.float32)
        except Exception as exc:
            print(f"Error calling Azure OpenAI embedding API: {exc}")
            return None
    print("Error: AZURE_OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT not set.")
    return None

# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_azure_openai_embedding(text_to_embed, deployment_name="my-text-embedding-ada-002")
# if embedding_vector is not None:
#     print(embedding_vector)

3. Google Vertex AI

# Requires: pip install google-cloud-aiplatform numpy
import os
import numpy as np
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict

def get_vertex_embedding(text: str, project_id: str | None = None, location: str = "us-central1", model_name: str = "textembedding-gecko@001") -> np.ndarray | None:
    """Gets embedding from Google Vertex AI.

    Args:
        text: Input text to embed.
        project_id: GCP project id; falls back to the GOOGLE_CLOUD_PROJECT
            environment variable when omitted.
        location: Vertex AI region hosting the publisher model.
        model_name: Publisher embedding model id (Gecko by default).

    Returns:
        A float32 NumPy vector, or None if configuration is missing or the
        API call fails.
    """
    project_id = project_id or os.environ.get("GOOGLE_CLOUD_PROJECT")
    if not project_id:
        print("Error: GOOGLE_CLOUD_PROJECT not set and project_id not provided.")
        return None

    try:
        # Initialize the SDK for this project/region. No explicit credentials
        # are passed here — presumably ambient credentials are used; verify
        # against your deployment environment.
        aiplatform.init(project=project_id, location=location)
        # NOTE(review): this constructs an Endpoint directly from a
        # publisher-model resource path rather than a deployed endpoint id;
        # newer SDK versions expose TextEmbeddingModel for this use case —
        # confirm this path works with the pinned SDK version.
        endpoint = aiplatform.Endpoint(f"projects/{project_id}/locations/{location}/publishers/google/models/{model_name}")

        # Wrap the text in the schema's TextEmbeddingInstance; predict()
        # expects a list of instances even for a single input.
        instance = predict.instance.TextEmbeddingInstance(content=text).to_value()
        instances = [instance]
        response = endpoint.predict(instances=instances)

        # Assumes the gecko response shape predictions[0]['embeddings']['values']
        # — TODO confirm for other embedding models.
        embedding = response.predictions[0]['embeddings']['values']
        return np.array(embedding, dtype=np.float32)
    except Exception as e:
        print(f"Error calling Vertex AI embedding API: {e}")
        return None

# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_vertex_embedding(text_to_embed)
# if embedding_vector is not None:
#     print(embedding_vector)

4. AWS Bedrock

# Requires: pip install boto3 numpy
import boto3
import json
import numpy as np
import os

def get_bedrock_embedding(text: str, region_name: str | None = None, model_id: str = "amazon.titan-embed-text-v2:0") -> np.ndarray | None:
    """Fetch a text embedding from AWS Bedrock.

    AWS credentials must already be configured (environment variables,
    shared credentials file, instance role, etc.).

    Args:
        text: Input text to embed.
        region_name: AWS region; falls back to AWS_REGION, then us-east-1.
        model_id: Bedrock embedding model id (Titan text v2 by default).

    Returns:
        A float32 NumPy vector, or None if the call or parsing fails.
    """
    resolved_region = region_name if region_name else os.environ.get("AWS_REGION", "us-east-1")
    try:
        runtime = boto3.client("bedrock-runtime", region_name=resolved_region)
        raw = runtime.invoke_model(
            modelId=model_id,
            contentType="application/json",
            accept="application/json",
            body=json.dumps({"inputText": text}),
        )
        # The response body is a stream; read and decode it once.
        parsed = json.loads(raw["body"].read())
        vector = parsed.get("embedding")
        if not vector:
            print("Error: Embedding not found in Bedrock response.")
            return None
        return np.array(vector, dtype=np.float32)
    except Exception as exc:
        print(f"Error calling AWS Bedrock embedding API: {exc}")
        return None

# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_bedrock_embedding(text_to_embed)
# if embedding_vector is not None:
#     print(embedding_vector)

5. Cohere

# Requires: pip install cohere numpy
import cohere
import os
import numpy as np

def get_cohere_embedding(text: str, model: str = "embed-english-v3.0") -> np.ndarray | None:
    """Embed *text* with the Cohere API.

    Requires the COHERE_API_KEY environment variable.

    Args:
        text: Input text to embed.
        model: Cohere embedding model name.

    Returns:
        A float32 NumPy vector, or None if the key is missing or the call
        fails.
    """
    key = os.environ.get("COHERE_API_KEY")
    if not key:
        print("Error: COHERE_API_KEY not set.")
        return None
    try:
        # The embed endpoint is batch-oriented, so wrap the single text in
        # a list. input_type="search_document" suits indexing; use
        # "search_query" when embedding queries instead.
        reply = cohere.Client(key).embed(
            texts=[text], model=model, input_type="search_document"
        )
        return np.array(reply.embeddings[0], dtype=np.float32)
    except Exception as exc:
        print(f"Error calling Cohere embedding API: {exc}")
        return None

# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_cohere_embedding(text_to_embed)
# if embedding_vector is not None:
#     print(embedding_vector)

6. Hugging Face Inference API

# Requires: pip install requests numpy
import requests
import os
import numpy as np

def get_hf_embedding(
    text: str,
    model_url: str = "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
    timeout: float = 30.0,
) -> np.ndarray | None:
    """Gets embedding from the Hugging Face Inference API.

    Args:
        text: Input text to embed.
        model_url: Full inference-API URL of the embedding model.
        timeout: Seconds to wait for the HTTP response; without it,
            requests.post can block indefinitely if the endpoint stalls.

    Returns:
        A float32 NumPy vector, or None on any error.
    """
    hf_token = os.environ.get("HUGGINGFACE_TOKEN")
    if not hf_token:
        print("Warning: HUGGINGFACE_TOKEN not set. Public models might work without it.")
        # Proceed anyway: public models can often be queried unauthenticated,
        # though authenticated requests are recommended.

    headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
    payload = {"inputs": text}

    try:
        response = requests.post(model_url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        embedding_list = response.json()
        # sentence-transformers models usually return [[...]] for a single
        # input, but some models return a flat [...] — accept both shapes.
        if isinstance(embedding_list, list) and len(embedding_list) > 0 and isinstance(embedding_list[0], list):
            return np.array(embedding_list[0], dtype=np.float32)
        if isinstance(embedding_list, list) and len(embedding_list) > 0 and isinstance(embedding_list[0], float):
            return np.array(embedding_list, dtype=np.float32)
        print(f"Unexpected response structure from HF API: {embedding_list}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error calling Hugging Face Inference API: {e}")
        return None
    except Exception as e:
        print(f"Error processing Hugging Face response: {e}")
        return None


# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_hf_embedding(text_to_embed)
# if embedding_vector is not None:
#     print(embedding_vector)

7. Jina AI

# Requires: pip install requests numpy
import requests
import os
import numpy as np

def get_jina_embedding(text: str, model: str = "jina-embeddings-v2-base-en", timeout: float = 30.0) -> np.ndarray | None:
    """Gets embedding from the Jina AI embeddings API.

    Args:
        text: Input text to embed.
        model: Jina embedding model name.
        timeout: Seconds to wait for the HTTP response; without it,
            requests.post can block indefinitely if the endpoint stalls.

    Returns:
        A float32 NumPy vector, or None if the key is missing, the request
        fails, or the response cannot be parsed.
    """
    jina_token = os.environ.get("JINA_API_KEY")  # Or JINA_TOKEN depending on convention
    if not jina_token:
        print("Error: JINA_API_KEY not set.")
        return None

    url = "https://api.jina.ai/v1/embeddings"  # Use v1 endpoint
    headers = {
        "Authorization": f"Bearer {jina_token}",
        "Accept-Encoding": "identity",  # Recommended by Jina docs
        "Content-Type": "application/json"
    }
    payload = {
        "input": [text],  # API expects a list
        "model": model
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        embedding = result["data"][0]["embedding"]
        return np.array(embedding, dtype=np.float32)
    except requests.exceptions.RequestException as e:
        print(f"Error calling Jina AI embedding API: {e}")
        return None
    except (KeyError, IndexError) as e:
        # response is guaranteed bound here: these keys are only accessed
        # after a successful post/raise_for_status.
        print(f"Error parsing Jina AI response: {e}, Response: {response.text}")
        return None

# Example:
# text_to_embed = "Hello world"
# embedding_vector = get_jina_embedding(text_to_embed)
# if embedding_vector is not None:
#     print(embedding_vector)

Last updated