💬 LLM Prompt Engineering · June 5, 2026 · ✅ Tests passing

Prompt Optimizer

A CLI tool that helps AI developers systematically optimize prompts for large language models by testing variations and scoring their outputs. The tool expands a base prompt template into variations by filling in its placeholders, runs each variation against user-supplied test cases, and ranks the variations by the average score returned by a customizable scoring function.

View on GitHub Download ZIP

Share: X / Twitter · LinkedIn · Reddit · Hacker News

What It Does

Generate prompt variations by replacing placeholders.
Evaluate prompts using test cases and a custom scoring function.
Save results in JSON or CSV format.

Installation

Install the required Python packages:

pip install openai pandas numpy

Usage

Run the tool using the following command:

python prompt_optimizer.py --prompt "Translate {text} to {language}" \
                           --test_cases test_cases.json \
                           --scorer scorer.py \
                           --output_format json \
                           --output_path results.json

Arguments

--prompt: Base prompt template with placeholders written as {name} (required).
--test_cases: Path to a JSON file containing the test cases (required).
--scorer: Path to a Python script that defines a score(test_case, output) function (required).
--output_format: Output format for results, json or csv (default: json).
--output_path: Path to save the results (default: results.json).

Source Code

import argparse
import json
import os
import importlib.util
import pandas as pd
import numpy as np
import openai

def load_test_cases(file_path):
    """Load test cases from a JSON file.

    Args:
        file_path: Path to a JSON document holding the test cases.

    Returns:
        The decoded JSON content, exactly as ``json.load`` produces it.

    Raises:
        ValueError: If the file cannot be opened or read, or if its content
            is not valid UTF-8 encoded JSON. The underlying exception is
            chained as the cause.
    """
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers missing files, directories and permission problems;
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        raise ValueError(f"Error loading test cases: {e}") from e

def load_scorer(file_path):
    """Load the scoring function from a Python script.

    The script is imported as a module named ``scorer`` and must expose a
    callable attribute named ``score``.

    NOTE: importing the script executes its top-level code, so only point
    this at scripts you trust.

    Args:
        file_path: Path to the Python script that defines ``score``.

    Returns:
        The ``score`` callable defined by the script.

    Raises:
        ValueError: If the script cannot be imported, or if it does not
            define a callable ``score``. The underlying exception is
            chained as the cause.
    """
    try:
        spec = importlib.util.spec_from_file_location("scorer", file_path)
        # spec_from_file_location returns None when it cannot pick a loader
        # for the path (for example an unrecognised file extension).
        if spec is None or spec.loader is None:
            raise ValueError(f"Cannot import '{file_path}' as a Python module.")
        scorer_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(scorer_module)
        score = getattr(scorer_module, 'score', None)
        # Reject a missing or non-callable attribute here so the failure is
        # reported at load time rather than on the first scoring call.
        if not callable(score):
            raise ValueError("Scorer script must define a 'score' function.")
        return score
    except Exception as e:
        raise ValueError(f"Error loading scorer script: {e}") from e

def generate_prompt_variations(base_prompt, placeholders):
    """Generate all possible prompt variations by replacing placeholders.

    Every combination of the candidate values (their Cartesian product) is
    substituted into ``base_prompt``, one prompt per combination.

    Args:
        base_prompt: Template text containing ``{name}`` placeholders.
        placeholders: Mapping of placeholder name to an iterable of
            candidate values. Values are converted with ``str()`` before
            substitution, so numbers and other non-string values are allowed.

    Returns:
        A list of prompts in ``itertools.product`` order. An empty mapping
        yields a single entry equal to ``base_prompt``.
    """
    from itertools import product

    keys = list(placeholders)
    candidate_values = [placeholders[key] for key in keys]
    variations = []

    for combination in product(*candidate_values):
        prompt = base_prompt
        # Substitutions are applied one key at a time, in key order.
        for key, value in zip(keys, combination):
            # str() keeps non-string values from raising TypeError in replace().
            prompt = prompt.replace(f"{{{key}}}", str(value))
        variations.append(prompt)

    return variations

def query_llm(prompt):
    """Send ``prompt`` to the OpenAI completion endpoint and return its text.

    The text of the first returned choice is stripped of surrounding
    whitespace. Any exception raised while querying is swallowed and
    reported as a string of the form ``"Error querying LLM: <details>"``,
    so this function returns a string rather than raising.
    """
    # NOTE(review): this relies on the legacy ``openai.Completion`` interface
    # and the ``text-davinci-003`` engine; confirm both are still available
    # with the installed openai package before relying on the output.
    request = {
        "engine": "text-davinci-003",
        "prompt": prompt,
        "max_tokens": 100,
    }
    try:
        completion = openai.Completion.create(**request)
        first_choice = completion.choices[0]
        return first_choice.text.strip()
    except Exception as err:
        return f"Error querying LLM: {err}"

def evaluate_prompts(prompts, test_cases, scorer):
    """Evaluate each prompt using the test cases and scoring function.

    For every prompt, each test case is formatted into the prompt with
    ``str.format``, the filled prompt is sent to ``query_llm``, and the
    output is passed to ``scorer(test_case, output)``.

    Args:
        prompts: Iterable of prompt strings, which may still contain
            ``{name}`` fields filled from each test case.
        test_cases: List of dicts whose keys are used as format fields.
        scorer: Callable taking ``(test_case, output)`` and returning a
            numeric score.

    Returns:
        A list of ``{"prompt": ..., "average_score": ...}`` dicts sorted by
        ``average_score`` in descending order. ``average_score`` is a plain
        Python ``float``. A prompt with no scores (empty ``test_cases``)
        gets ``0.0``.
    """
    results = []

    for prompt in prompts:
        scores = [
            scorer(test_case, query_llm(prompt.format(**test_case)))
            for test_case in test_cases
        ]
        # np.mean([]) would emit a RuntimeWarning and return NaN, which is
        # not valid JSON and has no defined sort position; use 0.0 instead.
        avg_score = float(np.mean(scores)) if scores else 0.0
        results.append({"prompt": prompt, "average_score": avg_score})

    return sorted(results, key=lambda x: x['average_score'], reverse=True)

def save_results(results, output_format, output_path):
    """Write ``results`` to ``output_path`` as JSON or CSV.

    ``"json"`` dumps the results with a four-space indent; ``"csv"`` writes
    them through a pandas DataFrame without the index column. Any failure,
    including an unsupported ``output_format``, is re-raised as a
    ``ValueError`` whose message starts with ``"Error saving results: "``.
    """
    try:
        # Reject unknown formats before touching the filesystem.
        if output_format not in ("json", "csv"):
            raise ValueError("Unsupported output format. Use 'json' or 'csv'.")

        if output_format == "csv":
            frame = pd.DataFrame(results)
            frame.to_csv(output_path, index=False)
        else:
            with open(output_path, 'w') as handle:
                json.dump(results, handle, indent=4)
    except Exception as err:
        raise ValueError(f"Error saving results: {err}")

def _collect_placeholders(prompt, test_cases):
    """Map each placeholder used in ``prompt`` to its distinct test-case values."""
    if not isinstance(test_cases, list) or not test_cases:
        raise ValueError("Test cases must be a non-empty JSON list of objects.")
    if not all(isinstance(tc, dict) for tc in test_cases):
        raise ValueError("Each test case must be a JSON object.")

    placeholders = {}
    for key in test_cases[0]:
        # Keys that never occur in the template would only multiply the
        # number of identical prompts (and LLM calls), so skip them.
        if f"{{{key}}}" not in prompt:
            continue
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # generated variations are the same from run to run.
        placeholders[key] = list(
            dict.fromkeys(tc[key] for tc in test_cases if key in tc)
        )
    return placeholders


def main():
    """Run the Prompt Optimizer command-line interface.

    Parses the command-line arguments, loads the test cases and the scorer,
    builds prompt variations from the placeholder values found in the test
    cases, evaluates them, and saves the ranked results.

    Any failure is reported on stderr as ``Error: <details>`` and the
    process exits with status 1 so calling scripts can detect it.
    """
    import sys

    parser = argparse.ArgumentParser(description="Prompt Optimizer")
    parser.add_argument("--prompt", required=True, help="Base prompt template with placeholders.")
    parser.add_argument("--test_cases", required=True, help="Path to JSON file containing test cases.")
    parser.add_argument("--scorer", required=True, help="Path to Python script defining scoring function.")
    parser.add_argument("--output_format", choices=["json", "csv"], default="json", help="Output format for results.")
    parser.add_argument("--output_path", default="results.json", help="Path to save the results.")
    args = parser.parse_args()

    try:
        test_cases = load_test_cases(args.test_cases)
        scorer = load_scorer(args.scorer)

        placeholders = _collect_placeholders(args.prompt, test_cases)
        prompt_variations = generate_prompt_variations(args.prompt, placeholders)

        results = evaluate_prompts(prompt_variations, test_cases, scorer)
        save_results(results, args.output_format, args.output_path)
    except Exception as e:
        # Top-level boundary: report the problem and signal failure to the shell.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Community

Downloads

···

Rate this tool

No ratings yet — be the first!

Details

Tool Name: prompt_optimizer
Category: LLM Prompt Engineering
Generated: June 5, 2026
Tests: Passing ✅
Fix Loops: 2

Quick Install

Clone just this tool:

git clone --depth 1 --filter=blob:none --sparse \
  https://github.com/ptulin/autoaiforge.git
cd autoaiforge
git sparse-checkout set generated_tools/2026-06-05/prompt_optimizer
cd generated_tools/2026-06-05/prompt_optimizer
pip install -r requirements.txt 2>/dev/null || true
python prompt_optimizer.py --help

Links

View source on GitHub Raw README.md