|
25 | 25 | # Simple cache configuration |
26 | 26 | cache_file = "llm_cache.json" |
27 | 27 |
|
28 | | -def call_llm(prompt, use_cache: bool = True) -> str: |
| 28 | + |
def load_cache():
    """Load the LLM response cache from disk.

    Returns:
        dict: Mapping of prompt -> cached response text; an empty dict when
        the cache file is missing, unreadable, or not valid JSON.
    """
    try:
        with open(cache_file, 'r') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        # A missing or corrupt cache is non-fatal: start with an empty cache.
        # (Specific exceptions instead of a bare `except:` so bugs and
        # KeyboardInterrupt are not silently swallowed.)
        logger.warning("Failed to load cache.")
        return {}
| 36 | + |
| 37 | + |
def save_cache(cache):
    """Persist the LLM response cache to disk as JSON.

    Args:
        cache (dict): Mapping of prompt -> response text.

    Caching is best-effort: failures are logged and swallowed so a broken
    cache never breaks the actual LLM call.
    """
    try:
        with open(cache_file, 'w') as f:
            json.dump(cache, f)
    except (OSError, TypeError, ValueError):
        # OSError: disk/permission problems; TypeError/ValueError: cache
        # contains values json cannot serialize. No bare `except:` so real
        # bugs and KeyboardInterrupt propagate.
        logger.warning("Failed to save cache")
| 44 | + |
| 45 | + |
def get_llm_provider():
    """Resolve which LLM provider backend to use.

    Precedence: the explicit LLM_PROVIDER environment variable wins;
    otherwise "GEMINI" is inferred when Gemini credentials are present.
    Returns None when no provider can be determined.
    """
    explicit = os.getenv("LLM_PROVIDER")
    if explicit:
        return explicit
    if os.getenv("GEMINI_PROJECT_ID") or os.getenv("GEMINI_API_KEY"):
        return "GEMINI"
    # if necessary, add ANTHROPIC/OPENAI
    return explicit
| 52 | + |
| 53 | + |
| 54 | +def _call_llm_provider(prompt: str) -> str: |
29 | 55 | """ |
30 | 56 | Call an LLM provider based on environment variables. |
31 | 57 | Environment variables: |
@@ -59,7 +85,7 @@ def call_llm(prompt, use_cache: bool = True) -> str: |
59 | 85 | raise ValueError(f"{base_url_var} environment variable is required") |
60 | 86 |
|
61 | 87 | # Append the endpoint to the base URL |
62 | | - url = f"{base_url}/v1/chat/completions" |
| 88 | + url = f"{base_url.rstrip('/')}/v1/chat/completions" |
63 | 89 |
|
64 | 90 | # Configure headers and payload based on provider |
65 | 91 | headers = { |
@@ -98,6 +124,58 @@ def call_llm(prompt, use_cache: bool = True) -> str: |
98 | 124 | except ValueError: |
99 | 125 | raise Exception(f"Failed to parse response as JSON from {provider}. The server might have returned an invalid response.") |
100 | 126 |
|
# Default provider is Google Gemini 2.5 Pro, which performs well on code understanding.
def call_llm(prompt: str, use_cache: bool = True) -> str:
    """Send a prompt to the configured LLM provider, with optional disk caching.

    Args:
        prompt: Text to send to the model; also used as the cache key.
        use_cache: When True, return a previously cached response if one
            exists and persist the fresh response afterwards.

    Returns:
        The model's response text.
    """
    logger.info(f"PROMPT: {prompt}")

    if use_cache:
        try:
            hit = load_cache()[prompt]
        except KeyError:
            pass
        else:
            logger.info(f"RESPONSE: {hit}")
            return hit

    # Dispatch on the provider resolved from environment variables.
    if get_llm_provider() == "GEMINI":
        response_text = _call_llm_gemini(prompt)
    else:
        # Generic path: any OpenAI-compatible API endpoint (Ollama, ...).
        response_text = _call_llm_provider(prompt)

    logger.info(f"RESPONSE: {response_text}")

    if use_cache:
        # Re-read the cache before writing so entries added by other
        # processes in the meantime are not clobbered.
        fresh = load_cache()
        fresh[prompt] = response_text
        save_cache(fresh)

    return response_text
| 159 | + |
| 160 | + |
def _call_llm_gemini(prompt: str) -> str:
    """Call Google Gemini and return the response text.

    Authenticates via Vertex AI when GEMINI_PROJECT_ID is set, otherwise via
    GEMINI_API_KEY. Model defaults to gemini-2.5-pro-exp-03-25 and can be
    overridden with GEMINI_MODEL.

    Raises:
        ValueError: When neither credential variable is configured.
    """
    project_id = os.getenv("GEMINI_PROJECT_ID")
    api_key = os.getenv("GEMINI_API_KEY")
    if project_id:
        # Vertex AI path: authenticate with project + location.
        client = genai.Client(
            vertexai=True,
            project=project_id,
            location=os.getenv("GEMINI_LOCATION", "us-central1"),
        )
    elif api_key:
        # Direct Gemini API path.
        client = genai.Client(api_key=api_key)
    else:
        raise ValueError("Either GEMINI_PROJECT_ID or GEMINI_API_KEY must be set in the environment")
    model_name = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
    response = client.models.generate_content(model=model_name, contents=[prompt])
    return response.text
| 178 | + |
101 | 179 | if __name__ == "__main__": |
102 | 180 | test_prompt = "Hello, how are you?" |
103 | 181 |
|
|
0 commit comments