| 1 | +""" |
| 2 | +Copyright (c) 2024, 2025, Oracle and/or its affiliates. |
| 3 | +Licensed under the Universal Permissive License v1.0 as shown at http://oss.oracle.com/licenses/upl. |
| 4 | +""" |
| 5 | +# spell-checker:ignore litellm giskard ollama llms |
| 6 | +# pylint: disable=unused-argument,protected-access |
| 7 | + |
from typing import TYPE_CHECKING, List, Optional, Any
import time
import litellm
from litellm.llms.ollama.completion.transformation import OllamaConfig
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ModelResponse
from httpx._models import Response

import common.logging_config as logging_config

logger = logging_config.logging.getLogger("patches.litellm_patch")

# Only patch if not already patched
if not getattr(OllamaConfig.transform_response, "_is_custom_patch", False):
    if TYPE_CHECKING:
        from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

        LiteLLMLoggingObj = _LiteLLMLoggingObj
    else:
        LiteLLMLoggingObj = Any

    def custom_transform_response(
        self,
        model: str,
        raw_response: Response,
        model_response: ModelResponse,
        logging_obj: LiteLLMLoggingObj,
        request_data: dict,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        encoding: Any,
        api_key: Optional[str] = None,
        json_mode: Optional[bool] = None,
    ) -> ModelResponse:
        """Custom transform_response adapted from .venv/lib/python3.11/site-packages/litellm/llms/ollama/completion/transformation.py"""
        logger.info("Custom transform_response is running")
        response_json = raw_response.json()

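        # Map the Ollama /api/generate payload onto the OpenAI-style response object:
        # the generated text arrives under the "response" key, and this non-streaming
        # path always reports a "stop" finish reason.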
        model_response.choices[0].finish_reason = "stop"
        model_response.choices[0].message.content = response_json["response"]

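        # Token accounting: the final /api/generate payload normally carries
        # "prompt_eval_count" / "eval_count"; when either is missing, fall back to
        # tokenizing the prompt locally or to the length of any returned message content.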
        _prompt = request_data.get("prompt", "")
        prompt_tokens = response_json.get(
            "prompt_eval_count",
            len(encoding.encode(_prompt, disallowed_special=())),
        )
        completion_tokens = response_json.get(
            "eval_count",
            len(response_json.get("message", {}).get("content", "")),
        )

        setattr(
            model_response,
            "usage",
            litellm.Usage(
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=prompt_tokens + completion_tokens,
            ),
        )
        model_response.created = int(time.time())
        model_response.model = "ollama/" + model
        return model_response

    # Mark it to avoid double patching
    custom_transform_response._is_custom_patch = True

    # Patch it
    OllamaConfig.transform_response = custom_transform_response
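
# Usage (sketch): importing this module for its side effect is enough to apply the
# patch before any litellm call, e.g.
#
#     import patches.litellm_patch  # noqa: F401
#     response = litellm.completion(
#         model="ollama/llama3",
#         messages=[{"role": "user", "content": "ping"}],
#         api_base="http://localhost:11434",
#     )
#
# The import path, model name, and api_base above are illustrative assumptions,
# not values taken from this repository.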