5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -19,6 +19,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Standardized Python version requirement to 3.10+
- Improved documentation structure

### Removed
- **BREAKING**: Removed `--offline` flag (redundant with semantic cache and `CORTEX_PROVIDER=ollama`)
- The semantic cache automatically provides offline capability for cached requests
- For true offline operation, use `export CORTEX_PROVIDER=ollama` instead

### Fixed
- (Pending) Shell injection vulnerability in coordinator.py
- (Pending) CI/CD pipeline test directory path
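The removal above implies a small migration for anyone who scripted `cortex --offline`. Below is a minimal sketch of the replacement workflow, assuming `CORTEX_PROVIDER` and the commands behave as described in this changelog entry and in the testing guide later in this diff:

```bash
# Before this change (flag now removed):
#   cortex --offline install nginx --dry-run
# After this change: cached requests are served from the semantic cache
# automatically; for fully offline operation, point Cortex at a local model.
export CORTEX_PROVIDER=ollama
cortex install nginx --dry-run
cortex cache stats   # hits should increase on repeated requests
```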
6 changes: 0 additions & 6 deletions cortex/ask.py
@@ -140,19 +140,16 @@ def __init__(
api_key: str,
provider: str = "claude",
model: str | None = None,
offline: bool = False,
):
"""Initialize the ask handler.

Args:
api_key: API key for the LLM provider
provider: Provider name ("openai", "claude", or "ollama")
model: Optional model name override
offline: If True, only use cached responses
"""
self.api_key = api_key
self.provider = provider.lower()
self.offline = offline
self.model = model or self._default_model()
self.info_gatherer = SystemInfoGatherer()

@@ -313,9 +310,6 @@ def ask(self, question: str) -> str:
if cached is not None and len(cached) > 0:
return cached[0]

if self.offline:
raise RuntimeError("Offline mode: no cached response available for this question")

# Call LLM
try:
if self.provider == "openai":
10 changes: 1 addition & 9 deletions cortex/cli.py
@@ -30,7 +30,6 @@ def __init__(self, verbose: bool = False):
self.spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
self.spinner_idx = 0
self.verbose = verbose
self.offline = False

def _debug(self, message: str):
"""Print debug info only in verbose mode"""
@@ -294,7 +293,6 @@ def ask(self, question: str) -> int:
handler = AskHandler(
api_key=api_key,
provider=provider,
offline=self.offline,
)
answer = handler.ask(question)
console.print(answer)
@@ -355,9 +353,7 @@ def install(
try:
self._print_status("🧠", "Understanding request...")

interpreter = CommandInterpreter(
api_key=api_key, provider=provider, offline=self.offline
)
interpreter = CommandInterpreter(api_key=api_key, provider=provider)

self._print_status("📦", "Planning installation...")

@@ -1154,9 +1150,6 @@ def main():
# Global flags
parser.add_argument("--version", "-V", action="version", version=f"cortex {VERSION}")
parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed output")
parser.add_argument(
"--offline", action="store_true", help="Use cached responses only (no network calls)"
)

subparsers = parser.add_subparsers(dest="command", help="Available commands")

@@ -1329,7 +1322,6 @@ def main():
return 0

cli = CortexCLI(verbose=args.verbose)
cli.offline = bool(getattr(args, "offline", False))

try:
if args.command == "demo":
6 changes: 0 additions & 6 deletions cortex/llm/interpreter.py
@@ -27,7 +27,6 @@ def __init__(
api_key: str,
provider: str = "openai",
model: str | None = None,
offline: bool = False,
cache: Optional["SemanticCache"] = None,
):
"""Initialize the command interpreter.
@@ -36,12 +35,10 @@ def __init__(
api_key: API key for the LLM provider
provider: Provider name ("openai", "claude", or "ollama")
model: Optional model name override
offline: If True, only use cached responses
cache: Optional SemanticCache instance for response caching
"""
self.api_key = api_key
self.provider = APIProvider(provider.lower())
self.offline = offline

if cache is None:
try:
@@ -350,9 +347,6 @@ def parse(self, user_input: str, validate: bool = True) -> list[str]:
if cached is not None:
return cached

if self.offline:
raise RuntimeError("Offline mode: no cached response available for this request")

if self.provider == APIProvider.OPENAI:
commands = self._call_openai(user_input)
elif self.provider == APIProvider.CLAUDE:
1 change: 0 additions & 1 deletion docs/COMMANDS.md
@@ -24,7 +24,6 @@ This document provides a comprehensive reference for all commands available in t
```bash
cortex --version, -V # Show version
cortex --verbose, -v # Show detailed output
cortex --offline # Use cached responses only (no network calls)
cortex --help, -h # Show help message
```

25 changes: 13 additions & 12 deletions docs/ISSUE-268-TESTING.md
@@ -1,6 +1,6 @@
# Issue 268 — End-user testing guide (semantic cache + offline)
# Issue 268 — End-user testing guide (semantic cache)

This guide covers only how to test the feature added for issue #268.
This guide covers how to test the semantic cache feature added for issue #268.

## Prereqs

@@ -40,30 +40,31 @@ Expected:
- `Misses` is >= 0
- `Saved calls (approx)` increases when cached answers are used

## Test 3: Offline mode (cached-only)
## Test 3: Verify cache hit (repeat request)

Run the same request with offline mode enabled.
Run the original request again to verify cache is working:

```bash
cortex --offline install nginx --dry-run
cortex install nginx --dry-run
cortex cache stats
```

Expected:
- If the request was warmed in Test 1, it should still print commands.
- If the request was never cached, it should fail with an offline-cache-miss message.
- The second run should be faster (no API call)
- `cache stats` should show `Hits: 1`

## Test 4: Verify cache hit (repeat request)
## Test 4: True offline mode with Ollama

Run the original request again to verify cache is working:
For completely offline operation, use a local LLM:

```bash
export CORTEX_PROVIDER=ollama
cortex install nginx --dry-run
cortex cache stats
```

Expected:
- The second run should be faster (no API call)
- `cache stats` should show `Hits: 1`
- Works without internet connection
- Uses local Ollama model

## Notes

8 changes: 0 additions & 8 deletions tests/test_ask.py
@@ -174,14 +174,6 @@ def test_ask_with_openai_mock(self, mock_openai):
self.assertEqual(answer, "TensorFlow is compatible with your system.")
mock_openai.assert_called_once()

def test_ask_offline_no_cache(self):
"""Test that offline mode raises error when no cache hit."""
handler = AskHandler(api_key="fake-key", provider="fake", offline=True)
handler.cache = None
with self.assertRaises(RuntimeError) as ctx:
handler.ask("Random question that's not cached")
self.assertIn("Offline mode", str(ctx.exception))

def test_ask_caches_response(self):
"""Test that responses are cached after successful API call."""
from cortex.semantic_cache import SemanticCache
9 changes: 7 additions & 2 deletions tests/test_cli.py
@@ -60,8 +60,13 @@ def test_print_success(self, mock_stdout):

@patch.dict(os.environ, {}, clear=True)
def test_install_no_api_key(self):
result = self.cli.install("docker")
self.assertEqual(result, 1)
# When no API key is set, the CLI falls back to Ollama.
# If Ollama is running, this should succeed. If not, it should fail.
# We'll mock Ollama to be unavailable to test the failure case.
with patch("cortex.llm.interpreter.CommandInterpreter.parse") as mock_parse:
mock_parse.side_effect = RuntimeError("Ollama not available")
result = self.cli.install("docker")
self.assertEqual(result, 1)

@patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-openai-key-123"}, clear=True)
@patch("cortex.cli.CommandInterpreter")