diff --git a/CHANGELOG.md b/CHANGELOG.md
index d44d3a88..466a3ffc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Standardized Python version requirement to 3.10+
 - Improved documentation structure
 
+### Removed
+- **BREAKING**: Removed `--offline` flag (redundant with semantic cache and `CORTEX_PROVIDER=ollama`)
+  - The semantic cache automatically provides offline capability for cached requests
+  - For true offline operation, use `export CORTEX_PROVIDER=ollama` instead
+
 ### Fixed
 - (Pending) Shell injection vulnerability in coordinator.py
 - (Pending) CI/CD pipeline test directory path
diff --git a/cortex/ask.py b/cortex/ask.py
index 2aa0b932..33c06351 100644
--- a/cortex/ask.py
+++ b/cortex/ask.py
@@ -140,7 +140,6 @@ def __init__(
         api_key: str,
         provider: str = "claude",
         model: str | None = None,
-        offline: bool = False,
     ):
         """Initialize the ask handler.
 
@@ -148,11 +147,9 @@ def __init__(
             api_key: API key for the LLM provider
            provider: Provider name ("openai", "claude", or "ollama")
             model: Optional model name override
-            offline: If True, only use cached responses
         """
         self.api_key = api_key
         self.provider = provider.lower()
-        self.offline = offline
         self.model = model or self._default_model()
         self.info_gatherer = SystemInfoGatherer()
 
@@ -313,9 +310,6 @@ def ask(self, question: str) -> str:
         if cached is not None and len(cached) > 0:
             return cached[0]
 
-        if self.offline:
-            raise RuntimeError("Offline mode: no cached response available for this question")
-
         # Call LLM
         try:
             if self.provider == "openai":
diff --git a/cortex/cli.py b/cortex/cli.py
index 274a4f55..8ebffa85 100644
--- a/cortex/cli.py
+++ b/cortex/cli.py
@@ -30,7 +30,6 @@ def __init__(self, verbose: bool = False):
         self.spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
         self.spinner_idx = 0
         self.verbose = verbose
-        self.offline = False
 
     def _debug(self, message: str):
         """Print debug info only in verbose mode"""
@@ -294,7 +293,6 @@ def ask(self, question: str) -> int:
             handler = AskHandler(
                 api_key=api_key,
                 provider=provider,
-                offline=self.offline,
             )
             answer = handler.ask(question)
             console.print(answer)
@@ -355,9 +353,7 @@ def install(
         try:
             self._print_status("🧠", "Understanding request...")
 
-            interpreter = CommandInterpreter(
-                api_key=api_key, provider=provider, offline=self.offline
-            )
+            interpreter = CommandInterpreter(api_key=api_key, provider=provider)
 
             self._print_status("📦", "Planning installation...")
 
@@ -1154,9 +1150,6 @@ def main():
     # Global flags
     parser.add_argument("--version", "-V", action="version", version=f"cortex {VERSION}")
     parser.add_argument("--verbose", "-v", action="store_true", help="Show detailed output")
-    parser.add_argument(
-        "--offline", action="store_true", help="Use cached responses only (no network calls)"
-    )
 
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
 
@@ -1329,7 +1322,6 @@ def main():
         return 0
 
     cli = CortexCLI(verbose=args.verbose)
-    cli.offline = bool(getattr(args, "offline", False))
 
     try:
         if args.command == "demo":
diff --git a/cortex/llm/interpreter.py b/cortex/llm/interpreter.py
index 44d4b110..74870d75 100644
--- a/cortex/llm/interpreter.py
+++ b/cortex/llm/interpreter.py
@@ -27,7 +27,6 @@ def __init__(
         api_key: str,
         provider: str = "openai",
         model: str | None = None,
-        offline: bool = False,
         cache: Optional["SemanticCache"] = None,
     ):
         """Initialize the command interpreter.
@@ -36,12 +35,10 @@ def __init__(
             api_key: API key for the LLM provider
             provider: Provider name ("openai", "claude", or "ollama")
             model: Optional model name override
-            offline: If True, only use cached responses
             cache: Optional SemanticCache instance for response caching
         """
         self.api_key = api_key
         self.provider = APIProvider(provider.lower())
-        self.offline = offline
 
         if cache is None:
             try:
@@ -350,9 +347,6 @@ def parse(self, user_input: str, validate: bool = True) -> list[str]:
         if cached is not None:
             return cached
 
-        if self.offline:
-            raise RuntimeError("Offline mode: no cached response available for this request")
-
         if self.provider == APIProvider.OPENAI:
             commands = self._call_openai(user_input)
         elif self.provider == APIProvider.CLAUDE:
diff --git a/docs/COMMANDS.md b/docs/COMMANDS.md
index 6e4eea4e..173f804a 100644
--- a/docs/COMMANDS.md
+++ b/docs/COMMANDS.md
@@ -24,7 +24,6 @@ This document provides a comprehensive reference for all commands available in t
 ```bash
 cortex --version, -V    # Show version
 cortex --verbose, -v    # Show detailed output
-cortex --offline        # Use cached responses only (no network calls)
 cortex --help, -h       # Show help message
 ```
 
diff --git a/docs/ISSUE-268-TESTING.md b/docs/ISSUE-268-TESTING.md
index c40b91d5..51ffe9b8 100644
--- a/docs/ISSUE-268-TESTING.md
+++ b/docs/ISSUE-268-TESTING.md
@@ -1,6 +1,6 @@
-# Issue 268 — End-user testing guide (semantic cache + offline)
+# Issue 268 — End-user testing guide (semantic cache)
 
-This guide covers only how to test the feature added for issue #268.
+This guide covers how to test the semantic cache feature added for issue #268.
 
 ## Prereqs
 
@@ -40,30 +40,31 @@ Expected:
 - `Misses` is >= 0
 - `Saved calls (approx)` increases when cached answers are used
 
-## Test 3: Offline mode (cached-only)
+## Test 3: Verify cache hit (repeat request)
 
-Run the same request with offline mode enabled.
+Run the original request again to verify cache is working:
 
 ```bash
-cortex --offline install nginx --dry-run
+cortex install nginx --dry-run
+cortex cache stats
 ```
 
 Expected:
-- If the request was warmed in Test 1, it should still print commands.
-- If the request was never cached, it should fail with an offline-cache-miss message.
+- The second run should be faster (no API call)
+- `cache stats` should show `Hits: 1`
 
-## Test 4: Verify cache hit (repeat request)
+## Test 4: True offline mode with Ollama
 
-Run the original request again to verify cache is working:
+For completely offline operation, use a local LLM:
 
 ```bash
+export CORTEX_PROVIDER=ollama
 cortex install nginx --dry-run
-cortex cache stats
 ```
 
 Expected:
-- The second run should be faster (no API call)
-- `cache stats` should show `Hits: 1`
+- Works without internet connection
+- Uses local Ollama model
 
 ## Notes
 
diff --git a/tests/test_ask.py b/tests/test_ask.py
index aaa9a237..0fe53176 100644
--- a/tests/test_ask.py
+++ b/tests/test_ask.py
@@ -174,14 +174,6 @@ def test_ask_with_openai_mock(self, mock_openai):
         self.assertEqual(answer, "TensorFlow is compatible with your system.")
         mock_openai.assert_called_once()
 
-    def test_ask_offline_no_cache(self):
-        """Test that offline mode raises error when no cache hit."""
-        handler = AskHandler(api_key="fake-key", provider="fake", offline=True)
-        handler.cache = None
-        with self.assertRaises(RuntimeError) as ctx:
-            handler.ask("Random question that's not cached")
-        self.assertIn("Offline mode", str(ctx.exception))
-
     def test_ask_caches_response(self):
         """Test that responses are cached after successful API call."""
         from cortex.semantic_cache import SemanticCache
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 047f9a46..1f97bc1a 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -60,8 +60,13 @@ def test_print_success(self, mock_stdout):
 
     @patch.dict(os.environ, {}, clear=True)
     def test_install_no_api_key(self):
-        result = self.cli.install("docker")
-        self.assertEqual(result, 1)
+        # When no API key is set, the CLI falls back to Ollama.
+        # If Ollama is running, this should succeed. If not, it should fail.
+        # We'll mock Ollama to be unavailable to test the failure case.
+        with patch("cortex.llm.interpreter.CommandInterpreter.parse") as mock_parse:
+            mock_parse.side_effect = RuntimeError("Ollama not available")
+            result = self.cli.install("docker")
+            self.assertEqual(result, 1)
 
     @patch.dict(os.environ, {"OPENAI_API_KEY": "sk-test-openai-key-123"}, clear=True)
     @patch("cortex.cli.CommandInterpreter")
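For downstream code that previously constructed `AskHandler` or `CommandInterpreter` with `offline=True`, here is a minimal migration sketch against the signatures visible in this patch; the import paths and the empty `api_key` for a local Ollama model are assumptions, not confirmed by the patch.

```python
# Migration sketch, not part of the patch: the removed offline=True parameter
# has no direct replacement. Cached requests are answered by the semantic
# cache automatically; for fully offline operation, select the local Ollama
# provider instead (CLI users: export CORTEX_PROVIDER=ollama).
from cortex.ask import AskHandler                      # assumed import path
from cortex.llm.interpreter import CommandInterpreter

# Before (no longer valid):
#   handler = AskHandler(api_key=key, provider="claude", offline=True)

handler = AskHandler(api_key="", provider="ollama")    # assumption: api_key is unused by a local model
interpreter = CommandInterpreter(api_key="", provider="ollama")

answer = handler.ask("Is TensorFlow compatible with my system?")
commands = interpreter.parse("install nginx")
```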