diff --git a/.vscode/launch.json b/.vscode/launch.json index 7142960c4f9d..a62e8d6c3a77 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,7 +10,7 @@ "program": "${workspaceFolder}/target/debug/edit", "cwd": "${workspaceFolder}", "args": [ - "${workspaceFolder}/crates/edit/src/bin/edit/main.rs" + "${workspaceFolder}/assets/highlighting-tests/markdown.md" ], }, { @@ -23,7 +23,7 @@ "program": "${workspaceFolder}/target/debug/edit", "cwd": "${workspaceFolder}", "args": [ - "${workspaceFolder}/crates/edit/src/bin/edit/main.rs" + "${workspaceFolder}/assets/highlighting-tests/markdown.md" ], }, { @@ -40,7 +40,7 @@ "program": "${workspaceFolder}/target/debug/edit", "cwd": "${workspaceFolder}", "args": [ - "${workspaceFolder}/crates/edit/src/bin/edit/main.rs" + "${workspaceFolder}/assets/highlighting-tests/markdown.md" ], }, { diff --git a/Cargo.lock b/Cargo.lock index c7528121ff29..7cac8e8f3ad4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,38 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "argh" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ff18325c8a36b82f992e533ece1ec9f9a9db446bd1c14d4f936bac88fcd240" +dependencies = [ + "argh_derive", + "argh_shared", + "rust-fuzzy-search", +] + +[[package]] +name = "argh_derive" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb7b2b83a50d329d5d8ccc620f5c7064028828538bdf5646acd60dc1f767803" +dependencies = [ + "argh_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "argh_shared" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a464143cc82dedcdc3928737445362466b7674b5db4e2eb8e869846d6d84f4f6" +dependencies = [ + "serde", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -226,6 +258,7 @@ version = 
"1.2.1" dependencies = [ "criterion", "libc", + "lsh", "stdext", "toml-span", "windows-sys", @@ -355,6 +388,16 @@ dependencies = [ "stdext", ] +[[package]] +name = "lsh-bin" +version = "0.0.0" +dependencies = [ + "anyhow", + "argh", + "lsh", + "stdext", +] + [[package]] name = "memchr" version = "2.7.6" @@ -514,6 +557,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "rust-fuzzy-search" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a157657054ffe556d8858504af8a672a054a6e0bd9e8ee531059100c0fa11bb2" + [[package]] name = "rustversion" version = "1.0.22" diff --git a/assets/highlighting-tests/bash.sh b/assets/highlighting-tests/bash.sh new file mode 100644 index 000000000000..dfd5238724db --- /dev/null +++ b/assets/highlighting-tests/bash.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +# This is a comment + +readonly VAR1="Hello" # String literal +VAR2=42 # Integer literal +VAR3=$((VAR2 + 8)) # Arithmetic expansion +VAR4=$(echo "World") # Command substitution + +function greet() { # Function definition + local name="$1" # Local variable, parameter expansion + echo "${VAR1}, $name! 
$VAR4" # String, parameter expansion, variable +} + +greet "User" # Function call, string literal + +if [[ $VAR2 -gt 40 && $VAR3 -eq 50 ]]; then # Conditional, test, operators + echo "Numbers are correct" # String literal +elif (( VAR2 < 40 )); then # Arithmetic test + echo 'VAR2 is less than 40' # Single-quoted string +else + echo "Other case" +fi + +for i in {1..3}; do # Brace expansion, for loop + echo "Loop $i" # String, variable +done + +case "$VAR4" in # Case statement + World) echo "It's World";; # Pattern, string + *) echo "Unknown";; # Wildcard +esac + +arr=(one two three) # Array +echo "${arr[1]}" # Array access + +declare -A assoc # Associative array +assoc[key]="value" +echo "${assoc[key]}" + +# Here document +cat < /dev/null + +# Background job +sleep 1 & + +# Arithmetic assignment +let VAR2+=1 + +# Process substitution +diff <(echo foo) <(echo bar) + +# Command grouping +{ echo "Group 1"; echo "Group 2"; } + +# Escaped characters +echo "A quote: \" and a backslash: \\" + +# End of file diff --git a/assets/highlighting-tests/batch.bat b/assets/highlighting-tests/batch.bat new file mode 100644 index 000000000000..962ef66007af --- /dev/null +++ b/assets/highlighting-tests/batch.bat @@ -0,0 +1,41 @@ +@echo off +REM --- String, Variable, Label, Command, Operator, Number, Delimiter, Comment --- + +:: Label +:Start + +:: Variable assignment and usage +set "VAR1=Hello" +set VAR2=World + +:: String with spaces and special characters +set "STR=Batch ^& CMD!" + +:: Arithmetic operation (number, operator) +set /a SUM=5+10 + +:: IF statement (keyword, operator, string, variable) +if "%VAR1%"=="Hello" ( + echo %VAR1%, %VAR2%! %STR% +) else ( + echo Not matched! 
+) + +:: FOR loop (keyword, variable, delimiter, string) +for %%F in (*.bat) do ( + echo Found file: %%F +) + +:: CALL command (keyword, label) +call :SubRoutine + +:: GOTO command (keyword, label) +goto :End + +:: Subroutine with parameter +:SubRoutine +echo In subroutine with SUM=%SUM% +goto :eof + +:End +REM End of script diff --git a/assets/highlighting-tests/html.html b/assets/highlighting-tests/html.html new file mode 100644 index 000000000000..35682eee1b34 --- /dev/null +++ b/assets/highlighting-tests/html.html @@ -0,0 +1,51 @@ + + + + + + + HTML Syntax Test & Demo + + + + + +

Heading

+ + +

Text with < > & " ' A A entities

+
+
+ Description + + + + + + +
+ Inline bold text + +
+ + + + + + diff --git a/assets/highlighting-tests/markdown.md b/assets/highlighting-tests/markdown.md new file mode 100644 index 000000000000..c4845eb68014 --- /dev/null +++ b/assets/highlighting-tests/markdown.md @@ -0,0 +1,75 @@ +# H1 + +## H2 + +### H3 + +#### H4 + +##### H5 + +###### H6 + +regular +*italic* +_italic_ +**bold** +__bold__ +***bold italic*** +**_bold italic_** +__*bold italic*__ +~~strikethrough~~ +`inline code` +`` `literal` `` +\*not\* \_italic\_ # not a heading + +* Unordered item + - Nested item + * Third level +* Task list: + * [ ] To do + * [x] Done + * [ ] *Mixed* **formatting** with `code` +1. Ordered can start anywhere +2. …like here (intentional) + 1. Nested ordered + 2. Multiple paragraphs within a list item: + Still the same item. + +> A single-level quote +> +> > A nested quote with **bold** and `code` +> +> * List in a quote +> * [Link in quote](#links) + +Inline: [Example](https://example.com "Example Title") +Reference: [Ref Link][ref] and [Another][another-ref] +Relative: [This section](#tables) +Footnote: [^note] +[ref]: https://example.com +[another-ref]: https://github.com +[^note]: This is a footnote with **formatting** and a [link](https://github.com). 
+ +Inline: ![Alt text](https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png "GitHub Mark") +Reference: ![Logo][logo-ref] +[logo-ref]: https://github.githubassets.com/images/modules/logos_page/GitHub-Logo.png "GitHub Logo" + +| Left | Center | Right | +| :---------- | :--------: | ----: | +| *italic* | `code` | 123 | +| **bold** | ~~strike~~ | 4.56 | +| [link][ref] | :tada: | `end` | + +```bash +# Shell +echo "Hello, world" | tr a-z A-Z +``` + +```json +{ + "name": "gfm-kitchen-sink", + "private": true, + "scripts": { "test": "echo ok" } +} +``` diff --git a/assets/highlighting-tests/powershell.ps1 b/assets/highlighting-tests/powershell.ps1 new file mode 100644 index 000000000000..e2bb01f39c6c --- /dev/null +++ b/assets/highlighting-tests/powershell.ps1 @@ -0,0 +1,78 @@ +# Single-line comment + +<# +Multi-line +comment +#> + +function Get-SampleData { + param( + [string]$Name = "World", # String literal, parameter + [int]$Count = 3 + ) + + $array = @(1, 2, 3) # Array literal + $hashtable = @{ Key1 = 'Value1'; Key2 = 42 } # Hashtable literal + + $nullVar = $null + $boolTrue = $true + $boolFalse = $false + + $regexMatch = "abc123" -match '\d+' # Regex literal + + for ($i = 0; $i -lt $Count; $i++) { + Write-Host "Hello, $Name! Iteration: $i" # Variable interpolation, string + } + + if ($hashtable.Key2 -eq 42) { + Write-Output "Hashtable value is 42" + } + elseif ($hashtable.Key2 -gt 40) { + Write-Output "Hashtable value is greater than 40" + } + else { + Write-Output "Hashtable value is less than or equal to 40" + } + + switch ($Name) { + "World" { Write-Host "Default name used." } + default { Write-Host "Custom name: $Name" } + } + + try { + throw "An error occurred" + } + catch { + Write-Warning $_ + } + finally { + Write-Verbose "Finally block executed" + } + + $script:globalVar = 99 # Scope modifier + + # Here-String + $hereString = @" +This is a here-string. 
+Name: $Name +"@ + + return $hereString +} + +# Command invocation, pipeline, splatting +$paramSplat = @{ + Name = 'PowerShell' + Count = 2 +} +Get-SampleData @paramSplat | Out-File -FilePath "./output.txt" + +# Type literal, member access, method call +[System.DateTime]::Now.ToString("yyyy-MM-dd") + +# Subexpression +Write-Host "2 + 2 = $($array[0] + $array[1])" + +# Command substitution +$pwdPath = $(Get-Location).Path +Write-Host "Current directory: $pwdPath" diff --git a/assets/highlighting-tests/properties.conf b/assets/highlighting-tests/properties.conf new file mode 100644 index 000000000000..3bd3b62693ba --- /dev/null +++ b/assets/highlighting-tests/properties.conf @@ -0,0 +1,13 @@ +# General Settings +[General] +enabled = true +debug = false +log_level = info +max_connections = 1000 + +[SSL] +enabled = true +cert_file = /etc/ssl/certs/server.crt +key_file = /etc/ssl/private/server.key +protocols = TLSv1.2, TLSv1.3 # Supported protocols: "TLSv1.2" and "TLSv1.3" +cipher_suite = "ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-GCM-SHA256" diff --git a/assets/highlighting-tests/xml.xml b/assets/highlighting-tests/xml.xml new file mode 100644 index 000000000000..ad2c2aa09a0d --- /dev/null +++ b/assets/highlighting-tests/xml.xml @@ -0,0 +1,39 @@ + + +]> + + + + + + + Text & < > " ' content + &custom; + + + + + + + Text before inline text after + + + A A 😀 + + + + + + !@#$%^*()_+-={}[]|\:;"'<>,. + + + + + + + diff --git a/assets/highlighting-tests/yaml.yml b/assets/highlighting-tests/yaml.yml new file mode 100644 index 000000000000..6dbf0abada91 --- /dev/null +++ b/assets/highlighting-tests/yaml.yml @@ -0,0 +1,43 @@ +# This is a comment +--- +string: "Hello, world!" +plain: plainValue +multiline: | + This is a + true + multiline string. +folded: > + This is a + true + folded string. 
+number_int: 42 +number_float: 3.1415 +number_scientific: 1.23e45 +number_negative: -7 +boolean_true: true +boolean_false: false +null_value: null +explicit_null: ~ +date: 2024-06-01 +timestamp: 2024-06-01T12:34:56Z +confusable_string_number: 1.23e45 1.23e45 # This is a comment +sequence: + - item1 + - item2 + - 3 + - true +mapping: + key1: value1 + key2: value2 +nested: + - name: Alice + age: 30 + married: false + - name: Bob + age: 25 + married: true +empty_sequence: [foo, 123, bar] +empty_mapping: { foo: bar } +literal_colon: "value:with:colons" +literal_dash: "-not-a-sequence" +special_chars: "Tab:\t Newline:\n Unicode:\u2713" diff --git a/crates/edit/Cargo.toml b/crates/edit/Cargo.toml index fcf8edeb79d1..3858b585cce7 100644 --- a/crates/edit/Cargo.toml +++ b/crates/edit/Cargo.toml @@ -19,6 +19,7 @@ harness = false debug-latency = [] [dependencies] +lsh.workspace = true stdext.workspace = true [target.'cfg(unix)'.dependencies] @@ -26,6 +27,7 @@ libc = "0.2" [build-dependencies] stdext.workspace = true +lsh.workspace = true # The default toml crate bundles its dependencies with bad compile times. Thanks. # Thankfully toml-span exists. FWIW the alternative is yaml-rust (without the 2 suffix). 
toml-span = { version = "0.6", default-features = false } diff --git a/crates/edit/benches/lib.rs b/crates/edit/benches/lib.rs index 2141e9029ef2..99a95f58e68f 100644 --- a/crates/edit/benches/lib.rs +++ b/crates/edit/benches/lib.rs @@ -3,13 +3,15 @@ use std::hint::black_box; use std::io::Cursor; +use std::path::Path; use std::{mem, vec}; use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; use edit::helpers::*; -use edit::{buffer, glob, hash, json, oklab, simd, unicode}; +use edit::{buffer, hash, json, lsh, oklab, simd, unicode}; use stdext::arena::{self, scratch_arena}; use stdext::collections::BVec; +use stdext::glob; use stdext::unicode::Utf8Chars; struct EditingTracePatch<'a>(usize, usize, &'a str); @@ -178,6 +180,34 @@ fn bench_json(c: &mut Criterion) { ); } +fn bench_lsh(c: &mut Criterion) { + let bytes = include_bytes!("../../../assets/highlighting-tests/markdown.md"); + let bytes = &bytes[..]; + let lang = lsh::LANGUAGES.iter().find(|lang| lang.id == "markdown").unwrap(); + let highlighter = lsh::Highlighter::new(black_box(&bytes), lang); + + c.benchmark_group("lsh").throughput(Throughput::Bytes(bytes.len() as u64)).bench_function( + "markdown", + |b| { + b.iter(|| { + let mut h = highlighter.clone(); + loop { + let scratch = scratch_arena(None); + let res = h.parse_next_line(&scratch); + if res.is_empty() { + break; + } + } + }) + }, + ); + + c.benchmark_group("lsh").bench_function("process_file_associations", |b| { + let path = Path::new("/some/long/path/to/file/foo.bar.foo.bar.foo.bar"); + b.iter(|| lsh::process_file_associations(lsh::FILE_ASSOCIATIONS, black_box(path))) + }); +} + fn bench_oklab(c: &mut Criterion) { c.benchmark_group("oklab") .bench_function("StraightRgba::as_oklab", |b| { @@ -284,6 +314,7 @@ fn bench(c: &mut Criterion) { bench_glob(c); bench_hash(c); bench_json(c); + bench_lsh(c); bench_oklab(c); bench_simd_lines_fwd(c); bench_simd_memchr2(c); diff --git a/crates/edit/build/main.rs 
b/crates/edit/build/main.rs index 4acb58a87817..96ae2f201421 100644 --- a/crates/edit/build/main.rs +++ b/crates/edit/build/main.rs @@ -3,6 +3,8 @@ #![allow(irrefutable_let_patterns)] +use stdext::arena::scratch_arena; + use crate::helpers::env_opt; mod helpers; @@ -24,12 +26,31 @@ fn main() { _ => TargetOs::Unix, }; + compile_lsh(); compile_i18n(); configure_icu(target_os); #[cfg(windows)] configure_windows_binary(target_os); } +fn compile_lsh() { + let scratch = scratch_arena(None); + + let lsh_path = lsh::compiler::builtin_definitions_path(); + let out_dir = env_opt("OUT_DIR"); + let out_path = format!("{out_dir}/lsh_definitions.rs"); + + let mut generator = lsh::compiler::Generator::new(&scratch); + match generator.read_directory(lsh_path).and_then(|_| generator.generate_rust()) { + Ok(c) => std::fs::write(out_path, c).unwrap(), + Err(err) => { + panic!("failed to compile lsh definitions: {err}"); + } + }; + + println!("cargo::rerun-if-changed={}", lsh_path.display()); +} + fn compile_i18n() { let i18n_path = "../../i18n/edit.toml"; diff --git a/crates/edit/src/bin/edit/apperr.rs b/crates/edit/src/bin/edit/apperr.rs index baa8d071a8e5..f8aea68da7e5 100644 --- a/crates/edit/src/bin/edit/apperr.rs +++ b/crates/edit/src/bin/edit/apperr.rs @@ -7,6 +7,8 @@ use edit::{buffer, icu}; #[derive(Debug)] pub enum Error { + SettingsInvalidJson, + SettingsInvalidValue, Io(io::Error), Icu(icu::Error), } diff --git a/crates/edit/src/bin/edit/documents.rs b/crates/edit/src/bin/edit/documents.rs index 23de9a77c33a..1bbdde61afdb 100644 --- a/crates/edit/src/bin/edit/documents.rs +++ b/crates/edit/src/bin/edit/documents.rs @@ -8,9 +8,11 @@ use std::{fs, io}; use edit::buffer::{RcTextBuffer, TextBuffer}; use edit::helpers::{CoordType, Point}; +use edit::lsh::{FILE_ASSOCIATIONS, Language, process_file_associations}; use edit::{path, sys}; use crate::apperr; +use crate::settings::Settings; use crate::state::DisplayablePathBuf; pub struct Document { @@ -20,6 +22,7 @@ pub struct 
Document { pub filename: String, pub file_id: Option, pub new_file_counter: usize, + pub language_override: Option>, } impl Document { @@ -62,15 +65,45 @@ impl Document { fn set_path(&mut self, path: PathBuf) { let filename = path.file_name().unwrap_or_default().to_string_lossy().into_owned(); let dir = path.parent().map(ToOwned::to_owned).unwrap_or_default(); + self.filename = filename; self.dir = Some(DisplayablePathBuf::from_path(dir)); self.path = Some(path); - self.update_file_mode(); + + self.buffer.borrow_mut().set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 }); + self.update_language(); + } + + pub fn auto_detect_language(&mut self) { + self.language_override = None; + self.update_language(); + } + + pub fn override_language(&mut self, lang: Option<&'static Language>) { + self.language_override = Some(lang); + self.update_language(); + } + + fn update_language(&mut self) { + self.buffer.borrow_mut().set_language(self.get_language()); } - fn update_file_mode(&mut self) { - let mut tb = self.buffer.borrow_mut(); - tb.set_ruler(if self.filename == "COMMIT_EDITMSG" { 72 } else { 0 }); + fn get_language(&self) -> Option<&'static Language> { + if let Some(lang) = self.language_override { + return lang; + } + + if let Some(path) = &self.path { + let settings = Settings::borrow(); + if let Some(lang) = process_file_associations(&settings.file_associations, path) { + return Some(lang); + } + if let Some(lang) = process_file_associations(FILE_ASSOCIATIONS, path) { + return Some(lang); + } + } + + None } } @@ -140,6 +173,7 @@ impl DocumentManager { filename: Default::default(), file_id: None, new_file_counter: 0, + language_override: None, }; self.gen_untitled_name(&mut doc); @@ -201,6 +235,7 @@ impl DocumentManager { filename: Default::default(), file_id, new_file_counter: 0, + language_override: None, }; doc.set_path(path); diff --git a/crates/edit/src/bin/edit/draw_menubar.rs b/crates/edit/src/bin/edit/draw_menubar.rs index a8e1da78c242..ffa829b932e4 
100644 --- a/crates/edit/src/bin/edit/draw_menubar.rs +++ b/crates/edit/src/bin/edit/draw_menubar.rs @@ -7,6 +7,7 @@ use edit::tui::*; use stdext::arena_format; use crate::localization::*; +use crate::settings::Settings; use crate::state::*; pub fn draw_menubar(ctx: &mut Context, state: &mut State) { @@ -51,9 +52,18 @@ fn draw_menu_file(ctx: &mut Context, state: &mut State) { if ctx.menubar_menu_button(loc(LocId::FileSaveAs), 'A', vk::NULL) { state.wants_file_picker = StateFilePicker::SaveAs; } - if ctx.menubar_menu_button(loc(LocId::FileClose), 'C', kbmod::CTRL | vk::W) { - state.wants_close = true; - } + } + #[allow(irrefutable_let_patterns)] + if let path = Settings::borrow().path.as_path() + && ctx.menubar_menu_button(loc(LocId::FilePreferences), 'P', vk::NULL) + && let Err(err) = state.documents.add_file_path(path) + { + error_log_add(ctx, state, err); + } + if state.documents.active().is_some() + && ctx.menubar_menu_button(loc(LocId::FileClose), 'C', kbmod::CTRL | vk::W) + { + state.wants_close = true; } if ctx.menubar_menu_button(loc(LocId::FileExit), 'X', kbmod::CTRL | vk::Q) { state.wants_exit = true; diff --git a/crates/edit/src/bin/edit/draw_statusbar.rs b/crates/edit/src/bin/edit/draw_statusbar.rs index 44a688f834c5..f061527932af 100644 --- a/crates/edit/src/bin/edit/draw_statusbar.rs +++ b/crates/edit/src/bin/edit/draw_statusbar.rs @@ -6,6 +6,7 @@ use edit::fuzzy::score_fuzzy; use edit::helpers::*; use edit::icu; use edit::input::vk; +use edit::lsh::LANGUAGES; use edit::tui::*; use stdext::arena::scratch_arena; use stdext::arena_format; @@ -28,15 +29,21 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) { ctx.table_next_row(); - if ctx.button("newline", if tb.is_crlf() { "CRLF" } else { "LF" }, ButtonStyle::default()) { - let is_crlf = tb.is_crlf(); - tb.normalize_newlines(!is_crlf); - } + state.wants_language_picker |= ctx.button( + "language", + tb.language().map_or("Plain Text", |l| l.name), + ButtonStyle::default(), + ); if 
state.wants_statusbar_focus { state.wants_statusbar_focus = false; ctx.steal_focus(); } + if ctx.button("newline", if tb.is_crlf() { "CRLF" } else { "LF" }, ButtonStyle::default()) { + let is_crlf = tb.is_crlf(); + tb.normalize_newlines(!is_crlf); + } + state.wants_encoding_picker |= ctx.button("encoding", tb.encoding(), ButtonStyle::default()); if state.wants_encoding_picker { @@ -201,6 +208,55 @@ pub fn draw_statusbar(ctx: &mut Context, state: &mut State) { ctx.table_end(); } +pub fn draw_dialog_language_change(ctx: &mut Context, state: &mut State) { + let doc = state.documents.active_mut(); + let mut done = doc.is_none(); + + ctx.modal_begin("language", loc(LocId::LanguageSelectMode)); + if let Some(doc) = doc { + let width = (ctx.size().width - 20).max(10); + let height = (ctx.size().height - 10).max(10); + + ctx.scrollarea_begin("scrollarea", Size { width, height }); + ctx.attr_background_rgba(ctx.indexed_alpha(IndexedColor::Black, 1, 4)); + ctx.inherit_focus(); + { + ctx.list_begin("languages"); + ctx.inherit_focus(); + + let auto_detect = doc.language_override.is_none(); + let selected = if auto_detect { None } else { doc.buffer.borrow().language() }; + + if ctx.list_item(auto_detect, loc(LocId::LanguageAutoDetect)) + == ListSelection::Activated + { + doc.auto_detect_language(); + done = true; + } + + if ctx.list_item(selected.is_none(), "Plain Text") == ListSelection::Activated { + doc.override_language(None); + done = true; + } + + for lang in LANGUAGES { + if ctx.list_item(Some(lang) == selected, lang.name) == ListSelection::Activated { + doc.override_language(Some(lang)); + done = true; + } + } + ctx.list_end(); + } + ctx.scrollarea_end(); + } + done |= ctx.modal_end(); + + if done { + state.wants_language_picker = false; + ctx.needs_rerender(); + } +} + pub fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { let encoding = state.documents.active_mut().map_or("", |doc| doc.buffer.borrow().encoding()); let reopen = 
state.wants_encoding_change == StateEncodingChange::Reopen; diff --git a/crates/edit/src/bin/edit/main.rs b/crates/edit/src/bin/edit/main.rs index ea77d0af19e3..4562da56e9bc 100644 --- a/crates/edit/src/bin/edit/main.rs +++ b/crates/edit/src/bin/edit/main.rs @@ -8,6 +8,7 @@ mod draw_filepicker; mod draw_menubar; mod draw_statusbar; mod localization; +mod settings; mod state; use std::borrow::Cow; @@ -32,6 +33,8 @@ use stdext::arena::{self, Arena, scratch_arena}; use stdext::arena_format; use stdext::collections::{BString, BVec}; +use crate::settings::Settings; + #[cfg(target_pointer_width = "32")] const SCRATCH_ARENA_CAPACITY: usize = 128 * MEBI; #[cfg(target_pointer_width = "64")] @@ -72,6 +75,8 @@ fn run() -> apperr::Result<()> { return Ok(()); } + Settings::reload()?; + // This will reopen stdin if it's redirected (which may fail) and switch // the terminal to raw mode which prevents the user from pressing Ctrl+C. // `handle_args` may want to print a help message (must not fail), @@ -325,6 +330,9 @@ fn draw(ctx: &mut Context, state: &mut State) { if state.wants_save { draw_handle_save(ctx, state); } + if state.wants_language_picker { + draw_dialog_language_change(ctx, state); + } if state.wants_encoding_change != StateEncodingChange::None { draw_dialog_encoding_change(ctx, state); } diff --git a/crates/edit/src/bin/edit/settings.rs b/crates/edit/src/bin/edit/settings.rs new file mode 100644 index 000000000000..3d6239d4b9a0 --- /dev/null +++ b/crates/edit/src/bin/edit/settings.rs @@ -0,0 +1,107 @@ +use std::path::PathBuf; + +use edit::cell::{Ref, SemiRefCell}; +use edit::json; +use edit::lsh::{LANGUAGES, Language}; +use stdext::arena::{read_to_string, scratch_arena}; +use stdext::arena_format; + +use crate::apperr; + +pub struct Settings { + pub path: PathBuf, + pub file_associations: Vec<(String, &'static Language)>, +} + +struct SettingsCell(SemiRefCell); +unsafe impl Sync for SettingsCell {} +static SETTINGS: SettingsCell = 
SettingsCell(SemiRefCell::new(Settings::new())); + +impl Settings { + const fn new() -> Self { + Settings { path: PathBuf::new(), file_associations: Vec::new() } + } + + pub fn borrow() -> Ref<'static, Settings> { + SETTINGS.0.borrow() + } + + pub fn reload() -> apperr::Result<()> { + let s = Self::load()?; + *SETTINGS.0.borrow_mut() = s; + Ok(()) + } + + fn load() -> apperr::Result { + let mut settings = Self::new(); + + settings.path = match settings_json_path() { + Some(p) => p, + None => return Ok(settings), + }; + + let scratch = scratch_arena(None); + let str = match read_to_string(&scratch, &settings.path) { + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(settings), + Err(err) => return Err(err.into()), + Ok(str) => str, + }; + let Ok(json) = json::parse(&scratch, &str) else { + return Err(apperr::Error::SettingsInvalidJson); + }; + let Some(root) = json.as_object() else { + return Err(apperr::Error::SettingsInvalidValue); + }; + + if let Some(f) = root.get_object("fileAssociations") { + for &(mut key, ref value) in f.iter() { + if !key.contains('/') { + key = arena_format!(&*scratch, "**/{key}").leak(); + } + + let Some(id) = value.as_str() else { + return Err(apperr::Error::SettingsInvalidValue); + }; + let Some(language) = LANGUAGES.iter().find(|lang| lang.id == id) else { + return Err(apperr::Error::SettingsInvalidValue); + }; + + settings.file_associations.push((key.to_string(), language)); + } + } + + Ok(settings) + } +} + +fn settings_json_path() -> Option { + let mut config_dir = config_dir()?; + config_dir.push("settings.json"); + Some(config_dir) +} + +fn config_dir() -> Option { + fn var_path(key: &str) -> Option { + std::env::var_os(key).map(PathBuf::from) + } + + fn push(mut path: PathBuf, suffix: &str) -> PathBuf { + path.push(suffix); + path + } + + #[cfg(target_os = "windows")] + { + var_path("APPDATA").map(|p| push(p, "Microsoft/Edit")) + } + #[cfg(any(target_os = "macos", target_os = "ios"))] + { + 
var_path("HOME").map(|p| push(p, "Library/Application Support/com.microsoft.edit")) + } + #[cfg(not(any(target_os = "windows", target_os = "macos", target_os = "ios")))] + { + var_path("XDG_CONFIG_HOME") + .or_else(|| var_path("HOME").map(|p| push(p, ".config"))) + .map(|p| push(p, "msedit")) + } +} diff --git a/crates/edit/src/bin/edit/state.rs b/crates/edit/src/bin/edit/state.rs index c8d45bd8ca64..653efef06ecf 100644 --- a/crates/edit/src/bin/edit/state.rs +++ b/crates/edit/src/bin/edit/state.rs @@ -28,6 +28,8 @@ impl From for FormatApperr { impl std::fmt::Display for FormatApperr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.0 { + apperr::Error::SettingsInvalidJson => f.write_str("Settings JSON is malformed"), + apperr::Error::SettingsInvalidValue => f.write_str("Invalid settings value"), apperr::Error::Icu(icu::ICU_MISSING_ERROR) => f.write_str(loc(LocId::ErrorIcuMissing)), apperr::Error::Icu(ref err) => err.fmt(f), apperr::Error::Io(ref err) => err.fmt(f), @@ -152,6 +154,8 @@ pub struct State { pub search_options: buffer::SearchOptions, pub search_success: bool, + pub wants_language_picker: bool, + pub wants_encoding_picker: bool, pub wants_encoding_change: StateEncodingChange, pub encoding_picker_needle: String, @@ -200,6 +204,8 @@ impl State { search_options: Default::default(), search_success: true, + wants_language_picker: false, + wants_encoding_picker: false, encoding_picker_needle: Default::default(), encoding_picker_results: Default::default(), diff --git a/crates/edit/src/buffer/line_cache.rs b/crates/edit/src/buffer/line_cache.rs deleted file mode 100644 index af7cd59493f5..000000000000 --- a/crates/edit/src/buffer/line_cache.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::ops::Range; - -use crate::{document::ReadableDocument, simd::memchr2}; - -/// Cache a line/offset pair every CACHE_EVERY lines to speed up line/offset calculations -const CACHE_EVERY: usize = 1024 * 64; - -#[derive(Clone)] -pub struct CachePoint { 
- pub index: usize, - pub line: usize, - // pub snapshot: ParserSnapshot -} - -pub struct LineCache { - cache: Vec, -} - -impl LineCache { - pub fn new() -> Self { - Self { cache: vec![] } - } - - pub fn from_document(&mut self, document: &T) { - self.cache.clear(); - - let mut offset = 0; - let mut line = 0; - loop { - let text = document.read_forward(offset); - if text.is_empty() { return; } - - let mut off = 0; - loop { - off = memchr2(b'\n', b'\n', text, off); - if off == text.len() { break; } - - if line % CACHE_EVERY == 0 { - self.cache.push(CachePoint { index: offset+off, line }); - } - line += 1; - off += 1; - } - - offset += text.len(); - } - } - - /// Updates the cache after a deletion. - /// `range` is the deleted byte range, and `text` is the content that was deleted. - pub fn delete(&mut self, range: Range, text: &Vec) { - let mut newlines = 0; - for c in text { - if *c == b'\n' { - newlines += 1; - } - } - - let mut beg_del = None; - let mut end_del = None; - for (i, point) in self.cache.iter_mut().enumerate() { - if point.index >= range.start { - if point.index < range.end { - // cache point is within the deleted range - if beg_del.is_none() { beg_del = Some(i); } - end_del = Some(i + 1); - } - else { - point.index -= text.len(); - point.line -= newlines; - } - } - } - - if let (Some(beg), Some(end)) = (beg_del, end_del) { - self.cache.drain(beg..end); - } - } - - /// Updates the cache after an insertion. - /// `offset` is where the insertion occurs, and `text` is the inserted content. - pub fn insert(&mut self, offset: usize, text: &[u8]) { - // Count how many newlines were inserted - let mut newlines = 0; - for c in text { - if *c == b'\n' { - newlines += 1; - } - } - - let len = text.len(); - for point in &mut self.cache { - if point.index > offset { - point.index += len; - point.line += newlines; - } - } - - // TODO: This also needs to insert new cache points - } - - /// Finds the nearest cached line-offset pair relative to a target line. 
- /// If `reverse` is false, it returns the closest *before* the target. - /// If `reverse` is true, it returns the closest *after or at* the target. - pub fn nearest_offset(&self, target_count: usize, reverse: bool) -> Option { - match self.cache.binary_search_by_key(&target_count, |p| p.line) { - Ok(i) => Some(self.cache[i].clone()), - Err(i) => { - if i == 0 || i == self.cache.len() { None } // target < lowest cache point || target > highest cache point - else { - Some(self.cache[ if reverse {i} else {i-1} ].clone()) - } - } - } - } -} diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs index e20eac76e531..80b8d5bb2988 100644 --- a/crates/edit/src/buffer/mod.rs +++ b/crates/edit/src/buffer/mod.rs @@ -42,8 +42,10 @@ use stdext::{ReplaceRange as _, arena_write_fmt, minmax, slice_as_uninit_mut, sl use crate::cell::SemiRefCell; use crate::clipboard::Clipboard; use crate::document::{ReadableDocument, WriteableDocument}; -use crate::framebuffer::{Framebuffer, IndexedColor}; +use crate::framebuffer::{Attributes, Framebuffer, IndexedColor}; use crate::helpers::*; +use crate::lsh::cache::HighlighterCache; +use crate::lsh::{HighlightKind, Highlighter, Language}; use crate::oklab::StraightRgba; use crate::simd::memchr2; use crate::unicode::{self, Cursor, MeasurementConfig}; @@ -250,6 +252,7 @@ pub struct TextBuffer { selection: Option, selection_generation: u32, search: Option>, + highlighter_cache: HighlighterCache, width: CoordType, margin_width: CoordType, @@ -259,6 +262,7 @@ pub struct TextBuffer { tab_size: CoordType, indent_with_tabs: bool, line_highlight_enabled: bool, + language: Option<&'static Language>, ruler: CoordType, encoding: &'static str, newlines_are_crlf: bool, @@ -298,6 +302,7 @@ impl TextBuffer { selection: None, selection_generation: 0, search: None, + highlighter_cache: HighlighterCache::new(), width: 0, margin_width: 0, @@ -307,6 +312,7 @@ impl TextBuffer { tab_size: 4, indent_with_tabs: false, line_highlight_enabled: false, + 
language: None, ruler: 0, encoding: "UTF-8", newlines_are_crlf: cfg!(windows), // Windows users want CRLF @@ -599,6 +605,15 @@ impl TextBuffer { self.line_highlight_enabled = enabled; } + pub fn language(&self) -> Option<&'static Language> { + self.language + } + + pub fn set_language(&mut self, language: Option<&'static Language>) { + self.language = language; + self.highlighter_cache.invalidate_from(0); + } + /// Sets a ruler column, e.g. 80. pub fn set_ruler(&mut self, column: CoordType) { self.ruler = column; @@ -677,6 +692,7 @@ impl TextBuffer { self.set_selection(None); self.mark_as_clean(); self.reflow(); + self.highlighter_cache.invalidate_from(0); } /// Copies the contents of the buffer into a string. @@ -1993,6 +2009,10 @@ impl TextBuffer { cursor = cursor_end; } + let logical_y_beg = self.cursor_for_rendering.unwrap().logical_pos.y; + let logical_y_end = cursor.logical_pos.y + 1; + self.render_apply_highlights(origin, destination, logical_y_beg..logical_y_end, fb); + // Colorize the margin that we wrote above. if self.margin_width > 0 { let margin = Rect { @@ -2058,6 +2078,153 @@ impl TextBuffer { Some(RenderResult { visual_pos_x_max }) } + fn render_apply_highlights( + &mut self, + origin: Point, + destination: Rect, + logical_y_range: Range, + fb: &mut Framebuffer, + ) { + let Some(language) = self.language else { + return; + }; + + let mut highlighter = Highlighter::new(&self.buffer, language); + + // Track cursor position for efficient offset-to-position conversions. + // Start from the rendering cursor which is at the beginning of the visible area. + let mut cursor = self.cursor_for_rendering.unwrap(); + + // Visible vertical range in visual coordinates. + let visible_top = origin.y; + let visible_bottom = origin.y + destination.height(); + + // Text area boundaries in screen coordinates (excluding margin). 
+ let text_left = destination.left + self.margin_width; + let text_right = destination.right; + + for logical_y in logical_y_range { + // Seek cursor to the start of this logical line for efficient lookups. + // This is important because highlights are sorted by offset within + // each logical line. + cursor = self.goto_line_start(cursor, logical_y); + + let scratch = scratch_arena(None); + let highlights = + self.highlighter_cache.parse_line(&scratch, &mut highlighter, logical_y); + + for pair in highlights.windows(2) { + let curr = &pair[0]; + let next = &pair[1]; + + // Skip highlights with no visual effect. + if curr.kind == HighlightKind::Other { + continue; + } + + // Convert byte offsets to cursor positions. Since highlights are + // sorted by offset, we chain from cursor -> beg -> end for efficiency. + let beg = self.cursor_move_to_offset_internal(cursor, curr.start); + let end = self.cursor_move_to_offset_internal(beg, next.start); + cursor = end; + + let color = match curr.kind { + HighlightKind::Other => None, + HighlightKind::Comment => Some(IndexedColor::Green), + HighlightKind::Method => Some(IndexedColor::BrightYellow), + HighlightKind::String => Some(IndexedColor::BrightRed), + HighlightKind::Variable => Some(IndexedColor::BrightCyan), + HighlightKind::ConstantLanguage => Some(IndexedColor::BrightBlue), + HighlightKind::ConstantNumeric => Some(IndexedColor::BrightGreen), + HighlightKind::KeywordControl => Some(IndexedColor::BrightMagenta), + HighlightKind::KeywordOther => Some(IndexedColor::BrightBlue), + HighlightKind::MarkupBold => None, + HighlightKind::MarkupChanged => Some(IndexedColor::BrightBlue), + HighlightKind::MarkupDeleted => Some(IndexedColor::BrightRed), + HighlightKind::MarkupHeading => Some(IndexedColor::BrightBlue), + HighlightKind::MarkupInserted => Some(IndexedColor::BrightGreen), + HighlightKind::MarkupItalic => None, + HighlightKind::MarkupLink => None, + HighlightKind::MarkupList => Some(IndexedColor::BrightBlue), + 
HighlightKind::MarkupStrikethrough => None, + HighlightKind::MetaHeader => Some(IndexedColor::BrightBlue), + }; + let attr = match curr.kind { + HighlightKind::MarkupBold => Some(Attributes::Bold), + HighlightKind::MarkupItalic => Some(Attributes::Italic), + HighlightKind::MarkupLink => Some(Attributes::Underlined), + HighlightKind::MarkupStrikethrough => Some(Attributes::Strikethrough), + _ => None, + }; + + // Handle the case where the highlight spans multiple visual lines + // due to word wrapping. The range is [beg, end) in terms of offsets, + // which maps to visual lines [beg.visual_pos.y, end.visual_pos.y]. + // + // When beg and end are on the same visual line, we highlight + // [beg.visual_pos.x, end.visual_pos.x). + // + // When they span multiple lines: + // - First line: [beg.visual_pos.x, end_of_line) + // - Middle lines: [0, end_of_line) + // - Last line: [0, end.visual_pos.x) + // + // However, if end.visual_pos.x == 0, the last line has no content + // to highlight (the span ends exactly at the line boundary). + let visual_y_end = if end.visual_pos.x == 0 && end.visual_pos.y > beg.visual_pos.y { + // The span ends at position 0 of a new visual line, meaning + // it actually ends at the end of the previous visual line. + end.visual_pos.y - 1 + } else { + end.visual_pos.y + }; + + // Use min/max to skip visual lines outside the visible vertical range. + for visual_y in + beg.visual_pos.y.max(visible_top)..(visual_y_end + 1).min(visible_bottom) + { + let vis_left = if visual_y == beg.visual_pos.y { + beg.visual_pos.x + } else { + // Wrapped continuation lines start at visual x=0. + 0 + }; + let vis_right = if visual_y == end.visual_pos.y { + end.visual_pos.x + } else { + // Line extends to the word wrap column or beyond. + COORD_TYPE_SAFE_MAX + }; + + // Convert to screen coordinates. 
+ let screen_left = text_left + vis_left - origin.x; + let screen_right = (text_left + vis_right - origin.x).min(text_right); + let screen_y = destination.top + visual_y - origin.y; + + // Create the target rectangle, clamped to the text area. + let rect = Rect { + left: screen_left.max(text_left), + top: screen_y, + right: screen_right, + bottom: screen_y + 1, + }; + + // Skip empty or invalid rectangles. + if rect.left >= rect.right { + continue; + } + + if let Some(color) = color { + fb.blend_fg(rect, fb.indexed(color)); + } + if let Some(attr) = attr { + fb.replace_attr(rect, Attributes::All, attr); + } + } + } + } + } + pub fn cut(&mut self, clipboard: &mut Clipboard) { self.cut_copy(clipboard, true); } @@ -2613,6 +2780,7 @@ impl TextBuffer { } self.active_edit_off = cursor.offset; + self.highlighter_cache.invalidate_from(cursor.logical_pos.y); // If word-wrap is enabled, the visual layout of all logical lines affected by the write // may have changed. This includes even text before the insertion point up to the line @@ -2861,6 +3029,8 @@ impl TextBuffer { return; } + self.highlighter_cache.invalidate_from(damage_start); + if entry_buffer_generation.is_some() { self.recalc_after_content_changed(); } diff --git a/crates/edit/src/framebuffer.rs b/crates/edit/src/framebuffer.rs index 464d11941e52..f6640e344fdb 100644 --- a/crates/edit/src/framebuffer.rs +++ b/crates/edit/src/framebuffer.rs @@ -509,6 +509,13 @@ impl Framebuffer { if last_attr != attr { let diff = last_attr ^ attr; + if diff.is(Attributes::Bold) { + if attr.is(Attributes::Bold) { + result.push_str(arena, "\x1b[1m"); + } else { + result.push_str(arena, "\x1b[22m"); + } + } if diff.is(Attributes::Italic) { if attr.is(Attributes::Italic) { result.push_str(arena, "\x1b[3m"); @@ -523,6 +530,13 @@ impl Framebuffer { result.push_str(arena, "\x1b[24m"); } } + if diff.is(Attributes::Strikethrough) { + if attr.is(Attributes::Strikethrough) { + result.push_str(arena, "\x1b[9m"); + } else { + 
result.push_str(arena, "\x1b[29m"); + } + } last_attr = attr; } @@ -838,9 +852,11 @@ pub struct Attributes(u8); #[allow(non_upper_case_globals)] impl Attributes { pub const None: Self = Self(0); - pub const Italic: Self = Self(0b1); - pub const Underlined: Self = Self(0b10); - pub const All: Self = Self(0b11); + pub const Bold: Self = Self(1); + pub const Italic: Self = Self(2); + pub const Underlined: Self = Self(4); + pub const Strikethrough: Self = Self(8); + pub const All: Self = Self(16 - 1); pub const fn is(self, attr: Self) -> bool { (self.0 & attr.0) == attr.0 diff --git a/crates/edit/src/lib.rs b/crates/edit/src/lib.rs index 6bb731cfd5ab..585b3bc928fd 100644 --- a/crates/edit/src/lib.rs +++ b/crates/edit/src/lib.rs @@ -15,12 +15,12 @@ pub mod clipboard; pub mod document; pub mod framebuffer; pub mod fuzzy; -pub mod glob; pub mod hash; pub mod helpers; pub mod icu; pub mod input; pub mod json; +pub mod lsh; pub mod oklab; pub mod path; pub mod simd; diff --git a/crates/edit/src/lsh/cache.rs b/crates/edit/src/lsh/cache.rs new file mode 100644 index 000000000000..0c23b6d23997 --- /dev/null +++ b/crates/edit/src/lsh/cache.rs @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use lsh::runtime::Highlight; +use stdext::arena::{Arena, scratch_arena}; +use stdext::collections::BVec; + +use crate::helpers::CoordType; +use crate::lsh::{HighlightKind, Highlighter, HighlighterState}; + +#[cfg(debug_assertions)] +const INTERVAL: CoordType = 16; +#[cfg(not(debug_assertions))] +const INTERVAL: CoordType = 1024; + +#[derive(Default)] +pub struct HighlighterCache { + checkpoints: Vec, +} + +impl HighlighterCache { + pub fn new() -> Self { + Self::default() + } + + /// Drop any cached states starting at (including) the given logical line. + pub fn invalidate_from(&mut self, line: CoordType) { + self.checkpoints.truncate(Self::ceil_line_to_offset(line)); + } + + /// Parse the given logical line. Returns the highlight spans. 
+ pub fn parse_line<'a>( + &mut self, + arena: &'a Arena, + highlighter: &mut Highlighter, + line: CoordType, + ) -> BVec<'a, Highlight> { + // Do we need to random seek? + if line != highlighter.logical_pos_y() { + // If so, restore the nearest, preceding checkpoint... + if !self.checkpoints.is_empty() { + let n = Self::floor_line_to_offset(line); + let n = n.min(self.checkpoints.len() - 1); + highlighter.restore(&self.checkpoints[n]); + } else { + // The assumption is that you pass in a default constructed highlighter, + // and this class handles random seeking for you. As such, there should + // never be a case where we don't have a checkpoint for line 0, + // but you have a highlighter for line >0. + debug_assert!(highlighter.logical_pos_y() == 0); + } + + // ...and then seek in front of the requested line. + while highlighter.logical_pos_y() < line { + // There's a bit of waste here, because we just throw away the results, + // but that's better than duplicating the logic. The arena is very fast. + let scratch = scratch_arena(Some(arena)); + _ = self.parse_line_impl(&scratch, highlighter); + } + } + + self.parse_line_impl(arena, highlighter) + } + + fn parse_line_impl<'a>( + &mut self, + arena: &'a Arena, + highlighter: &mut Highlighter, + ) -> BVec<'a, Highlight> { + // If we need to store a checkpoint for the start of the next line, do so now. + if Self::floor_line_to_offset(highlighter.logical_pos_y()) == self.checkpoints.len() { + self.checkpoints.push(highlighter.snapshot()); + } + + highlighter.parse_next_line(arena) + } + + /// Since this line cache is super simplistic (no insertions, only append), + /// we can directly map from line numbers to offsets in the cache. 
+ fn floor_line_to_offset(line: CoordType) -> usize { + (line / INTERVAL).try_into().unwrap_or(0) + } + + fn ceil_line_to_offset(line: CoordType) -> usize { + ((line + INTERVAL - 1) / INTERVAL).try_into().unwrap_or(0) + } +} diff --git a/crates/edit/src/lsh/definitions.rs b/crates/edit/src/lsh/definitions.rs new file mode 100644 index 000000000000..7c3cea5cfff6 --- /dev/null +++ b/crates/edit/src/lsh/definitions.rs @@ -0,0 +1,4 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +include!(concat!(env!("OUT_DIR"), "/lsh_definitions.rs")); diff --git a/crates/edit/src/lsh/highlighter.rs b/crates/edit/src/lsh/highlighter.rs new file mode 100644 index 000000000000..ea34f5a5eb2f --- /dev/null +++ b/crates/edit/src/lsh/highlighter.rs @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use lsh::runtime::*; +use stdext::arena::{Arena, scratch_arena}; +use stdext::collections::BVec; + +use crate::document::ReadableDocument; +use crate::helpers::*; +use crate::lsh::definitions::*; +use crate::{simd, unicode}; + +const MAX_LINE_LEN: usize = 32 * KIBI; + +#[derive(Clone)] +pub struct Highlighter<'a> { + doc: &'a dyn ReadableDocument, + offset: usize, + logical_pos_y: CoordType, + runtime: Runtime<'static, 'static, 'static>, +} + +#[derive(Clone)] +pub struct HighlighterState { + offset: usize, + logical_pos_y: CoordType, + state: RuntimeState, +} + +impl<'doc> Highlighter<'doc> { + pub fn new(doc: &'doc dyn ReadableDocument, language: &'static Language) -> Self { + Self { + doc, + offset: 0, + logical_pos_y: 0, + runtime: Runtime::new(&ASSEMBLY, &STRINGS, &CHARSETS, language.entrypoint), + } + } + + pub fn logical_pos_y(&self) -> CoordType { + self.logical_pos_y + } + + /// Create a restorable snapshot of the current highlighter state + /// so we can resume highlighting from this point later. 
+ pub fn snapshot(&self) -> HighlighterState { + HighlighterState { + offset: self.offset, + logical_pos_y: self.logical_pos_y, + state: self.runtime.snapshot(), + } + } + + /// Restore the highlighter state from a previously captured snapshot. + pub fn restore(&mut self, snapshot: &HighlighterState) { + self.offset = snapshot.offset; + self.logical_pos_y = snapshot.logical_pos_y; + self.runtime.restore(&snapshot.state); + } + + pub fn parse_next_line<'a>(&mut self, arena: &'a Arena) -> BVec<'a, Highlight> { + let scratch = scratch_arena(Some(arena)); + let (line_beg, line) = self.read_next_line(&scratch); + + // Empty lines can be somewhat common. + // + // If the line is too long, we don't highlight it. + // This is to prevent performance issues with very long lines. + if line.is_empty() || line.len() >= MAX_LINE_LEN { + return BVec::empty(); + } + + let line = unicode::strip_newline(line); + let mut res = self.runtime.parse_next_line(arena, line); + + // Adjust the range to account for the line offset. + for h in res.iter_mut() { + h.start = line_beg + h.start.min(line.len()); + } + + res + } + + fn read_next_line<'a>(&mut self, arena: &'a Arena) -> (usize, &'a [u8]) + where + 'doc: 'a, + { + self.logical_pos_y += 1; + + let line_beg = self.offset; + let mut chunk; + let mut line_buf; + + // Try to read a chunk and see if it contains a newline. + // In that case we can skip concatenating chunks. + { + chunk = self.doc.read_forward(self.offset); + if chunk.is_empty() { + return (line_beg, chunk); + } + + let (off, line) = simd::lines_fwd(chunk, 0, 0, 1); + self.offset += off; + + if line == 1 { + return (line_beg, &chunk[..off]); + } + + let next_chunk = self.doc.read_forward(self.offset); + if next_chunk.is_empty() { + return (line_beg, &chunk[..off]); + } + + line_buf = BVec::empty(); + + // Ensure we don't overflow the heap size with a 1GB long line. 
+ let end = off.min(MAX_LINE_LEN - line_buf.len()); + let end = end.min(chunk.len()); + line_buf.extend_from_slice(arena, &chunk[..end]); + + chunk = next_chunk; + } + + // Concatenate chunks until we get a full line. + while line_buf.len() < MAX_LINE_LEN { + let (off, line) = simd::lines_fwd(chunk, 0, 0, 1); + self.offset += off; + + // Ensure we don't overflow the heap size with a 1GB long line. + let end = off.min(MAX_LINE_LEN - line_buf.len()); + let end = end.min(chunk.len()); + line_buf.extend_from_slice(arena, &chunk[..end]); + + // Start of the next line found. + if line == 1 { + break; + } + + chunk = self.doc.read_forward(self.offset); + if chunk.is_empty() { + break; + } + } + + (line_beg, line_buf.leak()) + } +} diff --git a/crates/edit/src/lsh/mod.rs b/crates/edit/src/lsh/mod.rs new file mode 100644 index 000000000000..986083bfce85 --- /dev/null +++ b/crates/edit/src/lsh/mod.rs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Microsoft Edit's adapter to LSH. 
+ +pub mod cache; +mod definitions; +mod highlighter; + +use std::path::Path; + +pub use definitions::{FILE_ASSOCIATIONS, HighlightKind, LANGUAGES}; +pub use highlighter::*; +pub use lsh::runtime::Language; +use stdext::glob::glob_match; + +pub fn process_file_associations( + associations: &[(T, &'static Language)], + path: &Path, +) -> Option<&'static Language> +where + T: AsRef<[u8]>, +{ + let path = path.as_os_str().as_encoded_bytes(); + + for a in associations { + if glob_match(a.0.as_ref(), path) { + return Some(a.1); + } + } + + None +} diff --git a/crates/lsh-bin/Cargo.toml b/crates/lsh-bin/Cargo.toml new file mode 100644 index 000000000000..8601fe1960e4 --- /dev/null +++ b/crates/lsh-bin/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "lsh-bin" +version = "0.0.0" + +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +anyhow = "*" +argh = "*" +lsh.workspace = true +stdext.workspace = true diff --git a/crates/lsh-bin/src/main.rs b/crates/lsh-bin/src/main.rs new file mode 100644 index 000000000000..6e6a4ae7faf5 --- /dev/null +++ b/crates/lsh-bin/src/main.rs @@ -0,0 +1,188 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use std::fs::File; +use std::io::{BufRead, BufReader, BufWriter, IsTerminal, Write as _, stdout}; +use std::path::{Path, PathBuf}; +use std::process::exit; + +use anyhow::bail; +use argh::FromArgs; +use lsh::compiler::SerializedCharset; +use lsh::runtime::Runtime; +use stdext::arena::scratch_arena; +use stdext::glob::glob_match; + +#[derive(FromArgs, PartialEq, Debug)] +#[argh(description = "Debug and test frontend for Leonard's Shitty Highlighter")] +struct Command { + #[argh(subcommand)] + sub: SubCommands, +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand)] +enum SubCommands { + Compile(SubCommandOneCompile), + Assembly(SubCommandAssembly), + Render(SubCommandRender), +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand, name = "compile", description = "Generate Rust code from .lsh files")] +struct SubCommandOneCompile { + #[argh(positional, description = "source .lsh file or directory")] + lsh: PathBuf, +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand, name = "assembly", description = "Generate assembly from .lsh files")] +struct SubCommandAssembly { + #[argh(positional, description = "source .lsh file or directory")] + lsh: PathBuf, +} + +#[derive(FromArgs, PartialEq, Debug)] +#[argh(subcommand, name = "render", description = "Highlight text files")] +struct SubCommandRender { + #[argh(positional, description = "source .lsh file or directory")] + lsh: PathBuf, + #[argh(positional, description = "source text file")] + input: PathBuf, +} + +pub fn main() { + if let Err(e) = run() { + eprintln!("{e}"); + exit(1); + } +} + +fn run() -> anyhow::Result<()> { + stdext::arena::init(128 * 1024 * 1024).unwrap(); + + let command: Command = argh::from_env(); + let scratch = scratch_arena(None); + let mut generator = lsh::compiler::Generator::new(&scratch); + let mut read_lsh = |path: &Path| { + if path.is_dir() { generator.read_directory(path) } else { generator.read_file(path) } + }; + + match &command.sub { + 
SubCommands::Compile(cmd) => { + read_lsh(&cmd.lsh)?; + let output = generator.generate_rust()?; + _ = stdout().write_all(output.as_bytes()); + } + SubCommands::Assembly(cmd) => { + read_lsh(&cmd.lsh)?; + let vt = stdout().is_terminal(); + let output = generator.generate_assembly(vt)?; + _ = stdout().write_all(output.as_bytes()); + } + SubCommands::Render(cmd) => { + read_lsh(&cmd.lsh)?; + run_render(generator, &cmd.input)?; + } + } + + Ok(()) +} + +fn run_render(generator: lsh::compiler::Generator, path: &Path) -> anyhow::Result<()> { + let assembly = generator.assemble()?; + + let Some(entrypoint) = assembly.entrypoints.iter().find(|ep| { + ep.paths + .iter() + .any(|pattern| glob_match(pattern.as_bytes(), path.as_os_str().as_encoded_bytes())) + }) else { + bail!("No matching highlighting definition found"); + }; + + let mut color_map = Vec::new(); + let mut unknown_kinds = Vec::new(); + for hk in &assembly.highlight_kinds { + let color = match hk.identifier { + "other" => "", + + "comment" => "\x1b[32m", // Green + "method" => "\x1b[93m", // Bright Yellow + "string" => "\x1b[91m", // Bright Red + "variable" => "\x1b[96m", // Bright Cyan + + "constant.language" => "\x1b[94m", // Bright Blue + "constant.numeric" => "\x1b[92m", // Bright Green + "keyword.control" => "\x1b[95m", // Bright Magenta + "keyword.other" => "\x1b[94m", // Bright Blue + "markup.bold" => "\x1b[1m", // Bold + "markup.changed" => "\x1b[94m", // Bright Blue + "markup.deleted" => "\x1b[91m", // Bright Red + "markup.heading" => "\x1b[94m", // Bright Blue + "markup.inserted" => "\x1b[92m", // Bright Green + "markup.italic" => "\x1b[3m", // Italic + "markup.link" => "\x1b[4m", // Underlined + "markup.list" => "\x1b[94m", // Bright Blue + "markup.strikethrough" => "\x1b[9m", // Strikethrough + "meta.header" => "\x1b[94m", // Bright Blue + + _ => { + unknown_kinds.push(hk.identifier.to_string()); + "" + } + }; + + if !color.is_empty() { + if color_map.len() <= hk.value as usize { + 
color_map.resize(hk.value as usize + 1, ""); + } + color_map[hk.value as usize] = color; + } + } + if !unknown_kinds.is_empty() { + eprintln!("\x1b[33mWarning: Unknown highlight kinds:"); + for kind in &unknown_kinds { + eprintln!(" - {}", kind); + } + eprintln!("\x1b[m"); + } + + // Convert Assembly data to static references by leaking memory + // This is fine for a CLI tool that runs once and exits + let charsets: Vec = + assembly.charsets.into_iter().map(|cs| cs.serialize()).collect(); + + let mut runtime = Runtime::new( + &assembly.instructions, + &assembly.strings, + &charsets, + entrypoint.address as u32, + ); + + let reader = BufReader::with_capacity(128 * 1024, File::open(path)?); + let mut stdout = BufWriter::with_capacity(128 * 1024, stdout()); + + for line in reader.lines() { + let line = line?; + let scratch = scratch_arena(None); + let highlights = runtime.parse_next_line::(&scratch, line.as_bytes()); + + for w in highlights.windows(2) { + let curr = &w[0]; + let next = &w[1]; + let start = curr.start; + let end = next.start; + let kind = curr.kind; + let text = &line[start..end]; + + if let Some(color) = color_map.get(kind as usize) { + write!(stdout, "{color}{text}\x1b[m")?; + } else { + stdout.write_all(text.as_bytes())?; + } + } + writeln!(stdout)?; + } + + Ok(()) +} diff --git a/crates/lsh/definitions/diff.lsh b/crates/lsh/definitions/diff.lsh new file mode 100644 index 000000000000..7b71c3dd2cc1 --- /dev/null +++ b/crates/lsh/definitions/diff.lsh @@ -0,0 +1,12 @@ +#[display_name = "Diff"] +#[path = "**/*.diff"] +#[path = "**/*.patch"] +pub fn diff() { + if /(?:diff|---|\+\+\+).*/ { + yield meta.header; + } else if /-.*/ { + yield markup.deleted; + } else if /\+.*/ { + yield markup.inserted; + } +} diff --git a/crates/lsh/definitions/json.lsh b/crates/lsh/definitions/json.lsh new file mode 100644 index 000000000000..6b8f296b441b --- /dev/null +++ b/crates/lsh/definitions/json.lsh @@ -0,0 +1,39 @@ +#[display_name = "JSON"] +#[path = "**/*.json"] 
+#[path = "**/*.jsonc"] +pub fn json() { + until /$/ { + yield other; + + if /\/\/.*/ { + yield comment; + } else if /\/\*/ { + loop { + yield comment; + await input; + if /\*\// { + yield comment; + break; + } + } + } else if /"/ { + double_quote_string(); + } else if /true|false|null/ { + if /\w+/ { + // Not a keyword after all. + } else { + yield constant.language; + } + } else if /-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/ { + if /\w+/ { + // Not a number after all. + } else { + yield constant.numeric; + } + } else if /\w+/ { + // Invalid token? Skip. + } + + yield other; + } +} diff --git a/crates/lsh/definitions/lsh.lsh b/crates/lsh/definitions/lsh.lsh new file mode 100644 index 000000000000..e9cf958916c8 --- /dev/null +++ b/crates/lsh/definitions/lsh.lsh @@ -0,0 +1,50 @@ +#[display_name = "LSH"] +#[path = "**/*.lsh"] +pub fn lsh() { + until /$/ { + yield other; + + if /\/\/.*/ { + yield comment; + } else if /pub|fn|await/ { + if /\w+/ { + yield other; + } else { + yield keyword.other; + } + } else if /if|else|until|loop|break|continue/ { + if /\w+/ { + yield other; + } else { + yield keyword.control; + } + } else if /yield/ { + if /\w+/ { + yield other; + } else { + yield keyword.other; + if /\s+/ { + // Gobble space to the argument + yield other; + } + if /[\w.]+/ { + // The yield argument + yield markup.link; + } + } + } else if /\// { + until /$/ { + if /\\./ { + // Skip escape char + } else if /\// { + yield string; + break; + } + } + } else if /"/ { + double_quote_string(); + } + + yield other; + } +} diff --git a/crates/lsh/definitions/markdown.lsh b/crates/lsh/definitions/markdown.lsh new file mode 100644 index 000000000000..f5444d32f94d --- /dev/null +++ b/crates/lsh/definitions/markdown.lsh @@ -0,0 +1,204 @@ +#[display_name = "Markdown"] +#[path = "**/*.md"] +pub fn markdown() { + // Gobble any leading whitespace on the line. 
+ if /\s+/ { + yield other; + } + + if /#+\s+.*/ { + yield markup.heading; + } else if />.*/ { + yield comment; + } else if /```/ { + // NOTE: These checks are sorted alphabetically. + if /(?i:diff)/ { + loop { + await input; + if /\s*```/ { + return; + } else { + diff(); + // diff() may not have eaten that line. + // It doesn't have to. That's our responsibility. + if /.*/ {} + } + } + } else if /(?i:json)/ { + loop { + await input; + if /\s*```/ { + return; + } else { + json(); + if /.*/ {} + } + } + } else if /(?i:yaml)/ { + loop { + await input; + if /\s*```/ { + return; + } else { + yaml(); + if /.*/ {} + } + } + } else if /(?i:pwsh|powershell)/ { + loop { + await input; + if /\s*```/ { + return; + } else { + powershell(); + if /.*/ {} + } + } + } else { + loop { + await input; + if /\s*```.*/ { + return; + } else if /.*/ { + // Gobble the rest of the line with no highlighting. + } + } + } + } else { + if /[\*-]\s+/ { + yield markup.list; + } + + // The structure of inline emphasis, etc., matchers is + // awfully wrong, but parsing Markdown is also awful in general. + // In fact I think it's categorically impossible to implement like this, + // because it requires lookahead across potentially infinite lines: + // The Markdown spec describes its "parsing strategy" as "two phase" + // where the first phase segments the document into block structures. + // For instance, a `comment` may span multiple lines, _unless_ + // there's a paragraph break in between, e.g. due to a ">" comment. + until /$/ { + yield other; + + if /\[/ { + // Anything inside [...] is a link text and is colored like a string. + yield other; + if /[^\]]*/ { + yield string; + + // Now look for the link target in parentheses. + if /\]\(/ { + yield other; + if /[^\s\)]+/ { + yield markup.link; + } + + if /\s+/ { + } + + // The link may have a title in quotes. 
+ yield other; + if /"/ { + if /[^"]+/ { + } + if /"/ { + yield string; + } + } + } + + } + } else if /__/ { + if /_+/ { + // "_____" = no hit + } else if /\s+/ { + // "__ foo __" = no hit + } else { + until /$/ { + if /\\./ { + // gobble escape char + } else if /[^\s_]+__/ { + yield markup.bold; + break; + } else if /__/ { + break; + } + } + } + } else if /\*\*/ { + if /\s+/ { + // "** foo **" = no hit + } else { + until /$/ { + if /\\./ { + // gobble escape char + } else if /[^\s*]+\*\*/ { + yield markup.bold; + break; + } else if /\*\*/ { + break; + } + } + } + } else if /_/ { + if /\s+/ { + // "_ foo _" = no hit + } else { + until /$/ { + if /\\./ { + // gobble escape char + } else if /[^\s_]+_/ { + yield markup.italic; + break; + } else if /_/ { + break; + } + } + } + } else if /\*/ { + if /\s+/ { + // "* foo *" = no hit + } else { + until /$/ { + if /\\./ { + // gobble escape char + } else if /[^\s*]+\*/ { + yield markup.italic; + break; + } else if /\*/ { + break; + } + } + } + } else if /~~/ { + if /\s+/ { + // "~~ foo ~~" = no hit + } else { + until /$/ { + if /\\./ { + // gobble escape char + } else if /[^\s~]+~~/ { + yield markup.strikethrough; + break; + } else if /~~/ { + break; + } + } + } + } else if /```/ { + until /$/ { if /```/ { break; } } + yield string; + } else if /``/ { + until /$/ { if /``/ { break; } } + yield string; + } else if /`/ { + until /$/ { if /`/ { break; } } + yield string; + } else if /\\./ { + // Gobble escape char + } + + yield other; + } + } +} diff --git a/crates/lsh/definitions/powershell.lsh b/crates/lsh/definitions/powershell.lsh new file mode 100644 index 000000000000..ce4bf5e45747 --- /dev/null +++ b/crates/lsh/definitions/powershell.lsh @@ -0,0 +1,64 @@ +#[display_name = "PowerShell"] +#[path = "**/*.ps1"] +#[path = "**/*.psd1"] +#[path = "**/*.psm1"] +pub fn powershell() { + until /$/ { + yield other; + + if /#.*/ { + yield comment; + } else if /<#/ { + loop { + yield comment; + if /#>/ { yield comment; break; } + 
await input; + } + } else if /'/ { + loop { + yield string; + if /\\./ {} + else if /'/ { yield string; break; } + await input; + } + } else if /@"/ { + loop { + yield string; + if /\\./ {} + else if /"@/ { yield string; break; } + await input; + } + } else if /"/ { + loop { + yield string; + if /\\./ {} + else if /"/ { yield string; break; } + await input; + } + } else if /function|param/ { + yield keyword.other; + } else if /elseif|else|if|for|switch|default|throw|try|catch|finally/ { + yield keyword.control; + } else if /-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/ { + if /[\w\-]+/ { + yield method; + } else { + yield constant.numeric; + } + } else if /\$false|\$true|\$null/ { + if /\w+/ { + yield variable; + } else { + yield constant.language; + } + } else if /[$@][\w\d?:]+/ { + yield variable; + } else if /[\w\d?:]+-[\w\d?:-]*/ { + yield method; + } else if /[\w\d?:]+/ { + // Gobble any other tokens that should not be highlighted + } + + yield other; + } +} diff --git a/crates/lsh/definitions/properties.lsh b/crates/lsh/definitions/properties.lsh new file mode 100644 index 000000000000..fe109b406dd1 --- /dev/null +++ b/crates/lsh/definitions/properties.lsh @@ -0,0 +1,55 @@ +#[display_name = "Properties"] +// ini +#[path = "**/*.ini"] +// git +#[path = "**/.config/git/config"] +#[path = "**/.git/config"] +#[path = "**/gitconfig"] +#[path = "**/*.gitattributes"] +#[path = "**/*.gitconfig"] +#[path = "**/*.gitmodules"] +// things that VS Code calls "properties" files +#[path = "**/*.cfg"] +#[path = "**/*.conf"] +#[path = "**/*.directory"] +#[path = "**/*.editorconfig"] +#[path = "**/*.properties"] +#[path = "**/*.repo"] +// systemd +#[path = "**/*.automount"] +#[path = "**/*.dnssd"] +#[path = "**/*.link"] +#[path = "**/*.mount"] +#[path = "**/*.netdev"] +#[path = "**/*.network"] +#[path = "**/*.nspawn"] +#[path = "**/*.path"] +#[path = "**/*.service"] +#[path = "**/*.slice"] +#[path = "**/*.socket"] +#[path = "**/*.swap"] +#[path = "**/*.systemd"] +#[path = 
"**/*.target"] +#[path = "**/*.timer"] +pub fn properties() { + if /\[[^\[]+\]/ { + yield meta.header; + } + + until /$/ { + yield other; + + if /[;#].*/ { + yield comment; + } else if /([\w.-]+)\s*=\s*[\w.-]*/ { + yield $1 as variable; + yield other; + } else if /'/ { + single_quote_string(); + } else if /"/ { + double_quote_string(); + } + + yield other; + } +} diff --git a/crates/lsh/definitions/utility.lsh b/crates/lsh/definitions/utility.lsh new file mode 100644 index 000000000000..d8f6be7ed76d --- /dev/null +++ b/crates/lsh/definitions/utility.lsh @@ -0,0 +1,21 @@ +fn single_quote_string() { + until /$/ { + if /\\./ { + // Escape sequences + } else if /'/ { + yield string; + break; + } + } +} + +fn double_quote_string() { + until /$/ { + if /\\./ { + // Escape sequences + } else if /"/ { + yield string; + break; + } + } +} diff --git a/crates/lsh/definitions/xml.lsh b/crates/lsh/definitions/xml.lsh new file mode 100644 index 000000000000..988273c91654 --- /dev/null +++ b/crates/lsh/definitions/xml.lsh @@ -0,0 +1,78 @@ +#[display_name = "XML"] +#[path = "**/*.csproj.user"] +#[path = "**/*.csproj"] +#[path = "**/*.html"] +#[path = "**/*.nuspec"] +#[path = "**/*.proj"] +#[path = "**/*.props"] +#[path = "**/*.rss"] +#[path = "**/*.slnx"] +#[path = "**/*.svg"] +#[path = "**/*.targets"] +#[path = "**/*.vcxproj.filters"] +#[path = "**/*.vcxproj"] +#[path = "**/*.xaml"] +#[path = "**/*.xml"] +#[path = "**/*.xml"] +pub fn xml() { + until /$/ { + yield other; + + if // { + yield comment; + break; + } + } + } else if // { + yield string; + break; + } + } + } else if /]*>/ { + yield $1 as constant.language; + yield other; + } else if /(?:<\?|<)([\w:.-]+)/ { + yield $1 as constant.language; + yield other; + + until />/ { + yield other; + await input; + + if /([\w:.-]+)\s*=/ { + yield $1 as variable; + yield other; + } else if /"/ { + until /"/ { + yield string; + await input; + } + yield string; + } else if /'/ { + until /'/ { + yield string; + await input; + } + yield 
string; + } + } + + yield other; + } else if /<\/([\w:.-]+)\s*>/ { + yield $1 as constant.language; + yield other; + } else if /(?:&#|&)[\w:.-]+;/ { + yield constant.numeric; + } + + yield other; + } +} diff --git a/crates/lsh/definitions/yaml.lsh b/crates/lsh/definitions/yaml.lsh new file mode 100644 index 000000000000..a090447e3dd5 --- /dev/null +++ b/crates/lsh/definitions/yaml.lsh @@ -0,0 +1,78 @@ +#[display_name = "YAML"] +#[path = "**/*.yaml"] +#[path = "**/*.yml"] +pub fn yaml() { + if /\s+/ { + // Leading whitespace + } + + var indentation = off; + + loop { + if /-\s+/ { + // List item + } + if /\w+:(?:\s+|$)/ { + // Key: (Value follows) + } + + if /[|>][-+]?.*/ { + // If we see a multiline string inducer, capture content until indentation is back to before. + // Since we'll skip multiple lines, we need to use nested loops here. + + loop { + // Wait for the next line in this multiline string. + await input; + + // Skip leading whitespace + if /\s+/ { + } + + if off <= indentation { + // Once the new indentation is smaller, the multiline string has ended. + break; + } + + if /.*/ { + yield string; + } + } + } else { + break; + } + } + + yield other; + + if /---/ { + yield other; + } else if /true|false|yes|no|null|~/ { + // If it looks like a keyword, check if it's actually a string (e.g. "foo: true false"). + // TODO: Ideally this would not consume the last chunk of /\s+/ before a /# comment/. + if /\s*[^\s#]+[^#]*/ { + yield string; + } else { + yield constant.language; + } + } else if /-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/ { + // Same as with keywords above. + if /\s*[^\s#]+[^#]*/ { + yield string; + } else { + yield constant.numeric; + } + } else if /[^\s#]+[^#]*/ { + // Same as before: Gobble anything that looks like a string. + // TODO: This could be simplified & combined with the above if we stored the + // highlight kind into a variable and then yield the variable value itself. 
+ yield string; + } + + if /\s+/ { + yield other; + } + + if /#.*/ { + yield comment; + } +} diff --git a/crates/lsh/src/compiler/backend.rs b/crates/lsh/src/compiler/backend.rs index a43ee40585b0..cbe6f84c51cb 100644 --- a/crates/lsh/src/compiler/backend.rs +++ b/crates/lsh/src/compiler/backend.rs @@ -522,6 +522,7 @@ impl<'a> LivenessAnalysis<'a> { let ir = cell.borrow(); + #[allow(clippy::collapsible_match)] match ir.instr { IRI::Mov { dst, src } => { if dst.borrow().physical.is_none() { diff --git a/crates/lsh/src/runtime.rs b/crates/lsh/src/runtime.rs index 3bfd1f07c784..c3af7aac4a42 100644 --- a/crates/lsh/src/runtime.rs +++ b/crates/lsh/src/runtime.rs @@ -22,7 +22,8 @@ //! - [`Instruction::address_offset`] returns where, within an instruction, the jump target lives, //! as used by the backend's relocation system. -use std::fmt; +use std::fmt::{self, Debug}; +use std::mem; use stdext::arena::Arena; use stdext::arena_write_fmt; @@ -55,12 +56,303 @@ pub struct Highlight { pub kind: T, } -impl fmt::Debug for Highlight { +impl Debug for Highlight { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "({}, {:?})", self.start, self.kind) } } +/// The bytecode interpreter for syntax highlighting. +#[derive(Clone)] +pub struct Runtime<'pa, 'ps, 'pc> { + assembly: &'pa [u8], + strings: &'ps [&'ps str], + charsets: &'pc [[u16; 16]], + entrypoint: u32, + stack: Vec, + registers: Registers, +} + +/// Snapshot of the runtime state for incremental re-highlighting. 
+#[derive(Clone)] +pub struct RuntimeState { + stack: Vec, + registers: Registers, +} + +impl<'pa, 'ps, 'pc> Runtime<'pa, 'ps, 'pc> { + pub fn new( + assembly: &'pa [u8], + strings: &'ps [&'ps str], + charsets: &'pc [[u16; 16]], + entrypoint: u32, + ) -> Self { + Runtime { + assembly, + strings, + charsets, + entrypoint, + stack: Default::default(), + registers: Registers { pc: entrypoint, ..Default::default() }, + } + } + + pub fn snapshot(&self) -> RuntimeState { + RuntimeState { stack: self.stack.clone(), registers: self.registers } + } + + pub fn restore(&mut self, state: &RuntimeState) { + self.stack = state.stack.clone(); + self.registers = state.registers; + } + + /// Parse a single line and return highlight spans. + /// + /// Executes bytecode until the line is fully consumed or a `Return` resets the VM. + /// The returned spans partition the line into highlighted regions. + /// + /// # Returns + /// A vector of [`Highlight`] spans. Always contains at least two spans: + /// one at offset 0 and one at `line.len()` as a sentinel. + pub fn parse_next_line<'a, T: PartialEq + TryFrom>( + &mut self, + arena: &'a Arena, + line: &[u8], + ) -> BVec<'a, Highlight> { + let mut res: BVec<'a, Highlight> = BVec::empty(); + + self.registers.off = 0; + self.registers.hs = 0; + + // By default, any line starts with HighlightKind::Other. + // If the DSL yields anything, this will be overwritten. 
+ res.push(arena, Highlight { start: 0, kind: unsafe { mem::zeroed() } }); + + loop { + instruction_decode!(self.assembly, self.registers.pc, { + Mov { dst, src } => { + let s = self.registers.get(src); + self.registers.set(dst, s); + } + Add { dst, src } => { + let d = self.registers.get(dst); + let s = self.registers.get(src); + self.registers.set(dst, d.saturating_add(s)); + } + Sub { dst, src } => { + let d = self.registers.get(dst); + let s = self.registers.get(src); + self.registers.set(dst, d.saturating_sub(s)); + } + MovImm { dst, imm } => { + self.registers.set(dst, imm); + } + AddImm { dst, imm } => { + let d = self.registers.get(dst); + self.registers.set(dst, d.saturating_add(imm)); + } + SubImm { dst, imm } => { + let d = self.registers.get(dst); + self.registers.set(dst, d.saturating_sub(imm)); + } + + Call { tgt } => { + // PC already points to the next instruction (= return address) + self.registers.save_registers(&mut self.stack); + self.registers.pc = tgt; + } + Return => { + if !self.registers.load_registers(&mut self.stack) { + self.registers = Registers { pc: self.entrypoint, ..Default::default() }; + break; + } + } + + JumpEQ { lhs, rhs, tgt } => { + if self.registers.get(lhs) == self.registers.get(rhs) { + self.registers.pc = tgt; + } + } + JumpNE { lhs, rhs, tgt } => { + if self.registers.get(lhs) != self.registers.get(rhs) { + self.registers.pc = tgt; + } + } + JumpLT { lhs, rhs, tgt } => { + if self.registers.get(lhs) < self.registers.get(rhs) { + self.registers.pc = tgt; + } + } + JumpLE { lhs, rhs, tgt } => { + if self.registers.get(lhs) <= self.registers.get(rhs) { + self.registers.pc = tgt; + } + } + JumpGT { lhs, rhs, tgt } => { + if self.registers.get(lhs) > self.registers.get(rhs) { + self.registers.pc = tgt; + } + } + JumpGE { lhs, rhs, tgt } => { + if self.registers.get(lhs) >= self.registers.get(rhs) { + self.registers.pc = tgt; + } + } + + JumpIfEndOfLine { tgt } => { + if (self.registers.off as usize) >= line.len() { + 
self.registers.pc = tgt; + } + } + + JumpIfMatchCharset { idx, min, max, tgt } => { + let off = self.registers.off as usize; + let cs = &self.charsets[idx as usize]; + let min = min as usize; + let max = max as usize; + + if let Some(off) = Self::charset_gobble(line, off, cs, min, max) { + self.registers.off = off as u32; + self.registers.pc = tgt; + } + } + JumpIfMatchPrefix { idx, tgt } => { + let off = self.registers.off as usize; + let str = self.strings[idx as usize].as_bytes(); + + if Self::inlined_memcmp(line, off, str) { + self.registers.off = (off + str.len()) as u32; + self.registers.pc = tgt; + } + } + JumpIfMatchPrefixInsensitive { idx, tgt } => { + let off = self.registers.off as usize; + let str = self.strings[idx as usize].as_bytes(); + + if Self::inlined_memicmp(line, off, str) { + self.registers.off = (off + str.len()) as u32; + self.registers.pc = tgt; + } + } + + FlushHighlight { kind } => { + let kind = self.registers.get(kind); + let kind = unsafe { kind.try_into().unwrap_unchecked() }; + let start = (self.registers.hs as usize).min(line.len()); + + if let Some(last) = res.last_mut() + && (last.start == start || last.kind == kind) + { + last.kind = kind; + } else { + res.push(arena, Highlight { start, kind }); + } + + self.registers.hs = self.registers.off; + } + AwaitInput => { + let off = self.registers.off as usize; + if off >= line.len() { + break; + } + } + + _ => unreachable!(), + }); + } + + // Ensure that there's a past-the-end highlight. 
+ if res.last().is_none_or(|last| last.start < line.len()) { + res.push(arena, Highlight { start: line.len(), kind: unsafe { mem::zeroed() } }); + } + + res + } + + // TODO: http://0x80.pl/notesen/2018-10-18-simd-byte-lookup.html#alternative-implementation + #[inline] + fn charset_gobble( + haystack: &[u8], + off: usize, + cs: &[u16; 16], + min: usize, + max: usize, + ) -> Option { + let mut i = 0usize; + while i < max { + let idx = off + i; + if idx >= haystack.len() || !Self::in_set(cs, haystack[idx]) { + break; + } + i += 1; + } + if i >= min { Some(off + i) } else { None } + } + + /// A mini-memcmp implementation for short needles. + /// Compares the `haystack` at `off` with the `needle`. + #[inline] + fn inlined_memcmp(haystack: &[u8], off: usize, needle: &[u8]) -> bool { + unsafe { + if off >= haystack.len() || haystack.len() - off < needle.len() { + return false; + } + + let a = haystack.as_ptr().add(off); + let b = needle.as_ptr(); + let mut i = 0; + + while i < needle.len() { + let a = *a.add(i); + let b = *b.add(i); + i += 1; + if a != b { + return false; + } + } + + true + } + } + + /// Like `inlined_memcmp`, but case-insensitive. + #[inline] + fn inlined_memicmp(haystack: &[u8], off: usize, needle: &[u8]) -> bool { + unsafe { + if off >= haystack.len() || haystack.len() - off < needle.len() { + return false; + } + + let a = haystack.as_ptr().add(off); + let b = needle.as_ptr(); + let mut i = 0; + + while i < needle.len() { + // str in PrefixInsensitive(str) is expected to be lowercase, printable ASCII. 
+ let a = a.add(i).read().to_ascii_lowercase(); + let b = b.add(i).read(); + i += 1; + if a != b { + return false; + } + } + + true + } + } + + #[inline] + fn in_set(bitmap: &[u16; 16], byte: u8) -> bool { + let lo_nibble = byte & 0xf; + let hi_nibble = byte >> 4; + + let bitset = bitmap[lo_nibble as usize]; + let bitmask = 1u16 << hi_nibble; + + (bitset & bitmask) != 0 + } +} + #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum Register { @@ -154,6 +446,26 @@ impl Registers { unsafe { self.as_mut_ptr().add(reg as usize).write(val) } } + #[inline(always)] + fn save_registers(&self, vec: &mut Vec) { + unsafe { vec.extend_from_slice(std::slice::from_raw_parts(self.as_ptr().add(2), 14)) }; + } + + #[inline(always)] + fn load_registers(&mut self, vec: &mut Vec) -> bool { + unsafe { + if vec.len() < 14 { + return false; + } + + let src = vec.as_ptr().add(vec.len() - 14); + let dst = self.as_mut_ptr().add(2); + std::ptr::copy_nonoverlapping(src, dst, 14); + vec.truncate(vec.len() - 14); + true + } + } + #[inline(always)] unsafe fn as_ptr(&self) -> *const u32 { self as *const _ as *const u32 @@ -403,6 +715,8 @@ macro_rules! instruction_decode { }}; } +use instruction_decode; + impl Instruction { // JumpIfMatchCharset, etc., are 1 byte opcode + 4 u32 parameters. 
pub const MAX_ENCODED_SIZE: usize = 1 + 4 * 4; diff --git a/crates/edit/src/glob.rs b/crates/stdext/src/glob.rs similarity index 100% rename from crates/edit/src/glob.rs rename to crates/stdext/src/glob.rs diff --git a/crates/stdext/src/lib.rs b/crates/stdext/src/lib.rs index 30b59d549032..e009494acf97 100644 --- a/crates/stdext/src/lib.rs +++ b/crates/stdext/src/lib.rs @@ -6,6 +6,7 @@ pub mod alloc; pub mod arena; pub mod collections; +pub mod glob; mod helpers; pub mod simd; pub mod sys; diff --git a/i18n/edit.toml b/i18n/edit.toml index 01248b1577ad..5c992d32ebc4 100644 --- a/i18n/edit.toml +++ b/i18n/edit.toml @@ -376,6 +376,19 @@ vi = "Lưu thành…" zh_hans = "另存为…" zh_hant = "另存新檔…" +[FilePreferences] +en = "Preferences" +de = "Einstellungen" +es = "Configuración" +fr = "Paramètres" +it = "Impostazioni" +ja = "設定" +ko = "설정" +pt_br = "Configurações" +ru = "Параметры" +zh_hans = "设置" +zh_hant = "設定" + [FileClose] en = "Close File" ar = "إغلاق الملف" @@ -1661,6 +1674,32 @@ vi = "Đóng" zh_hans = "关闭" zh_hant = "關閉" +[LanguageSelectMode] +en = "Select Language Mode" +de = "Sprachmodus auswählen" +es = "Seleccionar modo de lenguaje" +fr = "Sélectionner le mode du langage" +it = "Seleziona modalità del linguaggio" +ja = "言語モードの選択" +ko = "언어 모드 선택" +pt_br = "Selecionar modo de linguagem" +ru = "Выбрать режим языка" +zh_hans = "选择语言模式" +zh_hant = "選擇語言模式" + +[LanguageAutoDetect] +en = "Auto Detect" +de = "Automatisch erkennen" +es = "Detección automática" +fr = "Détection automatique" +it = "Rilevamento automatico" +ja = "自動検出" +ko = "자동 감지" +pt_br = "Detectar automaticamente" +ru = "Определить автоматически" +zh_hans = "自动检测" +zh_hant = "自動偵測" + [EncodingReopen] en = "Reopen with encoding…" ar = "إعادة فتح مع الترميز…"