Add YAML symbol extraction (top-level mapping keys only).

What this patch changes:
- docs/.../plugin-system.mdx: lists YAML (`.yaml`, `.yml`) among the
  built-in languages.
- src/kit/queries/yaml/tags.scm: new tree-sitter query. It matches block
  mapping pairs that sit directly under the document's root block mapping,
  capturing the key's scalar as @name and the whole pair as @definition.key.
- src/kit/tree_sitter_symbol_extractor.py: maps `.yaml` and `.yml` to "yaml"
  in the extension table near the top of the module and in the
  `original_languages` table inside `reset_plugins`; adds the static helper
  `_strip_wrapping_quotes`, which removes one pair of matching `"` or `'`
  characters wrapping a string of length >= 2; and calls it for `string_lit`
  names in `.tf` files and for `double_quote_scalar` / `single_quote_scalar`
  names in `.yaml` / `.yml` files.
- tests: adds a YAML sample to the two existing multi-language tables and a
  new tests/test_yaml_symbols.py.

NOTE(review): the line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch. Every hunk's line count matches its
@@ header, but the leading whitespace (in particular the context lines of the
extract_symbols hunk and the nested keys of YAML_SAMPLE) must be confirmed
against the target files before applying.
NOTE(review): the removed HCL code stripped only double quotes; the shared
helper also strips a matching pair of single quotes. Confirm this is
acceptable for `string_lit` nodes.
NOTE(review): HCL quote stripping is still gated on `.tf` only, although the
docs list `.hcl` as an HCL extension as well. Confirm this is intended.

diff --git a/docs/src/content/docs/core-concepts/plugin-system.mdx b/docs/src/content/docs/core-concepts/plugin-system.mdx
index 1d497da..f1ec10d 100644
--- a/docs/src/content/docs/core-concepts/plugin-system.mdx
+++ b/docs/src/content/docs/core-concepts/plugin-system.mdx
@@ -24,6 +24,7 @@ kit comes with built-in support for 12+ programming languages:
 - **Dart** (`.dart`) - Classes, functions, mixins, enums, extensions
 - **HCL/Terraform** (`.hcl`, `.tf`) - Resources, variables, modules
 - **Haskell** (`.hs`) - Module header, functions (including lambda-binds), common type-level declarations
+- **YAML** (`.yaml`, `.yml`) - Top-level mapping keys
 
 Each language supports comprehensive symbol extraction including:
 - **Classes and interfaces** with inheritance relationships
diff --git a/src/kit/queries/yaml/tags.scm b/src/kit/queries/yaml/tags.scm
new file mode 100644
index 0000000..19749c1
--- /dev/null
+++ b/src/kit/queries/yaml/tags.scm
@@ -0,0 +1,10 @@
+;; tags.scm for YAML symbol extraction (tree-sitter-yaml)
+;; Only captures top-level mapping keys (direct children of document root).
+;; Use the full mapping pair as the definition so symbol spans/code include values.
+
+(stream
+  (document
+    (block_node
+      (block_mapping
+        (block_mapping_pair
+          key: (flow_node (_) @name)) @definition.key))))
diff --git a/src/kit/tree_sitter_symbol_extractor.py b/src/kit/tree_sitter_symbol_extractor.py
index 273346f..128267f 100644
--- a/src/kit/tree_sitter_symbol_extractor.py
+++ b/src/kit/tree_sitter_symbol_extractor.py
@@ -34,6 +34,8 @@
     ".hxx": "cpp",
     ".zig": "zig",
     ".cs": "csharp",
+    ".yaml": "yaml",
+    ".yml": "yaml",
 }
 
 
@@ -350,11 +352,19 @@ def reset_plugins(cls) -> None:
             ".hxx": "cpp",
             ".zig": "zig",
             ".cs": "csharp",
+            ".yaml": "yaml",
+            ".yml": "yaml",
         }
         LANGUAGES.clear()
         LANGUAGES.update(original_languages)
         cls.LANGUAGES = set(LANGUAGES.keys())
 
+    @staticmethod
+    def _strip_wrapping_quotes(text: str) -> str:
+        if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
+            return text[1:-1]
+        return text
+
     @staticmethod
     def extract_symbols(ext: str, source_code: str) -> List[Dict[str, Any]]:
         """Extracts symbols from source code using tree-sitter queries."""
@@ -454,10 +464,12 @@ def extract_symbols(ext: str, source_code: str) -> List[Dict[str, Any]]:
                         if hasattr(actual_name_node, "text") and actual_name_node.text
                         else str(actual_name_node)
                     )
-                    # HCL: Strip quotes from string literals
-                    if ext == ".tf" and hasattr(actual_name_node, "type") and actual_name_node.type == "string_lit":
-                        if len(symbol_name) >= 2 and symbol_name.startswith('"') and symbol_name.endswith('"'):
-                            symbol_name = symbol_name[1:-1]
+                    node_type = actual_name_node.type if hasattr(actual_name_node, "type") else None
+                    if (
+                        (ext == ".tf" and node_type == "string_lit")
+                        or (ext in {".yaml", ".yml"} and node_type in {"double_quote_scalar", "single_quote_scalar"})
+                    ):
+                        symbol_name = TreeSitterSymbolExtractor._strip_wrapping_quotes(symbol_name)
 
                     definition_capture = next(
                         ((name, node) for name, node in captures.items() if name.startswith("definition.")), None
diff --git a/tests/test_symbol_extraction_multilang.py b/tests/test_symbol_extraction_multilang.py
index 7776728..c6ddc49 100644
--- a/tests/test_symbol_extraction_multilang.py
+++ b/tests/test_symbol_extraction_multilang.py
@@ -9,6 +9,7 @@
     ".java": "class Bar { void foo() {} }\n",
     ".rs": "fn foo() {}\nstruct Bar;\n",
     ".zig": "pub fn foo() void {}\npub const Bar = struct {};\n",
+    ".yaml": "foo: bar\nbaz: 1\n",
 }
 
 
diff --git a/tests/test_tree_sitter_languages.py b/tests/test_tree_sitter_languages.py
index f08160a..aa2202a 100644
--- a/tests/test_tree_sitter_languages.py
+++ b/tests/test_tree_sitter_languages.py
@@ -12,6 +12,7 @@
     "c": b"int foo() { return 42; }\n",
     "dart": b"int foo() { return 42; }\n",
     "zig": b"pub fn foo() void { }\n",
+    "yaml": b"key: value\n",
 }
 
 
diff --git a/tests/test_yaml_symbols.py b/tests/test_yaml_symbols.py
new file mode 100644
index 0000000..aa141ef
--- /dev/null
+++ b/tests/test_yaml_symbols.py
@@ -0,0 +1,83 @@
+import pytest
+
+from kit.tree_sitter_symbol_extractor import TreeSitterSymbolExtractor
+
+YAML_SAMPLE = """\
+name: my-app
+version: 1.0.0
+database:
+  host: localhost
+  port: 5432
+logging:
+  level: info
+  format: json
+"""
+
+YAML_QUOTED_SAMPLE = """\
+"foo.bar": 1
+'quoted': 2
+"""
+
+
+def test_yaml_parser_and_query_available():
+    parser = TreeSitterSymbolExtractor.get_parser(".yaml")
+    query = TreeSitterSymbolExtractor.get_query(".yaml")
+    if not parser or not query:
+        pytest.skip("YAML parser or query not available in this environment")
+
+    tree = parser.parse(YAML_SAMPLE.encode("utf-8"))
+    assert tree.root_node is not None
+
+
+def test_yaml_top_level_keys_only():
+    parser = TreeSitterSymbolExtractor.get_parser(".yaml")
+    query = TreeSitterSymbolExtractor.get_query(".yaml")
+    if not parser or not query:
+        pytest.skip("YAML parser or query not available in this environment")
+
+    symbols = TreeSitterSymbolExtractor.extract_symbols(".yaml", YAML_SAMPLE)
+    names = {s["name"] for s in symbols}
+
+    assert "name" in names
+    assert "version" in names
+    assert "database" in names
+    assert "logging" in names
+
+    assert "host" not in names
+    assert "port" not in names
+    assert "level" not in names
+    assert "format" not in names
+
+
+def test_yaml_symbol_code_uses_full_mapping_pair():
+    parser = TreeSitterSymbolExtractor.get_parser(".yaml")
+    query = TreeSitterSymbolExtractor.get_query(".yaml")
+    if not parser or not query:
+        pytest.skip("YAML parser or query not available in this environment")
+
+    symbols = TreeSitterSymbolExtractor.extract_symbols(".yaml", YAML_SAMPLE)
+    database_symbol = next(s for s in symbols if s["name"] == "database")
+
+    assert database_symbol["code"].startswith("database:")
+    assert "host: localhost" in database_symbol["code"]
+    assert database_symbol["end_line"] > database_symbol["start_line"]
+
+
+def test_yaml_quoted_keys_and_yml_extension():
+    parser = TreeSitterSymbolExtractor.get_parser(".yml")
+    query = TreeSitterSymbolExtractor.get_query(".yml")
+    if not parser or not query:
+        pytest.skip("YAML parser or query not available in this environment")
+
+    symbols = TreeSitterSymbolExtractor.extract_symbols(".yml", YAML_QUOTED_SAMPLE)
+    names = {s["name"] for s in symbols}
+
+    assert "foo.bar" in names
+    assert "quoted" in names
+
+
+def test_yaml_extensions():
+    supported = TreeSitterSymbolExtractor.list_supported_languages()
+    assert "yaml" in supported
+    assert ".yaml" in supported["yaml"]
+    assert ".yml" in supported["yaml"]