agents: Add GEMINI.md file and Jetski skill for core specification reader Change-Id: Id8922f417ae012958ad9e795114d1467df5fdcc8 Reviewed-on: https://bluetooth-review.googlesource.com/c/bluetooth/+/2900
diff --git a/.agents/skills/core_specification/SKILL.md b/.agents/skills/core_specification/SKILL.md new file mode 100644 index 0000000..3f70987 --- /dev/null +++ b/.agents/skills/core_specification/SKILL.md
@@ -0,0 +1,51 @@ +--- +name: Bluetooth Core Specification +description: Instructions for reading the Bluetooth Core Specification. +--- + +# Bluetooth Core Specification Management + +This resource explains how to access, search, and cite the Bluetooth Core Specification effectively using the dedicated local cache and management tools. + +## The Local Cache Strategy + +Since the official Bluetooth specification is massive and fragmented, we use a local cache of the eight most critical "Core" chapters. This allows for instant, reliable searching even for complex data like packet formats and section numbers. + +### **The Three-File System** +For every chapter in the core specification, the shared cache contains three versions: +1. **`<name>.md` (The Search Index):** Our primary search target. It features **Markdown tables** and **stitched headers** (e.g., "3.5.1. Pairing Request") on single lines for perfect `grep_search` results. +2. **`<name>_pretty.html` (The Context View):** A formatted HTML file used for `view_file`. Use this to see original tables, lists, and diagram references once you've found the correct section in the `.md` file. +3. **`<name>.html` (The Source):** The raw, minified official HTML from bluetooth.com. + +### **Section Index** +We maintain a global **`index.md`** in the specifications directory. +* **Use this first:** To find which file contains a specific section number or topic, run `grep_search` on `index.md`. The `index.md` output will contain the file name and the exact line number (e.g., `logical-link-control-and-adaptation-protocol-specification.md:1057`), allowing you to use `view_file` to jump directly to the correct line in the corresponding `.md` file. 
+ +--- + +## Management Tooling + +### **Synchronizing the Cache** +To initialize the cache or fetch any specifications that are missing from it, run the downloader directly with Python (it requires the `beautifulsoup4` package): +`python3 .agents/skills/core_specification/scripts/downloader.py` + +*Note: The generated Markdown files, HTML copies, and global `index.md` will be placed in the source folder at: `.agents/skills/core_specification/specifications/`* + +This tool automatically: +* Downloads the HTML from the official URLs for every specification that is not already cached (delete a specification's cached files to force a fresh download). +* Converts HTML tables into readable Markdown tables. +* Stitches fragmented section numbers into searchable headers. +* Re-generates the `index.md`. + +--- + +## How to Research and Search + +1. **Start with the Core Specs:** + * Use `grep_search` on `.agents/skills/core_specification/specifications/index.md` to find the correct file and line number. + * **Use Case Insensitivity:** The Bluetooth specification uses inconsistent capitalization. Always use `CaseInsensitive: true` when searching. + * Once you have the target file name and line number from `index.md`, use `view_file` to read the exact section in the corresponding `.md` file. +2. **When the topic is outside the Core Specs:** + * Use the `search_web` tool with `site:bluetooth.com/wp-content/uploads/Files/Specification/HTML/Core-62 "term"`. +3. **Citing the Specification:** + * Always provide the Volume, Part, and Section (e.g., "Vol 3, Part H, Section 3.5.1"). \ No newline at end of file
diff --git a/.agents/skills/core_specification/scripts/downloader.py b/.agents/skills/core_specification/scripts/downloader.py new file mode 100644 index 0000000..41f6217 --- /dev/null +++ b/.agents/skills/core_specification/scripts/downloader.py
# Copyright 2026 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

"""Downloads and formats Bluetooth core specification documents.

For every URL in CORE_SPEC_URLS the script stores three files in SPEC_DIR:
the raw HTML, a prettified HTML copy, and a text/Markdown rendering in which
section headings are stitched onto one line and tables are converted to
Markdown. A specification whose three files already exist is skipped. After
processing, a global ``index.md`` of all section headings is regenerated.
"""

import os
import re
import sys
from typing import Optional
from urllib.request import urlopen, Request
from urllib.error import URLError
from urllib.parse import urlparse

try:
    from bs4 import BeautifulSoup, Tag
except ImportError:
    # Deferred so that running the script without the dependency yields an
    # actionable message (see main/process_spec) instead of a bare traceback.
    BeautifulSoup = None  # type: ignore
    Tag = None  # type: ignore

_BS4_MISSING_MESSAGE = (
    "the 'beautifulsoup4' package is required "
    "(python3 -m pip install beautifulsoup4)"
)

# Root project directory for specification cache
SPEC_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "specifications",
)

# Hardcoded list of official Bluetooth Core Specification URLs
CORE_SPEC_URLS = [
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/host-controller-interface/"
        "host-controller-interface-functional-specification.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/host/"
        "logical-link-control-and-adaptation-protocol-specification.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/host/attribute-protocol--att-.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/host/generic-access-profile.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/host/generic-attribute-profile--gatt-.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/host/security-manager-specification.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/br-edr-controller/"
        "link-manager-protocol-specification.html"
    ),
    (
        "https://www.bluetooth.com/wp-content/uploads/Files/Specification/"
        "HTML/Core-62/out/en/low-energy-controller/"
        "link-layer-specification.html"
    ),
]


def _escape_table_cell(text: str) -> str:
    """Makes text safe to embed in a single Markdown table cell."""
    return text.replace('|', '\\|').replace('\n', ' ')


def fetch_html(url: str) -> Optional[str]:
    """Downloads HTML content from the given URL.

    Args:
        url: Location of the document to download.

    Returns:
        The response body decoded as UTF-8, or None if the download, the
        read, or the decoding failed (the error is printed).
    """
    print(f"Downloading: {url}...")
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        req = Request(url, headers=headers)
        with urlopen(req, timeout=30) as response:
            return response.read().decode('utf-8')
    except URLError as e:
        print(f"Error downloading {url}: {e}")
        return None
    except (OSError, UnicodeDecodeError) as e:
        # urlopen() wraps connection failures in URLError, but a timeout or
        # reset while reading the body surfaces as a plain OSError, and a
        # non-UTF-8 body as UnicodeDecodeError. Treat both as a failed
        # download so one bad document does not abort the whole run.
        print(f"Error reading {url}: {e}")
        return None


def extract_metadata_header(soup: BeautifulSoup, url: str) -> str:
    """Extracts the document title and formats an initial metadata header.

    Args:
        soup: Parsed document.
        url: Source URL, recorded on the first line of the header.

    Returns:
        A "Source URL: ..." line, followed (only when a ``div.titlepage``
        element exists) by the cleaned title, an "=" underline and a blank
        line.
    """
    meta_header = f"Source URL: {url}\n"
    title_div = soup.find('div', class_='titlepage')

    if title_div:
        # Prefer the dedicated title element; fall back to the whole div.
        title_element = title_div.find(class_='title')
        raw_title_source = title_element if title_element else title_div
        raw_title = raw_title_source.get_text(separator=' ', strip=True)
        # Remove internal SIG codenames/revisions like 'vAtlanta r00'
        clean_title = re.sub(r'\bv[A-Za-z]+\s+r\d{2,}\b', '', raw_title)
        # Clean up any double spaces left behind
        clean_title = re.sub(r'\s{2,}', ' ', clean_title).strip()

        meta_header += clean_title + "\n"
        meta_header += "=" * max(10, len(clean_title)) + "\n\n"

    return meta_header


def stitch_headers(content_root: Tag) -> None:
    """Combines section numbers and titles into single Markdown headers.

    Every h1-h6 element that contains both a ``span.formal-number`` and a
    ``span.formal-title`` has its children replaced, in place, by one string
    of the form "<hashes> <number>. <title>".

    Args:
        content_root: Element whose headings are rewritten.
    """
    for header in content_root.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        num = header.find("span", class_="formal-number")
        title = header.find("span", class_="formal-title")

        if num and title:
            # Prefix with Markdown header hashes based on heading level
            level = int(header.name[1])
            hashes = "#" * level
            stitched = (
                f"{hashes} {num.get_text().strip()}. {title.get_text().strip()}"
            )
            header.clear()
            header.append(stitched)


def convert_table_to_markdown(table: Tag) -> str:
    """Converts a BeautifulSoup table element into a Markdown table string.

    The first non-empty row is used as the header row. Rows with fewer cells
    than the widest row are padded with empty cells so that the header and
    the separator row always have the same number of columns.

    Args:
        table: The table element to convert.

    Returns:
        The Markdown table surrounded by blank lines, or "" when the table
        has no cells.
    """
    rows = []
    for tr in table.find_all('tr'):
        cells = [
            _escape_table_cell(cell.get_text(separator=' ', strip=True))
            for cell in tr.find_all(['th', 'td'])
        ]
        if cells:
            rows.append(cells)

    if not rows:
        return ""

    max_cols = max(len(cells) for cells in rows)
    lines = [
        f"| {' | '.join(cells + [''] * (max_cols - len(cells)))} |"
        for cells in rows
    ]
    # The separator goes right after the header row (for a one-row table
    # that is the end of the table).
    lines.insert(1, f"|{'---|' * max_cols}")

    return '\n\n' + '\n'.join(lines) + '\n\n'


def replace_tables_with_markdown(
    content_root: Tag, soup: BeautifulSoup
) -> None:
    """Finds all tables in the HTML and replaces them with Markdown text.

    Args:
        content_root: Element searched for ``table`` descendants.
        soup: Document used to create the replacement string nodes.
    """
    for table in content_root.find_all('table'):
        md_table = convert_table_to_markdown(table)
        table.replace_with(soup.new_string(md_table))


def extract_clean_text(meta_header: str, content_root: Tag) -> str:
    """Extracts text content and cleans up excessive newlines.

    Args:
        meta_header: Text placed in front of the extracted content.
        content_root: Element whose text is extracted.

    Returns:
        The header plus the element text, with revision-marker lines removed
        and runs of three or more newlines collapsed to two.
    """
    text_content = content_root.get_text(separator='\n', strip=True)
    # Remove internal SIG codenames/revisions like 'vAtlanta r00'
    # (on its own line)
    text_content = re.sub(r'(?im)^v[a-z]+\s+r\d{2,}\n?$', '', text_content)
    full_text = meta_header + text_content
    return re.sub(r'\n{3,}', '\n\n', full_text)


def process_spec(url: str) -> bool:
    """Downloads, parses, and saves one specification.

    Writes ``<name>.html``, ``<name>_pretty.html`` and ``<name>.md`` into
    SPEC_DIR, where ``<name>`` is the final path component of the URL without
    its extension. If all three files already exist nothing is downloaded.

    Args:
        url: Location of the specification page.

    Returns:
        True if the specification is available in the cache afterwards
        (freshly generated or already cached), False if the download failed.

    Raises:
        ImportError: If BeautifulSoup (bs4) is not installed.
    """
    if BeautifulSoup is None:
        raise ImportError(_BS4_MISSING_MESSAGE)

    # NOTE(review): presumably raised because parsing/serializing these very
    # large, deeply nested documents exceeds the default limit - confirm.
    sys.setrecursionlimit(10000)

    # Determine filenames
    page_name = os.path.splitext(os.path.basename(urlparse(url).path))[0]
    raw_path = os.path.join(SPEC_DIR, f"{page_name}.html")
    pretty_path = os.path.join(SPEC_DIR, f"{page_name}_pretty.html")
    md_path = os.path.join(SPEC_DIR, f"{page_name}.md")

    # Ensure output directory exists
    os.makedirs(SPEC_DIR, exist_ok=True)

    if all(os.path.exists(p) for p in (md_path, raw_path, pretty_path)):
        print(f"Skipping cached spec: {page_name}")
        return True

    html_content = fetch_html(url)
    if not html_content:
        return False

    # Save raw HTML
    with open(raw_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    soup = BeautifulSoup(html_content, 'html.parser')

    # Save prettified HTML. This must happen before the tree is modified
    # below so the file reflects the original structure.
    with open(pretty_path, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())

    meta_header = extract_metadata_header(soup, url)

    main_article = soup.find('article', class_='topic')
    content_root = main_article if main_article else soup

    stitch_headers(content_root)
    replace_tables_with_markdown(content_root, soup)
    text_content = extract_clean_text(meta_header, content_root)

    # Save Markdown
    with open(md_path, 'w', encoding='utf-8') as f:
        f.write(text_content)
    print(f"Generated enhanced Markdown: {md_path}")
    return True


def generate_index() -> None:
    """Scans all .md files in SPEC_DIR and generates a global index.md.

    Each line that looks like a stitched section header ("## 3.5.1. Title")
    becomes one table row holding the section number, the title, the
    specification name (taken from the second line of the file) and the
    ``file:line`` location. Pipe characters in titles and names are escaped
    so they cannot break the table.
    """
    print("\nGenerating Section Index...")
    index_entries = []

    # Regex to find lines starting with section numbers (prefixed with hashes)
    section_pattern = re.compile(r'^(#+)\s+(\d+(\.\d+)*\.)\s+(.+)$')

    os.makedirs(SPEC_DIR, exist_ok=True)
    for filename in sorted(os.listdir(SPEC_DIR)):
        if not filename.endswith(".md") or filename == "index.md":
            continue

        filepath = os.path.join(SPEC_DIR, filename)
        spec_name = ""

        with open(filepath, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                stripped = line.strip()
                # The second line is the spec title
                if line_num == 2 and not spec_name:
                    spec_name = _escape_table_cell(stripped)

                match = section_pattern.match(stripped)
                if match:
                    section_num = match.group(2)
                    title = _escape_table_cell(match.group(4))
                    index_entries.append(
                        f"| {section_num} | {title} | {spec_name} | "
                        f"{filename}:{line_num} |"
                    )

    index_path = os.path.join(SPEC_DIR, "index.md")
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write("# Bluetooth Core Specification Index\n\n")
        f.write("| Section | Title | Specification | File:Line |\n")
        f.write("|---|---|---|---|\n")
        # Every entry embeds its own file:line, so entries are unique.
        f.writelines(entry + "\n" for entry in index_entries)

    print(f"Index created: {index_path}")


def main() -> int:
    """Synchronizes every core specification and rebuilds the index.

    Returns:
        0 if every specification is cached and indexed, 1 if a dependency
        is missing or at least one download failed.
    """
    if BeautifulSoup is None:
        print(f"Error: {_BS4_MISSING_MESSAGE}", file=sys.stderr)
        return 1

    failed_urls = []
    for spec_url in CORE_SPEC_URLS:
        if not process_spec(spec_url):
            failed_urls.append(spec_url)

    # Index whatever is available, even after a partial failure.
    generate_index()

    if failed_urls:
        print(
            f"\n{len(failed_urls)} of {len(CORE_SPEC_URLS)} specifications "
            "could not be downloaded:",
            file=sys.stderr,
        )
        for failed_url in failed_urls:
            print(f"  {failed_url}", file=sys.stderr)
        return 1

    print("\nAll core specifications synchronized and indexed successfully.")
    return 0


if __name__ == '__main__':
    sys.exit(main())
diff --git a/.gitignore b/.gitignore index 8bcc936..b45d8bf 100644 --- a/.gitignore +++ b/.gitignore
@@ -3,3 +3,6 @@ # CIPD files. infra/packages + +# Specification files. +.agents/skills/core_specification/specifications
diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 0000000..3b84746 --- /dev/null +++ b/GEMINI.md
@@ -0,0 +1,3 @@ +# Gemini Assistant Guidelines - Bluetooth Workspace + +See [README.md](./README.md) for project structure and description, and general guidelines for development. \ No newline at end of file