4 changes: 2 additions & 2 deletions .github/workflows/build-and-test-client.yml
@@ -39,7 +39,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

- name: Setup Go environment
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: 'client/go.sum'
go-version-file: 'client/go.mod'
@@ -92,7 +92,7 @@ jobs:
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

- name: Setup Go environment
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
cache-dependency-path: 'client/go.sum'
go-version-file: 'client/go.mod'
2 changes: 1 addition & 1 deletion .github/workflows/lint-and-format.yml
@@ -25,7 +25,7 @@ jobs:
node-version-file: '.node-version'

- name: Lint and Format - Setup Go
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version-file: 'client/go.mod'
cache-dependency-path: 'client/go.sum'
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -14,6 +14,10 @@ release cadence.

_Changes on `main` since the latest tagged release that have not yet been included in a stable release._

### Fixed

- **`query_results_cache_retrieve` rejected by GitHub Copilot Chat (HTTP 400 invalid schema)** — The `lineRange` and `resultIndices` parameters were defined with `z.tuple([...])`, which the MCP SDK serialized to a bare-array JSON Schema value (e.g. `[{"type":"integer"}, {"type":"integer"}]`). GitHub Copilot Chat enforces strict JSON Schema validation and rejected the entire `ql-mcp` server with `"... is not of type 'object', 'boolean'"`. Both parameters now use `z.object({ start, end })` so they serialize to a valid `type: "object"` JSON Schema. Tool callers must now pass `{ "lineRange": { "start": 1, "end": 10 } }` instead of `{ "lineRange": [1, 10] }`. ([#263](https://github.com/advanced-security/codeql-development-mcp-server/pull/263))
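
For context, the strictness rule Copilot Chat enforces can be sketched as a small check (the `invalidProperties` helper below is illustrative, not part of the server):

```typescript
// Minimal sketch of the strict-schema rule Copilot Chat applies: every
// property's schema value must be an object or a boolean, never a bare
// array (which is what serializing z.tuple([...]) produced).
type InputSchema = { type?: string; properties?: Record<string, unknown> };

function invalidProperties(inputSchema: InputSchema): string[] {
  const properties = inputSchema.properties ?? {};
  return Object.entries(properties)
    .filter(
      ([, schema]) =>
        !(
          typeof schema === "boolean" ||
          (typeof schema === "object" && schema !== null && !Array.isArray(schema))
        )
    )
    .map(([name]) => name);
}

// Pre-fix shape (tuple serialization) vs. post-fix shape (object).
const broken: InputSchema = {
  type: "object",
  properties: { lineRange: [{ type: "integer" }, { type: "integer" }] },
};
const fixed: InputSchema = {
  type: "object",
  properties: { lineRange: { type: "object" } },
};

console.log(invalidProperties(broken)); // → ["lineRange"]
console.log(invalidProperties(fixed)); // → []
```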

## [v2.25.2] — 2026-04-15

### Highlights
@@ -2,7 +2,8 @@
"toolName": "query_results_cache_retrieve",
"parameters": {
"cacheKey": "nonexistent-key-for-test",
"maxLines": 50
"maxLines": 50,
"lineRange": { "start": 1, "end": 10 }
},
"success": true,
"description": "Successfully handled cache retrieve for non-existent key"
@@ -2,7 +2,8 @@
"toolName": "query_results_cache_retrieve",
"parameters": {
"cacheKey": "nonexistent-key-for-test",
"maxLines": 50
"maxLines": 50,
"lineRange": { "start": 1, "end": 10 }
},
"expectedSuccess": true,
"description": "Test query_results_cache_retrieve returns appropriate message for missing cache key"
@@ -2,6 +2,7 @@
"toolName": "query_results_cache_retrieve",
"arguments": {
"cacheKey": "nonexistent-key-for-test",
"maxLines": 50
"maxLines": 50,
"lineRange": { "start": 1, "end": 10 }
}
}
76 changes: 76 additions & 0 deletions extensions/vscode/test/suite/mcp-tool-e2e.integration.test.ts
@@ -471,6 +471,82 @@ suite('MCP Annotation & Audit Tool Integration Tests', () => {
const retrieveText = (retrieveResult.content as Array<{ type: string; text: string }>)[0]?.text ?? '';
assert.ok(retrieveText.includes('No cached result'), `Should handle missing key. Got: ${retrieveText}`);

// Step 5: Retrieve with the new object-form lineRange and resultIndices
// parameters (regression: these used to be tuples, which serialized to an
// invalid JSON Schema and broke GitHub Copilot Chat with HTTP 400).
const retrieveWithSubsetResult = await client.callTool({
name: 'query_results_cache_retrieve',
arguments: {
cacheKey: 'nonexistent-test-key',
lineRange: { start: 1, end: 10 },
resultIndices: { start: 0, end: 5 },
},
});
assert.ok(
!retrieveWithSubsetResult.isError,
`query_results_cache_retrieve with subset should succeed. Got: ${JSON.stringify(retrieveWithSubsetResult.content)}`,
);

console.log('[mcp-annotation-e2e] Query results cache test passed');
});

/**
* Regression test for the bug reported in the issue:
* "Fix invalid schema for query_results_cache_retrieve tool use in
* VS Code Copilot Chat".
*
* The GitHub Copilot Chat backend rejects MCP tools whose JSON Schema for
* any input parameter is not an object or boolean. The original
* `query_results_cache_retrieve` tool defined `lineRange` / `resultIndices`
* with `z.tuple([...])`, which serialized to a bare-array schema value and
* caused HTTP 400 responses from Copilot.
*
* This test enforces — at the live wire-protocol level — that EVERY tool's
* `inputSchema` is itself an object schema and that every property's schema
* value is also an object or boolean (never an array).
*/
test('Every tool inputSchema is a valid strict JSON Schema (no array-valued schemas)', async function () {
this.timeout(15_000);

const response = await client.listTools();
assert.ok(response.tools && response.tools.length > 0, 'Server should list tools');

const offending: string[] = [];
for (const tool of response.tools) {
const inputSchema = tool.inputSchema as
| { type?: string; properties?: Record<string, unknown> }
| undefined;

assert.ok(inputSchema, `Tool ${tool.name} should have an inputSchema`);
assert.ok(
typeof inputSchema === 'object' && !Array.isArray(inputSchema),
`Tool ${tool.name} inputSchema must be a JSON Schema object, got: ${JSON.stringify(inputSchema)}`,
);

const properties = inputSchema.properties ?? {};
for (const [propName, propSchema] of Object.entries(properties)) {
const isValid =
typeof propSchema === 'boolean' ||
(typeof propSchema === 'object' && propSchema !== null && !Array.isArray(propSchema));
if (!isValid) {
offending.push(`${tool.name}.${propName} = ${JSON.stringify(propSchema)}`);
}
}
}

assert.deepStrictEqual(
offending,
[],
`The following tool input properties have invalid (non-object/boolean) JSON Schema values, ` +
`which causes GitHub Copilot Chat to reject the server with HTTP 400:\n ${offending.join('\n ')}`,
);

// Spot-check the originally-broken tool/properties.
const retrieveTool = response.tools.find(t => t.name === 'query_results_cache_retrieve');
assert.ok(retrieveTool, 'query_results_cache_retrieve must be present');
const props = (retrieveTool.inputSchema as { properties?: Record<string, { type?: string }> })
.properties ?? {};
assert.strictEqual(props.lineRange?.type, 'object', 'lineRange must serialize as an object schema');
assert.strictEqual(props.resultIndices?.type, 'object', 'resultIndices must serialize as an object schema');
});
});
166 changes: 151 additions & 15 deletions server/dist/codeql-development-mcp-server.js

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions server/dist/codeql-development-mcp-server.js.map

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions server/src/lib/resources.ts
@@ -8,6 +8,7 @@
*/

// Static imports — esbuild inlines the file contents as string literals.
import dataExtensionsOverviewContent from '../resources/data-extensions-overview.md';
import dataflowMigrationContent from '../resources/dataflow-migration-v1-to-v2.md';
(Review thread: data-douser marked this conversation as resolved.)
import learningQueryBasicsContent from '../resources/learning-query-basics.md';
import performancePatternsContent from '../resources/performance-patterns.md';
@@ -82,6 +83,13 @@ export function getQueryUnitTesting(): string {
return queryUnitTestingContent;
}

/**
* Get the data extensions overview content
*/
export function getDataExtensionsOverview(): string {
return dataExtensionsOverviewContent;
}

/**
* Get the dataflow migration (v1 to v2) guide content
*/
148 changes: 148 additions & 0 deletions server/src/prompts/data-extension-development.prompt.md
@@ -0,0 +1,148 @@
---
agent: agent
---

# Data Extension Development Workflow

Use this workflow to create CodeQL data extensions (Models-as-Data) for third-party libraries and frameworks. Data extensions let you customize taint tracking without writing QL code — you author YAML files that declare which functions are sources, sinks, summaries, barriers, or barrier guards.

For format reference, read the MCP resource: `codeql://learning/data-extensions`
For language-specific guidance, read the corresponding `codeql://languages/<language>/library-modeling` resource. Available for: `cpp`, `csharp`, `go`, `java`, `javascript`, `python`, `ruby`, `rust`, `swift`.

## Workflow Checklist

### Phase 1: Identify the Target

- [ ] **Confirm the target library and language**
- Library name and version: {{libraryName}}
- Target language: {{language}}
- Determine the model format:
- **MaD tuple format** (9–10 column tuples): C/C++ (`codeql/cpp-all`), C# (`codeql/csharp-all`), Go (`codeql/go-all`), Java/Kotlin (`codeql/java-all`)
- **API Graph format** (3–5 column tuples): JavaScript/TypeScript (`codeql/javascript-all`), Python (`codeql/python-all`), Ruby (`codeql/ruby-all`)
> **Copilot AI (Apr 24, 2026), comment on lines +20 to +21:**
> The model-format quick reference omits Swift (which this PR registers under `codeql://languages/swift/library-modeling`) and doesn’t mention Rust’s distinct crate-path format. This can mislead users into picking the wrong YAML tuple shape; update the bullets to include Swift in the MaD tuple list and call out Rust as its own format (or explicitly defer to the per-language library-modeling resource).
>
> Suggested change:
> - **MaD tuple format** (9–10 column tuples): C/C++ (`codeql/cpp-all`), C# (`codeql/csharp-all`), Go (`codeql/go-all`), Java/Kotlin (`codeql/java-all`), Swift (`codeql/swift-all`)
> - **API Graph format** (3–5 column tuples): JavaScript/TypeScript (`codeql/javascript-all`), Python (`codeql/python-all`), Ruby (`codeql/ruby-all`)
> - **Rust format**: Rust (`codeql/rust-all`) uses its own crate-path-based model format; follow `codeql://languages/rust/library-modeling`
- Using the wrong format will cause the extension to silently fail to load.

- [ ] **Locate a CodeQL database**
- Tool: #list_codeql_databases
- Or create one: #codeql_database_create
- The database must contain code that exercises the target library

- [ ] **Explore the library's API surface**
- Tool: #read_database_source — browse source files to identify relevant API calls
- Tool: #codeql_query_run with `queryName="PrintAST"` — visualize how library calls are represented
- Skim the library's public API docs, type stubs, or source code

### Phase 2: Classify the API Surface

For each public function or method on the library, classify it:

1. **Does it return data from outside the program** (network, file, env, stdin)? → `sourceModel` with `kind` matching the threat model (usually `"remote"`)
2. **Does it consume data in a security-sensitive operation** (SQL, exec, path, redirect, eval, deserialize)? → `sinkModel` with `kind` matching the vulnerability class (e.g. `"sql-injection"`, `"command-injection"`)
3. **Does it pass data through opaque library code** (encode, decode, wrap, copy, iterate)? → `summaryModel` with `kind: "taint"` (derived) or `kind: "value"` (identity)
4. **Does it sanitize data so its output is safe for a specific sink kind?** → `barrierModel` with `kind` matching the sink kind it neutralizes
5. **Does it return a boolean indicating whether data is safe?** → `barrierGuardModel` with the appropriate `acceptingValue` (`"true"` or `"false"`) and matching `kind`
6. **Is the type a subclass of something already modeled?** → `typeModel` (API Graph languages) or set `subtypes: True` (MaD tuple languages)
7. **Did the auto-generated model assign a wrong summary?** → `neutralModel` to suppress it

A complete chain of **source → (summary\*) → sink** is required for end-to-end findings; missing a single hop will cause false negatives.
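
The chain above can be illustrated with a minimal set of models for a hypothetical Java library (the `org.example` names are invented; the standard MaD column layouts for `sourceModel`, `summaryModel`, and `sinkModel` are assumed):

```yaml
extensions:
  # Hypothetical: Client.fetch() returns remote (attacker-controlled) data.
  - addsTo:
      pack: codeql/java-all
      extensible: sourceModel
    data:
      - ["org.example", "Client", True, "fetch", "()", "", "ReturnValue", "remote", "manual"]
  # Hypothetical: Codec.decode(s) propagates taint from its argument to its return value.
  - addsTo:
      pack: codeql/java-all
      extensible: summaryModel
    data:
      - ["org.example", "Codec", True, "decode", "(String)", "", "Argument[0]", "ReturnValue", "taint", "manual"]
  # Hypothetical: Db.run(sql) consumes its argument in a SQL query.
  - addsTo:
      pack: codeql/java-all
      extensible: sinkModel
    data:
      - ["org.example", "Db", True, "run", "(String)", "", "Argument[0]", "sql-injection", "manual"]
```

With all three rows in place, data flowing `Client.fetch()` → `Codec.decode(...)` → `Db.run(...)` is reported end-to-end; drop any one row and the finding disappears.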

### Phase 3: Choose the Deployment Scope

Choose between two paths:

- **Single-repo shortcut** — drop `.model.yml` files under `.github/codeql/extensions/<pack-name>/` in the consuming repo. **No `codeql-pack.yml` is required**; Code Scanning auto-loads extensions from this directory. Use when the models only need to apply to one repo.
- **Reusable model pack** — create a pack directory with a `codeql-pack.yml` declaring `extensionTargets` and `dataExtensions`. Use when models will be consumed by multiple repos or by org-wide Default Setup.

### Phase 4: Author the `.model.yml` File(s)

- [ ] **Create the model file**
- Use naming convention `<library>-<module>.model.yml` (lowercase, hyphen-separated)
- Split per logical module rather than putting an entire ecosystem in one file
- Read `codeql://languages/{{language}}/library-modeling` for the exact column layout and examples

- [ ] **Write the YAML with correct extensible predicates**

```yaml
extensions:
- addsTo:
pack: codeql/{{language}}-all
extensible: sinkModel
data:
# Add tuples here — column count must exactly match the predicate schema
- [...]
```

- Every row must have the **exact column count** for its extensible predicate — an invalid row will fail silently or cause errors
- Use `provenance: 'manual'` (MaD format) for hand-written rows
- Ensure `kind` values match across the chain (e.g. a `"sql-injection"` barrier must guard a `"sql-injection"` sink)

### Phase 5: Configure `codeql-pack.yml` (Model-Pack Path Only)

Skip this step if you chose the `.github/codeql/extensions/` shortcut in Phase 3.

For a reusable pack, create or update `codeql-pack.yml`:

```yaml
name: <org>/<language>-<pack-name>
version: 0.0.1
library: true
extensionTargets:
codeql/<language>-all: '*'
dataExtensions:
- models/**/*.yml
```

- `library: true` — model packs are always libraries, never queries
- `extensionTargets` — names the upstream pack the extensions extend
- `dataExtensions` — a glob that picks up every `.model.yml` you author
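
A matching on-disk layout (file names hypothetical) that the `dataExtensions` glob above would pick up:

```
my-model-pack/
├── codeql-pack.yml
└── models/
    ├── acme-http.model.yml
    └── acme-sql.model.yml
```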

- [ ] **Install pack dependencies**
- Tool: #codeql_pack_install — resolve dependencies for the model pack

### Phase 6: Test with `codeql query run`

Validate the model against a real database:

- [ ] **Run a relevant security query with the extension applied**
- Tool: #codeql_query_run
- Pass the model pack directory via the `additionalPacks` parameter
- Pick a query whose sink kind matches what you modeled (e.g. a `sql-injection` query when adding SQL sinks)
- Decode results: #codeql_bqrs_decode or #codeql_bqrs_interpret

- [ ] **Verify expected findings appear**
- New sources/sinks should produce findings that were absent without the extension
- Barriers/barrier guards should suppress findings that were previously reported

### Phase 7: Run Unit Tests with `codeql test run`

- [ ] **Create a test case for the extension**
- Write a small test file that exercises the new source/sink/summary chain end-to-end
- Include both positive cases (vulnerable code detected) and negative cases (safe code not flagged)

- [ ] **Run the tests**
- Tool: #codeql_test_run
- Pass the model pack directory via the `additionalPacks` parameter
- Note: `codeql test run` does **not** accept `--model-packs`; extensions must be wired via `codeql-pack.yml` or `--additional-packs`

- [ ] **Accept correct results**
- Tool: #codeql_test_accept — accept the `.actual` output as the `.expected` baseline once you confirm it is correct

### Phase 8: Decide Next Steps

- If the `.model.yml` lives under `.github/codeql/extensions/` of the consuming repo, you are **done** — Code Scanning will load it on the next analysis.
- If you authored a reusable model pack and want it to apply across an organization, publish it to GHCR with `codeql pack publish` and configure it under org Code security → Global settings → CodeQL analysis → Model packs.

## Validation Checklist

- [ ] Correct tuple format for the language (API Graph vs MaD)
- [ ] Every row has the exact column count for its extensible predicate
- [ ] Sink/barrier `kind` values match across the chain
- [ ] At least one end-to-end test exercises the new model and produces expected findings
- [ ] `codeql-pack.yml` `dataExtensions` glob actually matches the new files
- [ ] No regressions in pre-existing tests under the same pack

## Related Resources

- `codeql://learning/data-extensions` — Common data extensions overview (both model formats)
- `codeql://languages/{{language}}/library-modeling` — Language-specific library modeling guide
- `codeql://templates/security` — Security query templates
- `codeql://learning/test-driven-development` — TDD workflow for CodeQL queries