1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "provenance" : []
7+ },
8+ "kernelspec" : {
9+ "name" : " python3" ,
10+ "display_name" : " Python 3"
11+ },
12+ "language_info" : {
13+ "name" : " python"
14+ }
15+ },
16+ "cells" : [
17+ {
18+ "cell_type" : " code" ,
19+ "source" : [
20+ " import re, json, time, random\n " ,
21+ " from dataclasses import dataclass\n " ,
22+ " from typing import Callable, Dict, Any, List, Tuple\n " ,
23+ " \n " ,
@dataclass
class ToolSpec:
    """Plain-data description of a tool: its identity plus its declared I/O.

    Attributes:
        name: Identifier the tool is known by.
        description: Short human-readable summary of the tool.
        inputs: Mapping of argument name to a type label string (e.g. "str").
        outputs: Mapping of output name to a type label string (e.g. "json").
    """

    # Identity
    name: str
    description: str
    # Declared interface (argument/output name -> type label)
    inputs: Dict[str, str]
    outputs: Dict[str, str]
30+ " \n " ,
def parse_doc_to_spec(name: str, doc: str) -> ToolSpec:
    """Build a ToolSpec by heuristically scanning a usage string.

    Args:
        name: Tool name; also used as the description when ``doc`` is blank.
        doc: Free-form usage text. The first line of the stripped text becomes
            the description. Every line containing ``--`` or ``:`` is scanned
            for ``key``/``type`` token pairs.

    Returns:
        A ToolSpec whose ``inputs`` maps each discovered key (leading dashes
        removed) to its type token, defaulting to ``"str"`` when no type token
        follows. The keys ``Returns``, ``Output`` and ``Outputs`` are skipped.
        If nothing is found, ``inputs`` is ``{"in": "str"}``. ``outputs`` is
        always ``{"out": "json"}``.
    """
    stripped = doc.strip()
    description = stripped.splitlines()[0].strip() if stripped else name

    # Group 1: a dashed flag or a bare word; group 2: optional type token
    # after an optional ':' or '='.
    token_re = re.compile(r"(--?\w[\w-]*|\b\w+\b)\s*[:=]?\s*(\w+)?")
    reserved = ("Returns", "Output", "Outputs")

    inputs = {}
    for raw_line in doc.splitlines():
        # Only lines that look like they carry argument declarations.
        if "--" not in raw_line and ":" not in raw_line:
            continue
        for flag, type_name in token_re.findall(raw_line):
            arg = flag.lstrip("-")
            # First occurrence wins; empty and reserved keys are ignored.
            if not arg or arg in inputs or arg in reserved:
                continue
            inputs[arg] = type_name if type_name else "str"

    if not inputs:
        inputs = {"in": "str"}
    return ToolSpec(
        name=name,
        description=description,
        inputs=inputs,
        outputs={"out": "json"},
    )
43+ ],
44+ "metadata" : {
45+ "id" : " Xk2RZH5ZSjHn"
46+ },
47+ "execution_count" : null ,
48+ "outputs" : []
49+ },
50+ {
51+ "cell_type" : " code" ,
52+ "source" : [
def tool_fastqc(seq_fasta: str, min_len: int = 30) -> Dict[str, Any]:
    """Compute simple length and GC statistics over FASTA records.

    Args:
        seq_fasta: FASTA text; anything before the first ``>`` header line is
            discarded.
        min_len: Length threshold used for the ``pct_q30`` figure.

    Returns:
        Dict with ``n_seqs`` (record count), ``len_mean`` (mean length with
        whitespace removed), ``pct_q30`` (fraction, 0..1, of records whose
        length is at least ``min_len`` -- a length filter, not a base-quality
        score) and ``gc`` (fraction of G/C characters). All ratios are 0.0
        for empty input.
    """
    # Splitting on header lines leaves the sequence bodies; drop the preamble.
    records = re.split(r">[^\n]*\n", seq_fasta)[1:]
    lengths = [len(re.sub(r"\s+", "", rec)) for rec in records]

    n_records = len(lengths)
    total_bases = sum(lengths)
    safe_n = max(1, n_records)  # avoid dividing by zero on empty input

    passing = sum(1 for length in lengths if length >= min_len)
    gc_count = sum(rec.count(base) for rec in records for base in "GCgc")

    return {
        "n_seqs": n_records,
        "len_mean": total_bases / safe_n,
        "pct_q30": passing / safe_n,
        "gc": gc_count / max(1, total_bases),
    }
59+ " \n " ,
def tool_bowtie2_like(ref: str, reads: str, mode: str = "end-to-end") -> Dict[str, Any]:
    """Exact-match "aligner": locate each read (or its reverse complement) in ref.

    Args:
        ref: Reference FASTA text; the first line is treated as the header and
            all remaining lines are concatenated into one sequence.
        reads: FASTA text with one or more reads; text before the first ``>``
            header line is ignored.
        mode: Echoed back in the result; it does not change the matching.

    Returns:
        Dict with ``n`` (number of reads), ``aligned`` (how many matched),
        ``mode`` and ``hits``: one ``{"read_id", "aligned", "pos"}`` entry per
        read. ``pos`` is the 0-based offset in the reference of the forward
        match, or of the reverse-complement match when only that strand
        matches, or -1 when the read does not align.
    """
    complement = str.maketrans("ACGTacgt", "TGCAtgca")

    def revcomp(seq: str) -> str:
        """Reverse complement of a nucleotide string."""
        return seq.translate(complement)[::-1]

    reads_list = re.split(r">[^\n]*\n", reads)[1:]
    ref_seq = "".join(ref.splitlines()[1:])

    hits = []
    for read_id, record in enumerate(reads_list):
        rseq = "".join(record.split())
        pos = -1
        # An empty read would trivially "match" at offset 0; treat it as
        # unaligned instead of inflating the aligned count.
        if rseq:
            pos = ref_seq.find(rseq)
            if pos < 0:
                # Report where the reverse-strand match sits rather than -1,
                # so `aligned` and `pos` never contradict each other.
                pos = ref_seq.find(revcomp(rseq))
        hits.append({"read_id": read_id, "aligned": pos >= 0, "pos": pos})

    return {
        "n": len(hits),
        "aligned": sum(h["aligned"] for h in hits),
        "mode": mode,
        "hits": hits,
    }
71+ " \n " ,
def tool_bcftools_like(ref: str, alt: str, win: int = 15) -> Dict[str, Any]:
    """Report position-wise mismatches between two single-record FASTA texts.

    Args:
        ref: Reference FASTA text (first line is the header).
        alt: Alternate FASTA text (first line is the header).
        win: Maximum number of variant entries included in ``variants``.

    Returns:
        Dict with ``n_sites`` (number of compared positions, i.e. the shorter
        sequence length), ``n_var`` (total mismatches) and ``variants`` (the
        first ``win`` mismatches as ``{"pos", "ref", "alt"}`` dicts).
    """
    ref_bases = "".join(ref.splitlines()[1:])
    alt_bases = "".join(alt.splitlines()[1:])
    n_sites = min(len(ref_bases), len(alt_bases))

    # zip stops at the shorter sequence, matching the n_sites bound.
    mismatches = [
        {"pos": idx, "ref": ref_base, "alt": alt_base}
        for idx, (ref_base, alt_base) in enumerate(zip(ref_bases, alt_bases))
        if ref_base != alt_base
    ]
    return {
        "n_sites": n_sites,
        "n_var": len(mismatches),
        "variants": mismatches[:win],
    }
78+ " \n " ,
# Usage strings handed to the server when each tool is registered.
# Line 1 is the tool description; line 2 declares the arguments as
# "--name: type" pairs followed by an "Outputs:" marker.
FASTQC_DOC = (
    "FastQC-like quality control for FASTA\n"
    "--seq_fasta: str --min_len: int Outputs: json"
)
BOWTIE_DOC = (
    "Bowtie2-like aligner\n"
    "--ref: str --reads: str --mode: str Outputs: json"
)
BCF_DOC = (
    "bcftools-like variant caller\n"
    "--ref: str --alt: str --win: int Outputs: json"
)
85+ ],
86+ "metadata" : {
87+ "id" : " iH7D9NKDSkHx"
88+ },
89+ "execution_count" : null ,
90+ "outputs" : []
91+ },
92+ {
93+ "cell_type" : " code" ,
94+ "source" : [
@dataclass
class MCPTool:
    """A registered tool: its parsed spec paired with the callable that runs it.

    Attributes:
        spec: The ToolSpec describing the tool's name, description and I/O.
        fn: Callable invoked with keyword arguments; returns a dict result.
    """

    spec: ToolSpec
    fn: Callable[..., Dict[str, Any]]
99+ " \n " ,
class MCPServer:
    """In-process registry that maps tool names to callables and their specs."""

    def __init__(self):
        # name -> MCPTool, in registration order
        self.tools: Dict[str, MCPTool] = {}

    def register(self, name: str, doc: str, fn: Callable[..., Dict[str, Any]]):
        """Parse ``doc`` into a spec and store ``fn`` under ``name``.

        Registering an existing name replaces the previous entry.
        """
        spec = parse_doc_to_spec(name, doc)
        self.tools[name] = MCPTool(spec, fn)

    def list_tools(self) -> List[Dict[str, Any]]:
        """Return one dict (name, description, inputs, outputs) per registered tool."""
        return [
            dict(
                name=t.spec.name,
                description=t.spec.description,
                inputs=t.spec.inputs,
                outputs=t.spec.outputs,
            )
            for t in self.tools.values()
        ]

    def call_tool(self, name: str, args: Dict[str, Any]) -> Dict[str, Any]:
        """Invoke a registered tool with the subset of ``args`` its spec declares.

        Args:
            name: Registered tool name.
            args: Candidate keyword arguments. Keys not declared in the tool's
                spec are ignored.

        Returns:
            Whatever the tool callable returns.

        Raises:
            KeyError: If ``name`` is not registered.
        """
        if name not in self.tools:
            raise KeyError(f"tool {name} not found")
        tool = self.tools[name]
        # Forward only the arguments the caller actually supplied. Passing
        # None for omitted ones would override the callable's own defaults
        # (e.g. min_len=None instead of the default 30).
        kwargs = {k: args[k] for k in tool.spec.inputs.keys() if k in args}
        return tool.fn(**kwargs)
111+ " \n " ,
# Single shared server instance with the three demo tools registered.
server = MCPServer()
server.register(name="fastqc", doc=FASTQC_DOC, fn=tool_fastqc)
server.register(name="bowtie2", doc=BOWTIE_DOC, fn=tool_bowtie2_like)
server.register(name="bcftools", doc=BCF_DOC, fn=tool_bcftools_like)

# A task is (tool name, argument template). String values in the template may
# contain "{placeholder}" fields that are filled in when the pipeline is run.
Task = Tuple[str, Dict[str, Any]]

PIPELINES = dict(
    rnaseq_qc_align_call=[
        ("fastqc", dict(seq_fasta="{reads}", min_len=30)),
        ("bowtie2", dict(ref="{ref}", reads="{reads}", mode="end-to-end")),
        ("bcftools", dict(ref="{ref}", alt="{alt}", win=15)),
    ],
)
125+ " \n " ,
def compile_pipeline(nl_request: str) -> List[Task]:
    """Map a natural-language request to a list of pipeline tasks.

    Args:
        nl_request: Free-text request; searched case-insensitively for the
            keywords rna, qc, align, variant or call.

    Returns:
        The task list stored in ``PIPELINES`` under the selected key (the
        stored list itself, not a copy).
    """
    keyword_hit = re.search(r"rna|qc|align|variant|call", nl_request, re.I)
    # NOTE: only one pipeline exists, so the keyword match and the fallback
    # currently resolve to the same key.
    if keyword_hit:
        selected = "rnaseq_qc_align_call"
    else:
        selected = "rnaseq_qc_align_call"
    return PIPELINES[selected]
129+ ],
130+ "metadata" : {
131+ "id" : " CGM9J07zSm-p"
132+ },
133+ "execution_count" : null ,
134+ "outputs" : []
135+ },
136+ {
137+ "cell_type" : " code" ,
138+ "source" : [
def mk_fasta(header: str, seq: str) -> str:
    """Format one FASTA record: a '>' header line followed by the sequence line."""
    return f">{header}\n{seq}\n"


# Fixed seed so the synthetic reference (and everything cut from it) is reproducible.
random.seed(0)
REF_SEQ = "".join(random.choice("ACGT") for _ in range(300))
REF = mk_fasta("ref", REF_SEQ)

# r1 and r3 are exact slices of the reference; r2 is an unrelated repeat.
READS = (
    mk_fasta("r1", REF_SEQ[50:130])
    + mk_fasta("r2", "ACGT" * 15)
    + mk_fasta("r3", REF_SEQ[180:240])
)

# ALT carries a single substitution at SNP_POS. The replacement base is chosen
# to differ from the reference base: unconditionally writing "T" is a no-op
# whenever the reference already has "T" there, leaving ALT identical to REF.
SNP_POS = 150
SNP_BASE = "T" if REF_SEQ[SNP_POS] != "T" else "A"
ALT = mk_fasta("alt", REF_SEQ[:SNP_POS] + SNP_BASE + REF_SEQ[SNP_POS + 1:])
145+ " \n " ,
def run_pipeline(nl: str, ctx: Dict[str, str]) -> Dict[str, Any]:
    """Compile a request into tasks and run them in order on the shared server.

    Args:
        nl: Natural-language request passed to ``compile_pipeline``.
        ctx: Values substituted into ``{placeholder}`` fields of string
            arguments in each task's argument template.

    Returns:
        Dict with ``request`` (the original text), ``elapsed_s`` (seconds,
        rounded to 4 decimals) and ``results``: one
        ``{"tool", "args", "output"}`` entry per executed step.
    """
    plan = compile_pipeline(nl)
    results = []
    # perf_counter is monotonic and high-resolution; time.time() can jump with
    # wall-clock adjustments and is too coarse for sub-millisecond steps.
    t0 = time.perf_counter()
    for name, arg_tpl in plan:
        # Only string values are templates; other values pass through as-is.
        args = {
            k: (v.format(**ctx) if isinstance(v, str) else v)
            for k, v in arg_tpl.items()
        }
        out = server.call_tool(name, args)
        results.append({"tool": name, "args": args, "output": out})
    return {
        "request": nl,
        "elapsed_s": round(time.perf_counter() - t0, 4),
        "results": results,
    }
153+ ],
154+ "metadata" : {
155+ "id" : " eb6JS_luSo19"
156+ },
157+ "execution_count" : null ,
158+ "outputs" : []
159+ },
160+ {
161+ "cell_type" : " code" ,
162+ "execution_count" : 2 ,
163+ "metadata" : {
164+ "colab" : {
165+ "base_uri" : " https://localhost:8080/"
166+ },
167+ "id" : " nb0-nNYkRHuC" ,
168+ "outputId" : " e42167a9-40f4-45b2-a5d9-9ed195bd6d84"
169+ },
170+ "outputs" : [
171+ {
172+ "output_type" : " stream" ,
173+ "name" : " stdout" ,
174+ "text" : [
175+ " == TOOLS ==\n " ,
176+ " [\n " ,
177+ " {\n " ,
178+ " \" name\" : \" fastqc\" ,\n " ,
179+ " \" description\" : \" FastQC-like quality control for FASTA\" ,\n " ,
180+ " \" inputs\" : {\n " ,
181+ " \" seq_fasta\" : \" str\" ,\n " ,
182+ " \" min_len\" : \" int\"\n " ,
183+ " },\n " ,
184+ " \" outputs\" : {\n " ,
185+ " \" out\" : \" json\"\n " ,
186+ " }\n " ,
187+ " },\n " ,
188+ " {\n " ,
189+ " \" name\" : \" bowtie2\" ,\n " ,
190+ " \" description\" : \" Bowtie2-like aligner\" ,\n " ,
191+ " \" inputs\" : {\n " ,
192+ " \" ref\" : \" str\" ,\n " ,
193+ " \" reads\" : \" str\" ,\n " ,
194+ " \" mode\" : \" str\"\n " ,
195+ " },\n " ,
196+ " \" outputs\" : {\n " ,
197+ " \" out\" : \" json\"\n " ,
198+ " }\n " ,
199+ " },\n " ,
200+ " {\n " ,
201+ " \" name\" : \" bcftools\" ,\n " ,
202+ " \" description\" : \" bcftools-like variant caller\" ,\n " ,
203+ " \" inputs\" : {\n " ,
204+ " \" ref\" : \" str\" ,\n " ,
205+ " \" alt\" : \" str\" ,\n " ,
206+ " \" win\" : \" int\"\n " ,
207+ " },\n " ,
208+ " \" outputs\" : {\n " ,
209+ " \" out\" : \" json\"\n " ,
210+ " }\n " ,
211+ " }\n " ,
212+ " ]\n " ,
213+ " \n " ,
214+ " == INDIVIDUAL BENCH ==\n " ,
215+ " [\n " ,
216+ " {\n " ,
217+ " \" tool\" : \" fastqc\" ,\n " ,
218+ " \" ok\" : true,\n " ,
219+ " \" ms\" : 0,\n " ,
220+ " \" out_keys\" : [\n " ,
221+ " \" n_seqs\" ,\n " ,
222+ " \" len_mean\" ,\n " ,
223+ " \" pct_q30\" ,\n " ,
224+ " \" gc\"\n " ,
225+ " ],\n " ,
226+ " \" err\" : null\n " ,
227+ " },\n " ,
228+ " {\n " ,
229+ " \" tool\" : \" bowtie2\" ,\n " ,
230+ " \" ok\" : true,\n " ,
231+ " \" ms\" : 0,\n " ,
232+ " \" out_keys\" : [\n " ,
233+ " \" n\" ,\n " ,
234+ " \" aligned\" ,\n " ,
235+ " \" mode\" ,\n " ,
236+ " \" hits\"\n " ,
237+ " ],\n " ,
238+ " \" err\" : null\n " ,
239+ " },\n " ,
240+ " {\n " ,
241+ " \" tool\" : \" bcftools\" ,\n " ,
242+ " \" ok\" : true,\n " ,
243+ " \" ms\" : 0,\n " ,
244+ " \" out_keys\" : [\n " ,
245+ " \" n_sites\" ,\n " ,
246+ " \" n_var\" ,\n " ,
247+ " \" variants\"\n " ,
248+ " ],\n " ,
249+ " \" err\" : null\n " ,
250+ " }\n " ,
251+ " ]\n " ,
252+ " \n " ,
253+ " == PIPELINE BENCH ==\n " ,
254+ " {\n " ,
255+ " \" pipeline\" : \" rnaseq_qc_align_call\" ,\n " ,
256+ " \" ok\" : true,\n " ,
257+ " \" ms\" : 0,\n " ,
258+ " \" n_steps\" : 3\n " ,
259+ " }\n " ,
260+ " \n " ,
261+ " == PIPELINE RUN ==\n " ,
262+ " {\n " ,
263+ " \" request\" : \" Run RNA-seq QC, align, and variant call.\" ,\n " ,
264+ " \" elapsed_s\" : 0.0001,\n " ,
265+ " \" results\" : [\n " ,
266+ " {\n " ,
267+ " \" tool\" : \" fastqc\" ,\n " ,
268+ " \" args\" : {\n " ,
269+ " \" seq_fasta\" : \" >r1\\ nGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTC\\ n>r2\\ nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\\ n>r3\\ nTTTATTACTAGCTTAATGGTATCACATTGACAAACACGGCATTAAGTAGCGACGAAACGG\\ n\" ,\n " ,
270+ " \" min_len\" : 30\n " ,
271+ " },\n " ,
272+ " \" output\" : {\n " ,
273+ " \" n_seqs\" : 3,\n " ,
274+ " \" len_mean\" : 66.66666666666667,\n " ,
275+ " \" pct_q30\" : 1.0,\n " ,
276+ " \" gc\" : 0.46\n " ,
277+ " }\n " ,
278+ " },\n " ,
279+ " {\n " ,
280+ " \" tool\" : \" bowtie2\" ,\n " ,
281+ " \" args\" : {\n " ,
282+ " \" ref\" : \" >ref\\ nTTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGTATCACATTGACAAACACGGCATTAAGTAGCGACGAAACGGGATTTGCCTGACCGGGGAGAAGCCGGTCGATCAGCAGTGGTAATTGGATATTAGGCCTAA\\ n\" ,\n " ,
283+ " \" reads\" : \" >r1\\ nGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTC\\ n>r2\\ nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\\ n>r3\\ nTTTATTACTAGCTTAATGGTATCACATTGACAAACACGGCATTAAGTAGCGACGAAACGG\\ n\" ,\n " ,
284+ " \" mode\" : \" end-to-end\"\n " ,
285+ " },\n " ,
286+ " \" output\" : {\n " ,
287+ " \" n\" : 3,\n " ,
288+ " \" aligned\" : 2,\n " ,
289+ " \" mode\" : \" end-to-end\" ,\n " ,
290+ " \" hits\" : [\n " ,
291+ " {\n " ,
292+ " \" read_id\" : 0,\n " ,
293+ " \" aligned\" : true,\n " ,
294+ " \" pos\" : 50\n " ,
295+ " },\n " ,
296+ " {\n " ,
297+ " \" read_id\" : 1,\n " ,
298+ " \" aligned\" : false,\n " ,
299+ " \" pos\" : -1\n " ,
300+ " },\n " ,
301+ " {\n " ,
302+ " \" read_id\" : 2,\n " ,
303+ " \" aligned\" : true,\n " ,
304+ " \" pos\" : 180\n " ,
305+ " }\n " ,
306+ " ]\n " ,
307+ " }\n " ,
308+ " },\n " ,
309+ " {\n " ,
310+ " \" tool\" : \" bcftools\" ,\n " ,
311+ " \" args\" : {\n " ,
312+ " \" ref\" : \" >ref\\ nTTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGTATCACATTGACAAACACGGCATTAAGTAGCGACGAAACGGGATTTGCCTGACCGGGGAGAAGCCGGTCGATCAGCAGTGGTAATTGGATATTAGGCCTAA\\ n\" ,\n " ,
313+ " \" alt\" : \" >alt\\ nTTAGTTGTGCCGCAGCGAAGTAGTGCTTGAAATATGCGACCCCTAAGTAGGAGCGTATGCGCCCAGTAACCAATGCCTGTTGAGATGCCAGACGCGTAACCAAAACATAGAAACCATCAATAGACAGGTCATAATCGGTCCACCGGATCATTGGTGCATAGAGCCTGGGCGTTAACGCCCTTTATTACTAGCTTAATGGTATCACATTGACAAACACGGCATTAAGTAGCGACGAAACGGGATTTGCCTGACCGGGGAGAAGCCGGTCGATCAGCAGTGGTAATTGGATATTAGGCCTAA\\ n\" ,\n " ,
314+ " \" win\" : 15\n " ,
315+ " },\n " ,
316+ " \" output\" : {\n " ,
317+ " \" n_sites\" : 300,\n " ,
318+ " \" n_var\" : 0,\n " ,
319+ " \" variants\" : []\n " ,
320+ " }\n " ,
321+ " }\n " ,
322+ " ]\n " ,
323+ " }\n "
324+ ]
325+ }
326+ ],
327+ "source" : [
def bench_individual() -> List[Dict[str, Any]]:
    """Call each tool once through the server and record status and timing.

    Returns:
        One row per tool with ``tool``, ``ok``, ``ms`` (elapsed milliseconds,
        rounded to 3 decimals), ``out_keys`` (keys of the tool's result, empty
        on failure) and ``err`` (exception text, or None on success).
    """
    cases = [
        ("fastqc", {"seq_fasta": READS, "min_len": 25}),
        ("bowtie2", {"ref": REF, "reads": READS, "mode": "end-to-end"}),
        ("bcftools", {"ref": REF, "alt": ALT, "win": 10}),
    ]
    rows = []
    for name, args in cases:
        ok = True
        err = None
        out = None
        # perf_counter with fractional milliseconds: these calls finish in
        # well under 1 ms, so truncating to an int always reported 0.
        t0 = time.perf_counter()
        try:
            out = server.call_tool(name, args)
        except Exception as e:
            # Record the failure so the remaining tools are still benchmarked.
            ok = False
            err = str(e)
        rows.append({
            "tool": name,
            "ok": ok,
            "ms": round((time.perf_counter() - t0) * 1000, 3),
            "out_keys": list(out.keys()) if ok else [],
            "err": err,
        })
    return rows
341+ " \n " ,
def bench_pipeline() -> Dict[str, Any]:
    """Run the demo pipeline end to end and summarize status and timing.

    Returns:
        Dict with ``pipeline``, ``ok`` (True when every step produced a truthy
        output and nothing raised), ``ms`` (elapsed milliseconds, rounded to 3
        decimals), ``n_steps`` (number of completed steps reported by the run;
        0 if the run raised) and ``err`` (exception text, or None).
    """
    t0 = time.perf_counter()
    steps = []
    err = None
    try:
        res = run_pipeline(
            "Run RNA-seq QC, align, and variant call.",
            {"ref": REF, "reads": READS, "alt": ALT},
        )
        steps = res["results"]
        ok = all(step["output"] for step in steps)
    except Exception as e:
        # Report the failure as a row, the same way the per-tool benchmark
        # does, instead of aborting the whole report.
        ok = False
        err = str(e)
    return {
        "pipeline": "rnaseq_qc_align_call",
        "ok": ok,
        "ms": round((time.perf_counter() - t0) * 1000, 3),
        "n_steps": len(steps),
        "err": err,
    }
347+ " \n " ,
# Print each report section as a title line followed by pretty-printed JSON.
# Producers are invoked lazily so each title is printed before its section
# is computed.
for section_title, produce in (
    ("== TOOLS ==", server.list_tools),
    ("\n== INDIVIDUAL BENCH ==", bench_individual),
    ("\n== PIPELINE BENCH ==", bench_pipeline),
    (
        "\n== PIPELINE RUN ==",
        lambda: run_pipeline(
            "Run RNA-seq QC, align, and variant call.",
            {"ref": REF, "reads": READS, "alt": ALT},
        ),
    ),
):
    print(section_title)
    print(json.dumps(produce(), indent=2))
352+ ]
353+ }
354+ ]
355+ }
0 commit comments