Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
},
"license": "MIT",
"dependencies": {
"@toon-format/toon": "^0.7.2",
"axios": "^1.11.0",
"fastmcp": "^3.1.1",
"playwright": "^1.51.1",
"remark": "^15.0.1",
"strip-markdown": "^6.0.0",
"zod": "^3.24.2"
},
"publishConfig": {
Expand Down
133 changes: 105 additions & 28 deletions server.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import {z} from 'zod';
import axios from 'axios';
import {tools as browser_tools} from './browser_tools.js';
import {createRequire} from 'node:module';
import { encode } from '@toon-format/toon';
import {remark} from 'remark';
import strip from 'strip-markdown'
const require = createRequire(import.meta.url);
const package_json = require('./package.json');
const api_token = process.env.API_TOKEN;
Expand Down Expand Up @@ -163,22 +166,15 @@ addTool({
if (!is_google)
return response.data;
try {
const searchData = JSON.parse(response.data);
return JSON.stringify({
organic: searchData.organic || [],
images: searchData.images
? searchData.images.map(img=>img.link) : [],
current_page: searchData.pagination.current_page || {},
related: searchData.related || [],
ai_overview: searchData.ai_overview || null,
});
const search_data = JSON.parse(response.data);
return JSON.stringify(
clean_google_search_payload(search_data), null, 2);
} catch(e){
return JSON.stringify({
organic: [],
images: [],
pagination: {},
related: [],
});
current_page: 1,
related_keywords: [],
}, null, 2);
}
}),
});
Expand All @@ -203,7 +199,10 @@ addTool({
headers: api_headers(ctx.clientName),
responseType: 'text',
});
return response.data;
const minified_data = await remark()
.use(strip)
.process(response.data)
return minified_data.value;
}),
});

Expand Down Expand Up @@ -241,18 +240,20 @@ addTool({
responseType: 'text',
}).then(response => {
if (is_google) {
const search_data = JSON.parse(response.data);
return {
query,
engine: engine || 'google',
result: {
organic: search_data.organic || [],
images: search_data.images ? search_data.images.map(img => img.link) : [],
current_page: search_data.pagination?.current_page || {},
related: search_data.related || [],
ai_overview: search_data.ai_overview || null
}
};
try {
const search_data = JSON.parse(response.data);
return {
query,
engine: engine || 'google',
result: clean_google_search_payload(search_data),
};
} catch(e) {
return {
query,
engine: engine || 'google',
result: clean_google_search_payload(null),
};
}
}
return {
query,
Expand Down Expand Up @@ -849,8 +850,10 @@ for (let {dataset_id, id, description, inputs, defaults = {}, fixed_values = {}}
}
console.error(`[web_data_${id}] snapshot data received `
+`after ${attempts + 1} attempts`);
let result_data = JSON.stringify(snapshot_response.data);
return result_data;
const data = JSON.parse(JSON.stringify(
snapshot_response.data,
(_k, v)=>v==null ? undefined : v));
return encode(data, {delimiter: '\n', indent: 0});
} catch(e){
console.error(`[web_data_${id}] polling error: `
+`${e.message}`);
Expand Down Expand Up @@ -933,6 +936,80 @@ function tool_fn(name, fn){
};
}

/**
 * Reduce a parsed Google search payload to a compact, normalized shape.
 *
 * @param {*} raw_data - Parsed response body. Anything that is not a
 *     non-null object (null, string, number, ...) is treated as an empty
 *     payload.
 * @returns {{organic: Array<{link: string, title: string,
 *     description: string}>, current_page: number,
 *     related_keywords: string[]}} `organic` keeps only entries that have
 *     both a link and a title; `related_keywords` is de-duplicated in
 *     first-seen order; `current_page` falls back to 1 when no positive
 *     finite page number is present.
 */
function clean_google_search_payload(raw_data){
    const data = raw_data && typeof raw_data=='object' ? raw_data : {};

    // Normalize a display string. Only zero-width / bidi marks
    // (U+200B-U+200F) are deleted outright; typographic spaces
    // (U+2000-U+200A) and line/paragraph separators (U+2028/U+2029) are
    // matched by \s and collapse to a single space, so the words they
    // separate are not fused together.
    const to_text = value=>{
        if (typeof value!='string')
            return '';
        return value
            .replace(/[\u200B-\u200F]/g, '')
            .replace(/\s+/g, ' ')
            .trim();
    };

    // First candidate that yields non-empty text. Unlike a `??` chain this
    // also skips empty strings and non-string values, so a blank preferred
    // field does not hide a usable fallback field.
    const first_text = (...candidates)=>{
        for (const candidate of candidates){
            const text = to_text(candidate);
            if (text)
                return text;
        }
        return '';
    };

    // First non-blank string among link / url / cache.url. Links are only
    // trimmed, never whitespace-collapsed.
    const pick_link = entry=>{
        if (!entry || typeof entry!='object')
            return '';
        for (const candidate of [entry.link, entry.url, entry.cache?.url]){
            const link = typeof candidate=='string' ? candidate.trim() : '';
            if (link)
                return link;
        }
        return '';
    };

    const organic = Array.isArray(data.organic) ? data.organic : [];
    const related = Array.isArray(data.related) ? data.related : [];
    const pagination = data.pagination && typeof data.pagination=='object'
        ? data.pagination
        : {};

    const organic_clean = organic
        .map(entry=>{
            const link = pick_link(entry);
            const title = first_text(entry?.title, entry?.heading,
                entry?.name);
            // An entry is only useful if it can be shown and followed.
            if (!link || !title)
                return null;
            const description = first_text(entry?.description,
                entry?.snippet, entry?.snippet_long, entry?.subtitle);
            return {link, title, description};
        })
        .filter(Boolean);

    // Related items may be plain strings or objects using any of several
    // key names; Set keeps the first occurrence of each keyword.
    const related_keywords = [...new Set(related
        .map(item=>{
            if (typeof item=='string')
                return to_text(item);
            if (!item || typeof item!='object')
                return '';
            return first_text(item.query, item.keyword, item.text,
                item.title, item.question, item.label, item.term,
                item.search_term);
        })
        .filter(Boolean))];

    const page_candidate = pagination.current_page
        ?? pagination.currentPage
        ?? pagination.page
        ?? pagination.current
        ?? pagination.index;
    const parsed_page = Number(page_candidate);
    const current_page = Number.isFinite(parsed_page) && parsed_page>0
        ? parsed_page
        : 1;

    return {organic: organic_clean, current_page, related_keywords};
}

function search_url(engine, query, cursor){
let q = encodeURIComponent(query);
let page = cursor ? parseInt(cursor) : 0;
Expand Down