Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions field_filter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
'use strict'; /*jslint node:true es9:true*/

// Response shaping for the batch tools (opt-in). Takes an array of result
// objects plus the list of wanted field names and returns, for every item, a
// new object holding only the requested own top-level fields, so agent
// pipelines don't spend tokens on data they never asked for. Prototype-related
// keys are never copied, and any item that is not an object becomes {} to
// keep the output array uniform. With no fields requested, or when results is
// not an array, the input is handed back as-is.
const PROTECTED_PROPS = new Set(['__proto__', 'constructor', 'prototype']);

export function filter_fields(results, fields){
    const nothing_requested = !fields || fields.length===0;
    if (nothing_requested || !Array.isArray(results))
        return results;
    const has_own = Object.prototype.hasOwnProperty;
    // drop protected names once, up front, instead of once per item
    const wanted = fields.filter(name=>!PROTECTED_PROPS.has(name));
    const pick = item=>{
        const is_record = item!==null && typeof item==='object';
        if (!is_record)
            return {};
        // own properties only: inherited values are never copied
        const present = wanted.filter(name=>has_own.call(item, name));
        const pairs = present.map(name=>[name, item[name]]);
        return Object.fromEntries(pairs);
    };
    return results.map(pick);
}
72 changes: 50 additions & 22 deletions server.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {parse_google_search_response} from './search_utils.js';
import {createRequire} from 'node:module';
import {remark} from 'remark';
import strip from 'strip-markdown';
import {filter_fields} from './field_filter.js';
const require = createRequire(import.meta.url);
const package_json = require('./package.json');
const api_token = process.env.API_TOKEN;
Expand Down Expand Up @@ -300,8 +301,13 @@ addTool({
.describe('2-letter country code for geo-targeted results '
+'(e.g., "us", "uk")'),
})).min(1).max(5),
fields: z.array(z.enum(['link', 'title', 'description',
'relevance_score', 'cursor']))
.optional()
.describe('Filter response to only these fields. '
+'Saves tokens in agent pipelines.'),
}),
execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{
execute: tool_fn('search_engine_batch', async({queries, fields}, ctx)=>{
const search_promises = queries.map(({query, engine, cursor,
geo_location})=>{
const normalized_engine = engine || 'google';
Expand Down Expand Up @@ -349,7 +355,20 @@ addTool({
});

const results = await Promise.all(search_promises);
return JSON.stringify(results, null, 2);
if (!fields)
return JSON.stringify(results, null, 2);
const filtered = results.map(item=>{
if (item && item.result && Array.isArray(item.result.organic))
return {
...item,
result: {
...item.result,
organic: filter_fields(item.result.organic, fields),
},
};
return item;
});
return JSON.stringify(filtered, null, 2);
}),
});

Expand All @@ -365,29 +384,38 @@ addTool({
openWorldHint: true,
},
parameters: z.object({
urls: z.array(z.string().url()).min(1).max(5).describe('Array of URLs to scrape (max 5)')
urls: z.array(z.string().url()).min(1).max(5)
.describe('Array of URLs to scrape (max 5)'),
fields: z.array(z.string())
.optional()
.describe('Optional: return only these fields from each result '
+'(e.g. ["content"]).'),
}),
execute: tool_fn('scrape_batch', async ({urls}, ctx)=>{
const scrapePromises = urls.map(url =>
base_request({
url: 'https://api.brightdata.com/request',
method: 'POST',
data: {
url,
zone: unlocker_zone,
format: 'raw',
data_format: 'markdown',
},
headers: api_headers(ctx.clientName, 'scrape_batch'),
responseType: 'text',
}).then(async response=>({
url,
content: (await remark()
execute: tool_fn('scrape_batch', async ({urls, fields}, ctx)=>{
const scrapePromises = urls.map(async url=>{
try {
const response = await base_request({
url: 'https://api.brightdata.com/request',
method: 'POST',
data: {
url,
zone: unlocker_zone,
format: 'raw',
data_format: 'markdown',
},
headers: api_headers(ctx.clientName, 'scrape_batch'),
responseType: 'text',
});
const content = (await remark()
.use(strip, {keep: ['link', 'linkReference', 'code',
'inlineCode']})
.process(response.data)).value,
}))
);
.process(response.data)).value;
const result = {url, content};
return fields ? filter_fields([result], fields)[0] : result;
} catch(e){
return {url, error: e instanceof Error ? e.message : String(e)};
}
});

const results = await Promise.allSettled(scrapePromises);
return JSON.stringify(results, null, 2);
Expand Down
133 changes: 133 additions & 0 deletions test/field-filter.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
'use strict'; /*jslint node:true es9:true*/
import test from 'node:test';
import assert from 'node:assert/strict';
import {filter_fields} from '../field_filter.js';

// Degenerate result inputs: an empty array stays empty, while null and
// undefined are handed back unchanged
test('empty array returns empty array', ()=>{
    const actual = filter_fields([], ['title', 'url']);
    assert.deepEqual(actual, []);
});

test('null results returns null', ()=>{
    const actual = filter_fields(null, ['title']);
    assert.equal(actual, null);
});

test('undefined results returns undefined', ()=>{
    const actual = filter_fields(undefined, ['title']);
    assert.equal(actual, undefined);
});

// With no fields requested (empty list or null) the items come back untouched
test('empty fields array returns original items', ()=>{
    const items = [{a: 1, b: 2}, {c: 3}];
    const actual = filter_fields(items, []);
    assert.deepEqual(actual, items);
});

test('null fields array returns original items', ()=>{
    const items = [{a: 1}];
    const actual = filter_fields(items, null);
    assert.deepEqual(actual, items);
});

// Items that are null or undefined are replaced by {} in the output
test('null item in array returns empty object', ()=>{
    const items = [{title: 'a'}, null, {title: 'b'}];
    const expected = [{title: 'a'}, {}, {title: 'b'}];
    assert.deepEqual(filter_fields(items, ['title']), expected);
});

test('undefined item in array returns empty object', ()=>{
    const items = [{title: 'a'}, undefined, {title: 'b'}];
    const expected = [{title: 'a'}, {}, {title: 'b'}];
    assert.deepEqual(filter_fields(items, ['title']), expected);
});

test('null item with non-empty fields returns empty object', ()=>{
    const actual = filter_fields([null], ['title']);
    assert.deepEqual(actual, [{}]);
});

// Picking fields: one field, several fields, absent fields, several items
test('select single field', ()=>{
    const items = [{title: 'Hello', url: 'http://x.com', desc: 'Desc'}];
    const actual = filter_fields(items, ['title']);
    assert.deepEqual(actual, [{title: 'Hello'}]);
});

test('select multiple fields', ()=>{
    const items = [{title: 'Hello', url: 'http://x.com', desc: 'Desc'}];
    const actual = filter_fields(items, ['title', 'url']);
    assert.deepEqual(actual, [{title: 'Hello', url: 'http://x.com'}]);
});

test('select fields that do not exist returns empty object', ()=>{
    const actual = filter_fields([{title: 'Hello'}], ['url', 'desc']);
    assert.deepEqual(actual, [{}]);
});

test('select fields from multiple items', ()=>{
    const first = {title: 'A', url: 'http://a.com'};
    const second = {title: 'B', url: 'http://b.com'};
    const actual = filter_fields([first, second], ['title']);
    assert.deepEqual(actual, [{title: 'A'}, {title: 'B'}]);
});

// Field ordering follows the requested order. deepEqual compares own
// properties without regard to their order, so the key sequence has to be
// asserted explicitly through Object.keys; the requested order (a, m, z)
// deliberately differs from the item's own insertion order (z, a, m).
test('fields are returned in specified order', ()=>{
    const [picked] = filter_fields([{z: 1, a: 2, m: 3}], ['a', 'm', 'z']);
    assert.deepEqual(picked, {a: 2, m: 3, z: 1});
    assert.deepEqual(Object.keys(picked), ['a', 'm', 'z']);
});

// Requesting the same field twice still yields a single key in the output
test('duplicate fields in list are deduplicated', ()=>{
    const repeated = ['title', 'title'];
    const actual = filter_fields([{title: 'Hello'}], repeated);
    assert.deepEqual(actual, [{title: 'Hello'}]);
});

// Primitive items (numbers, strings, booleans) become {} in the output
test('non-object item in array returns empty object', ()=>{
    const primitives = [42, 'string', true];
    const actual = filter_fields(primitives, ['a']);
    assert.deepEqual(actual, [{}, {}, {}]);
});

test('mixed object and non-object items', ()=>{
    const mixed = [{title: 'A'}, 42, {title: 'B'}];
    const actual = filter_fields(mixed, ['title']);
    assert.deepEqual(actual, [{title: 'A'}, {}, {title: 'B'}]);
});

// A field list far longer than the item's key set still selects correctly
test('large field list is handled', ()=>{
    const fields = [];
    for (let i = 0; i<1000; i++)
        fields.push(`field${i}`);
    const item = {field0: 0, field500: 500, field999: 999};
    const actual = filter_fields([item], fields);
    assert.deepEqual(actual, [{field0: 0, field500: 500, field999: 999}]);
});

// Field names containing punctuation, and names that look like numbers
test('fields with special chars', ()=>{
    const item = {'field-name': 1, 'field_name': 2, 'field.name': 3};
    const actual = filter_fields([item], ['field-name', 'field_name']);
    assert.deepEqual(actual, [{'field-name': 1, 'field_name': 2}]);
});

test('numeric-looking field names', ()=>{
    const item = {'123': 'num', '0': 'zero'};
    const actual = filter_fields([item], ['123', '0']);
    assert.deepEqual(actual, [{'123': 'num', '0': 'zero'}]);
});

// Selection is top-level only: a nested object is carried over whole as the
// value of its key
test('nested objects are preserved as values', ()=>{
    const item = {title: 'A', meta: {k: 'v'}};
    const actual = filter_fields([item], ['title', 'meta']);
    assert.deepEqual(actual, [{title: 'A', meta: {k: 'v'}}]);
});

// Prototype-pollution guard: protected keys are never copied even if
// requested. The item has to actually own those keys; with a plain {a: 1}
// the own-property check alone would drop them and the guard itself would
// never be exercised.
test('protected prototype keys are never copied', ()=>{
    // JSON.parse defines "__proto__" as an ordinary own property instead of
    // going through the prototype setter, like untrusted API payloads would
    const item = JSON.parse('{"__proto__": {"polluted": true}, '
        +'"constructor": "c", "prototype": "p", "a": 1}');
    for (const key of ['__proto__', 'constructor', 'prototype'])
        assert.ok(Object.prototype.hasOwnProperty.call(item, key));
    const [picked] = filter_fields([item],
        ['__proto__', 'constructor', 'prototype', 'a']);
    assert.deepEqual(Object.keys(picked), ['a']);
    assert.deepEqual(picked, {a: 1});
    assert.equal(Object.getPrototypeOf(picked), Object.prototype);
    assert.equal(picked.polluted, undefined);
});