Government Procurement Opportunities
v1 · Published — Active public-sector procurement opportunities from official procurement portals, with tracking fields and evidence URLs for repeated monitoring.
Output & API
Preview the latest data, download it, or call this collector as an API.
| # | notes | status | category | currency | buyer_name | buyer_type | confidence | sub_agency | notice_type | portal_name | portal_type | subcategory | collected_at | contact_name | tracking_key | addendum_urls | contact_email | contact_phone | country_scope | document_urls | location_city | scope_summary | snapshot_label | location_region | opportunity_url | source_page_url | days_until_close | eligibility_text | location_country | posted_date_text | reference_number | closing_date_text | description_short | opportunity_title | contact_department | renewal_option_text | solicitation_number | agency_or_department | estimated_value_text | naics_cpv_unspsc_code | secondary_source_urls | contract_duration_text | posted_date_normalized | closing_date_normalized | estimated_value_normalized | set_aside_or_supplier_type | application_or_response_url | submission_instructions_text |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | null | active | null | GBP | Crescent Purchasing Consortium Limited | null | 0.5 | null | UK7: Contract details notice | Find a Tender | Official Procurement Portal | null | 2026-07-02T14:31:17.330Z | null | find-a-tender:062370-2026 | [] | null | null | UK | [] | null | null | 2026-07-02T14:31:17.326Z | null | https://www.find-tender.service.gov.uk/Notice/062370-2026?origin=SearchResults&p=1 | https://www.find-tender.service.gov.uk/Search/Results?&page=1#dashboard_notices | null | null | UK | 2 July 2026, 3:14pm | 062370-2026 | null | The CPC intends to establish a multi-supplier framework agreement for the supply of software licences, renewals, upgrades, support, and associated professional services on behalf of its members. | CPC/RB/01/2025 - Software Licences & Associated Services for Academies and Schools | null | null | null | null | £7,200,000; £2,400,000; £600,000; £3,600,000; £240,000; £360,000 | null | [] | null | 2026-07-02 | null | 7200000 | null | null | null |
| 1 | null | active | null | GBP | NHS England | null | 0.5 | null | UK4: Tender notice | Find a Tender | Official Procurement Portal | null | 2026-07-02T14:31:17.330Z | null | find-a-tender:062375-2026 | [] | null | null | UK | [] | null | null | 2026-07-02T14:31:17.326Z | null | https://www.find-tender.service.gov.uk/Notice/062375-2026?origin=SearchResults&p=1 | https://www.find-tender.service.gov.uk/Search/Results?&page=1#dashboard_notices | 29 | null | UK | 2 July 2026, 3:17pm | 062375-2026 | 31 July 2026, 12:00pm | null | Return to Practice Test of Competence Preparation Programme | null | null | null | null | £4,800,000 | null | [] | null | 2026-07-02 | 2026-07-31 | 4800000 | null | null | null |
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import Firecrawl from "@mendable/firecrawl-js";
import { z } from "zod";
import { parseArgs } from "node:util";
// Fail fast when the Firecrawl credential is missing: the search and scrape calls in this file
// all go through the client created below.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
console.error("FIRECRAWL_API_KEY is not set");
process.exit(1);
}
// Shared Firecrawl client used by discoverCandidateUrls (search) and extractFromUrl (scrape).
const firecrawl = new Firecrawl({ apiKey });
// Command-line flags. Every flag is declared as a string; numeric and boolean flags are converted
// and validated afterwards with parsePositiveInt / parseBoolean.
// NOTE(review): strict mode is expected to reject flags that are not declared here — confirm
// against the node:util parseArgs documentation.
const { values: flags } = parseArgs({
strict: true,
options: {
query: { type: "string" },
"country-scope": { type: "string" },
"seed-urls": { type: "string" },
"max-opportunities": { type: "string" },
"output-mode": { type: "string" },
"portal-types": { type: "string" },
"notice-types": { type: "string" },
"status-filter": { type: "string" },
"posted-within": { type: "string" },
"deadline-within": { type: "string" },
"agency-filter": { type: "string" },
"region-filter": { type: "string" },
"naics-or-category": { type: "string" },
"include-awards": { type: "string" },
"include-documents": { type: "string" },
"include-contact-details": { type: "string" },
language: { type: "string" },
"snapshot-label": { type: "string" },
"sort-hint": { type: "string" },
},
});
// Normalized flag values. Free-text flags are whitespace-collapsed with clean(); list flags are
// comma-separated and split with splitCsv(); missing flags fall back to the defaults shown here.
const query = clean(flags.query ?? "");
const countryScope = clean(flags["country-scope"] ?? "");
const seedUrls = splitCsv(flags["seed-urls"] ?? "");
// parsePositiveInt exits the process unless the value is an integer from 1 to 250.
const maxOpportunities = parsePositiveInt(flags["max-opportunities"] ?? "25", "max-opportunities");
const outputMode = clean(flags["output-mode"] ?? "opportunity_rows");
const portalTypes = splitCsv(flags["portal-types"] ?? "federal,state,local");
const noticeTypes = splitCsv(flags["notice-types"] ?? "RFP,RFQ,tender,solicitation");
const statusFilter = clean(flags["status-filter"] ?? "active").toLowerCase();
// Time windows are kept as text here and turned into a day count by parseWindowDays when filtering.
const postedWithin = clean(flags["posted-within"] ?? "30 days");
const deadlineWithin = clean(flags["deadline-within"] ?? "");
const agencyFilter = clean(flags["agency-filter"] ?? "");
const regionFilter = clean(flags["region-filter"] ?? "");
const naicsOrCategory = clean(flags["naics-or-category"] ?? "");
// parseBoolean exits the process unless the value is "true" or "false" (case-insensitive).
const includeAwards = parseBoolean(flags["include-awards"] ?? "false", "include-awards");
const includeDocuments = parseBoolean(flags["include-documents"] ?? "true", "include-documents");
const includeContactDetails = parseBoolean(flags["include-contact-details"] ?? "true", "include-contact-details");
const language = clean(flags.language ?? "English");
// An empty snapshot label falls back to the current timestamp.
const snapshotLabel = clean(flags["snapshot-label"] ?? "") || new Date().toISOString();
const sortHint = clean(flags["sort-hint"] ?? "best match");
// Scope checks, evaluated in order: the first violated rule is reported on stderr and the
// process exits with status 1.
const scopeViolations: ReadonlyArray<readonly [boolean, string]> = [
  [!query && seedUrls.length === 0, "OUT_OF_SCOPE: provide at least one of --query or --seed-urls"],
  [!countryScope, "OUT_OF_SCOPE: --country-scope is required"],
  [
    !["opportunity_rows", "grouped_by_portal"].includes(outputMode),
    "OUT_OF_SCOPE: --output-mode must be opportunity_rows or grouped_by_portal",
  ],
  [
    !["active", "open", "closing_soon", "all"].includes(statusFilter),
    "OUT_OF_SCOPE: --status-filter must be active, open, closing_soon, or all",
  ],
];
for (const [violated, message] of scopeViolations) {
  if (violated) {
    console.error(message);
    process.exit(1);
  }
}
// Schema passed to the Firecrawl JSON extraction in extractFromUrl and re-checked locally with
// safeParse. Every field is nullable and optional; `opportunities` defaults to an empty array.
const OpportunitySchema = z.object({
opportunities: z.array(z.object({
// Portal and notice identity.
country_scope: z.string().nullable().optional(),
portal_name: z.string().nullable().optional(),
portal_type: z.string().nullable().optional(),
opportunity_title: z.string().nullable().optional(),
notice_type: z.string().nullable().optional(),
status: z.string().nullable().optional(),
reference_number: z.string().nullable().optional(),
solicitation_number: z.string().nullable().optional(),
// Buyer.
buyer_name: z.string().nullable().optional(),
buyer_type: z.string().nullable().optional(),
agency_or_department: z.string().nullable().optional(),
sub_agency: z.string().nullable().optional(),
// Evidence URLs.
opportunity_url: z.string().nullable().optional(),
application_or_response_url: z.string().nullable().optional(),
source_page_url: z.string().nullable().optional(),
secondary_source_urls: z.array(z.string()).nullable().optional(),
// Description, dates and value; the *_text fields are normalized later in normalizeOpportunity.
description_short: z.string().nullable().optional(),
scope_summary: z.string().nullable().optional(),
posted_date_text: z.string().nullable().optional(),
closing_date_text: z.string().nullable().optional(),
estimated_value_text: z.string().nullable().optional(),
currency: z.string().nullable().optional(),
// Location, eligibility and classification.
location_country: z.string().nullable().optional(),
location_region: z.string().nullable().optional(),
location_city: z.string().nullable().optional(),
eligibility_text: z.string().nullable().optional(),
set_aside_or_supplier_type: z.string().nullable().optional(),
naics_cpv_unspsc_code: z.string().nullable().optional(),
category: z.string().nullable().optional(),
subcategory: z.string().nullable().optional(),
contract_duration_text: z.string().nullable().optional(),
renewal_option_text: z.string().nullable().optional(),
// Documents and contacts; dropped during normalization when the matching --include-* flag is false.
document_urls: z.array(z.string()).nullable().optional(),
addendum_urls: z.array(z.string()).nullable().optional(),
contact_name: z.string().nullable().optional(),
contact_email: z.string().nullable().optional(),
contact_phone: z.string().nullable().optional(),
contact_department: z.string().nullable().optional(),
submission_instructions_text: z.string().nullable().optional(),
confidence: z.number().nullable().optional(),
notes: z.string().nullable().optional(),
// Award fields; copied onto output rows only when --include-awards is true.
award_status: z.string().nullable().optional(),
award_date_text: z.string().nullable().optional(),
awardee_name: z.string().nullable().optional(),
award_value_text: z.string().nullable().optional(),
award_source_url: z.string().nullable().optional(),
})).default([]),
});
// One raw opportunity as described by OpportunitySchema, before normalizeOpportunity runs.
type ExtractedOpportunity = z.infer<typeof OpportunitySchema>["opportunities"][number];
// Normalized output row produced by normalizeOpportunity and written to stdout by main.
type OpportunityRow = {
country_scope: string | null;
portal_name: string | null;
portal_type: string | null;
// `${slug(portalName)}:${slug(reference || url || title)}`; main uses it to de-duplicate rows.
tracking_key: string;
snapshot_label: string;
// ISO timestamp taken once at the start of main and shared by every row of the run.
collected_at: string;
opportunity_title: string | null;
notice_type: string | null;
status: string | null;
reference_number: string | null;
solicitation_number: string | null;
buyer_name: string | null;
buyer_type: string | null;
agency_or_department: string | null;
sub_agency: string | null;
opportunity_url: string | null;
application_or_response_url: string | null;
source_page_url: string | null;
secondary_source_urls: string[];
description_short: string | null;
scope_summary: string | null;
// *_text keeps the extracted wording; *_normalized is the YYYY-MM-DD form from normalizeDate.
posted_date_text: string | null;
posted_date_normalized: string | null;
closing_date_text: string | null;
closing_date_normalized: string | null;
// Whole days from "now" to closing_date_normalized (negative once past); null without a closing date.
days_until_close: number | null;
estimated_value_text: string | null;
// Numeric value derived from estimated_value_text by normalizeMoney.
estimated_value_normalized: number | null;
currency: string | null;
location_country: string | null;
location_region: string | null;
location_city: string | null;
eligibility_text: string | null;
set_aside_or_supplier_type: string | null;
naics_cpv_unspsc_code: string | null;
category: string | null;
subcategory: string | null;
contract_duration_text: string | null;
renewal_option_text: string | null;
document_urls: string[];
addendum_urls: string[];
contact_name: string | null;
contact_email: string | null;
contact_phone: string | null;
contact_department: string | null;
submission_instructions_text: string | null;
// Clamped to the range 0..1 by clampConfidence; 0.5 when the extraction gave no number.
confidence: number;
notes: string | null;
// Award fields are only assigned when --include-awards is true.
award_status?: string | null;
award_date_text?: string | null;
award_date_normalized?: string | null;
awardee_name?: string | null;
award_value_text?: string | null;
award_value_normalized?: number | null;
award_source_url?: string | null;
};
/**
 * Entry point: discovers candidate pages, extracts and normalizes opportunities, filters and
 * de-duplicates them by tracking key, and writes the result to stdout as one JSON document.
 *
 * @throws Error when no candidate URL is found or when no row survives filtering.
 */
async function main() {
  const collectedAt = new Date().toISOString();

  const candidateUrls = await discoverCandidateUrls();
  if (candidateUrls.length === 0) {
    throw new Error("no official procurement candidate URLs found for the supplied filters");
  }

  const rows: OpportunityRow[] = [];
  const rowsByTrackingKey = new Map<string, OpportunityRow>();
  const limitReached = (): boolean => rows.length >= maxOpportunities;

  for (const url of candidateUrls) {
    if (limitReached()) break;
    console.error(`Extracting procurement opportunity data from ${url}`);
    const extracted = await extractFromUrl(url);

    for (const item of extracted) {
      const row = normalizeOpportunity(item, url, collectedAt);
      if (row === null) continue;

      // Same filter order as before: award screening, then status, then user filters.
      const rejected = (!includeAwards && isAwardLike(row)) || !statusAllowed(row) || !matchesFilters(row);
      if (rejected) continue;

      // A repeated tracking key only contributes its URLs to the row already kept.
      const duplicateOf = rowsByTrackingKey.get(row.tracking_key);
      if (duplicateOf) {
        mergeSecondarySources(duplicateOf, row);
        continue;
      }

      rowsByTrackingKey.set(row.tracking_key, row);
      rows.push(row);
      if (limitReached()) break;
    }
  }

  if (rows.length === 0) {
    throw new Error("no active procurement opportunities extracted from official candidate pages");
  }

  const output = outputMode === "grouped_by_portal" ? groupByPortal(rows) : rows;
  process.stdout.write(JSON.stringify(output));
}
/**
 * Builds the list of candidate pages to scrape: accepted seed URLs first, then search hits that
 * pass isLikelyProcurementSource.
 *
 * A failed search no longer aborts the run while other sources can still supply candidates: the
 * failure is logged and the remaining queries are tried. If nothing at all was found and a
 * search failed, that search error is rethrown so the real cause is not hidden behind the
 * generic "no candidate URLs" error raised by main.
 *
 * @returns De-duplicated candidate URLs, capped at min(24, max(maxOpportunities * 2, 8)).
 */
async function discoverCandidateUrls(): Promise<string[]> {
  const urls: string[] = [];
  for (const seedUrl of seedUrls) {
    if (isLikelyProcurementSource(seedUrl)) {
      urls.push(seedUrl);
    } else {
      // Make the rejection visible instead of silently dropping a user-supplied URL.
      console.error(`Ignoring seed URL that does not look like an official procurement source: ${seedUrl}`);
    }
  }
  const terms = [
    query,
    noticeTypes.join(" OR "),
    countryScope,
    agencyFilter,
    regionFilter,
    naicsOrCategory,
    statusFilter === "all" ? "" : "active open",
    postedWithin ? `posted within ${postedWithin}` : "",
    deadlineWithin ? `deadline within ${deadlineWithin}` : "",
    sortHint,
    "official procurement portal bid tender solicitation",
  ].filter(Boolean).join(" ");
  // Known countries get one site-restricted query per official portal; others get a single open query.
  const countryOfficialSites = countryHints(countryScope);
  const searchQueries = countryOfficialSites.length > 0
    ? countryOfficialSites.map((site) => `site:${site} ${terms}`)
    : [terms];
  let searchFailure: { error: unknown } | null = null;
  for (const searchQuery of searchQueries.slice(0, 4)) {
    console.error(`Searching: ${searchQuery}`);
    let result: unknown;
    try {
      result = await firecrawl.search(searchQuery, {
        limit: Math.min(10, Math.max(4, maxOpportunities)),
        integration: "prometheus",
      });
    } catch (err) {
      searchFailure = { error: err };
      console.error(`Search failed for "${searchQuery}": ${err instanceof Error ? err.message : String(err)}`);
      continue;
    }
    // The result list is read from `web` when it is an array, otherwise from `data`.
    const data = Array.isArray((result as any)?.web) ? (result as any).web : Array.isArray((result as any)?.data) ? (result as any).data : [];
    for (const item of data) {
      const url = item?.url || item?.metadata?.sourceURL;
      if (typeof url === "string" && isLikelyProcurementSource(url)) urls.push(url);
    }
  }
  if (urls.length === 0 && searchFailure !== null) throw searchFailure.error;
  return unique(urls).slice(0, Math.min(24, Math.max(maxOpportunities * 2, 8)));
}
/**
 * Scrapes one candidate page with Firecrawl's JSON extraction and returns the opportunities
 * that validate against OpportunitySchema.
 *
 * A page that fails to scrape (network error, timeout, API error) is logged and skipped, the
 * same way a page with an unexpected JSON shape is skipped, so one bad page cannot discard the
 * rows already collected from the others.
 *
 * @param url Candidate page to scrape.
 * @returns The extracted opportunities, or an empty array when the page could not be used.
 */
async function extractFromUrl(url: string): Promise<ExtractedOpportunity[]> {
  const prompt = [
    "Extract active/open public-sector procurement opportunities from this official procurement page.",
    "Include RFPs, RFQs, tenders, bids, solicitations, ITTs, and related contract opportunities.",
    `Country/geography scope: ${countryScope}. Query/theme: ${query || "not specified"}.`,
    `Notice types requested: ${noticeTypes.join(", ")}. Status filter: ${statusFilter}.`,
    includeAwards ? "Also capture award records, clearly marking award fields." : "Do not include award-only records unless the page is also an active/open opportunity.",
    includeDocuments ? "Capture public document, attachment, addendum, amendment, and download URLs when visible." : "Do not spend effort extracting document URLs.",
    includeContactDetails ? "Capture public buyer contact names, emails, phones, and departments when visible." : "Do not spend effort extracting contact details.",
    "Do not fabricate missing values. Use null for unavailable fields.",
    "Prefer exact visible text for dates, values, classifications, eligibility, and instructions.",
    "Return only opportunities that are relevant to the requested query, filters, agency, region, or category when those are supplied.",
  ].join(" ");
  let doc: unknown;
  try {
    doc = await firecrawl.scrape(url, {
      formats: [{
        type: "json",
        prompt,
        schema: OpportunitySchema,
      }],
      onlyMainContent: true,
      waitFor: 2000,
      timeout: 60000,
      integration: "prometheus",
    });
  } catch (err) {
    console.error(`Skipping ${url}: scrape failed (${err instanceof Error ? err.message : String(err)})`);
    return [];
  }
  const json = (doc as any)?.json;
  const parsed = OpportunitySchema.safeParse(json);
  if (!parsed.success) {
    console.error(`Skipping ${url}: JSON extraction did not match expected opportunity schema`);
    return [];
  }
  return parsed.data.opportunities;
}
/**
 * Converts one extracted opportunity into an output row.
 *
 * @param item Raw opportunity from the extraction.
 * @param fallbackUrl Page the item was extracted from; used when the item carries no usable source URL.
 * @param collectedAt Run timestamp stamped on the row.
 * @returns The normalized row, or null when the item has neither a title nor a reference, or no URL at all.
 */
function normalizeOpportunity(item: ExtractedOpportunity, fallbackUrl: string, collectedAt: string): OpportunityRow | null {
  // Whitespace-collapsed text, with blank values turned into null.
  const textOrNull = (raw: string | null | undefined): string | null => clean(raw ?? "") || null;

  const sourcePageUrl = firstUrl(item.source_page_url) || fallbackUrl;
  const extractedOpportunityUrl = firstUrl(item.opportunity_url);
  // Only trust the extracted link when its path looks like a specific notice page.
  const opportunityUrl = isSpecificOpportunityUrl(extractedOpportunityUrl) ? extractedOpportunityUrl : sourcePageUrl;

  const referenceNumber = textOrNull(item.reference_number);
  const solicitationNumber = textOrNull(item.solicitation_number);
  const reference = referenceNumber ?? solicitationNumber ?? "";
  const portalName = textOrNull(item.portal_name) ?? portalNameFromUrl(sourcePageUrl);
  const title = clean(item.opportunity_title ?? "");
  if (!title && !reference) return null;

  const closingNormalized = normalizeDate(item.closing_date_text ?? null);
  const postedNormalized = normalizeDate(item.posted_date_text ?? null);
  let daysUntilClose: number | null = null;
  if (closingNormalized) {
    daysUntilClose = daysBetween(new Date(), new Date(`${closingNormalized}T00:00:00Z`));
  }

  const withDocuments = (list: unknown): string[] => (includeDocuments ? normalizeUrlList(list) : []);
  const contactField = (raw: string | null | undefined): string | null => (includeContactDetails ? textOrNull(raw) : null);

  // Secondary sources never repeat the two primary URLs.
  const secondary = normalizeUrlList(item.secondary_source_urls).filter(
    (candidate) => candidate !== opportunityUrl && candidate !== sourcePageUrl,
  );

  const trackingBase = reference || opportunityUrl || sourcePageUrl || title;

  const row: OpportunityRow = {
    country_scope: countryScope,
    portal_name: portalName,
    portal_type: textOrNull(item.portal_type) ?? inferPortalType(sourcePageUrl),
    tracking_key: `${slug(portalName)}:${slug(trackingBase)}`,
    snapshot_label: snapshotLabel,
    collected_at: collectedAt,
    opportunity_title: title || null,
    notice_type: textOrNull(item.notice_type) ?? inferNoticeType(title),
    status: textOrNull(item.status),
    reference_number: referenceNumber,
    solicitation_number: solicitationNumber,
    buyer_name: textOrNull(item.buyer_name),
    buyer_type: textOrNull(item.buyer_type),
    agency_or_department: textOrNull(item.agency_or_department),
    sub_agency: textOrNull(item.sub_agency),
    opportunity_url: opportunityUrl,
    application_or_response_url: firstUrl(item.application_or_response_url),
    source_page_url: sourcePageUrl,
    secondary_source_urls: secondary,
    description_short: textOrNull(item.description_short),
    scope_summary: textOrNull(item.scope_summary),
    posted_date_text: textOrNull(item.posted_date_text),
    posted_date_normalized: postedNormalized,
    closing_date_text: textOrNull(item.closing_date_text),
    closing_date_normalized: closingNormalized,
    days_until_close: daysUntilClose,
    estimated_value_text: textOrNull(item.estimated_value_text),
    estimated_value_normalized: normalizeMoney(item.estimated_value_text ?? null),
    currency: textOrNull(item.currency) ?? inferCurrency(item.estimated_value_text ?? null, countryScope),
    location_country: textOrNull(item.location_country) ?? countryScope,
    location_region: textOrNull(item.location_region),
    location_city: textOrNull(item.location_city),
    eligibility_text: textOrNull(item.eligibility_text),
    set_aside_or_supplier_type: textOrNull(item.set_aside_or_supplier_type),
    naics_cpv_unspsc_code: textOrNull(item.naics_cpv_unspsc_code),
    category: textOrNull(item.category),
    subcategory: textOrNull(item.subcategory),
    contract_duration_text: textOrNull(item.contract_duration_text),
    renewal_option_text: textOrNull(item.renewal_option_text),
    document_urls: withDocuments(item.document_urls),
    addendum_urls: withDocuments(item.addendum_urls),
    contact_name: contactField(item.contact_name),
    contact_email: contactField(item.contact_email),
    contact_phone: contactField(item.contact_phone),
    contact_department: contactField(item.contact_department),
    submission_instructions_text: textOrNull(item.submission_instructions_text),
    confidence: clampConfidence(item.confidence),
    notes: textOrNull(item.notes),
  };

  // Award columns are appended only when the caller asked for award records.
  if (includeAwards) {
    row.award_status = textOrNull(item.award_status);
    row.award_date_text = textOrNull(item.award_date_text);
    row.award_date_normalized = normalizeDate(item.award_date_text ?? null);
    row.awardee_name = textOrNull(item.awardee_name);
    row.award_value_text = textOrNull(item.award_value_text);
    row.award_value_normalized = normalizeMoney(item.award_value_text ?? null);
    row.award_source_url = firstUrl(item.award_source_url);
  }

  const hasEvidenceUrl = Boolean(row.opportunity_url) || Boolean(row.source_page_url);
  return hasEvidenceUrl ? row : null;
}
/**
 * Applies --status-filter to a row using its status/notes text and its days-until-close value.
 *
 * @returns true when the row should be kept for the selected status filter.
 */
function statusAllowed(row: OpportunityRow): boolean {
  if (statusFilter === "all") return true;

  const text = [row.status ?? "", row.notes ?? ""].join(" ").toLowerCase();
  const days = row.days_until_close;

  // Explicit "no longer open" wording rejects the row for every remaining filter.
  const looksInactive = /\b(inactive|closed|expired|cancelled|canceled|archived|withdrawn|not active|no longer available)\b/.test(text);
  if (looksInactive) return false;

  switch (statusFilter) {
    case "active":
    case "open": {
      if (days !== null && days < 0) return false;
      const saysActive = /\b(active|open|published|current|accepting|available|solicitation)\b/.test(text);
      // Without an explicit active marker, a closing date that has not passed is required.
      return saysActive || (days !== null && days >= 0);
    }
    case "closing_soon":
      return days !== null && days >= 0 && days <= 14;
    default:
      return true;
  }
}
/**
 * Applies the agency, region, category and date-window filters to a row.
 * Text filters are case-insensitive substring matches against the row's descriptive fields;
 * date windows are only enforced when the row has the corresponding normalized date.
 */
function matchesFilters(row: OpportunityRow): boolean {
  const searchableFields = [
    row.opportunity_title,
    row.description_short,
    row.scope_summary,
    row.buyer_name,
    row.agency_or_department,
    row.location_region,
    row.location_city,
    row.naics_cpv_unspsc_code,
    row.category,
    row.subcategory,
    row.notes,
  ];
  const haystack = searchableFields.filter(Boolean).join(" ").toLowerCase();

  for (const needle of [agencyFilter, regionFilter, naicsOrCategory]) {
    if (needle && !haystack.includes(needle.toLowerCase())) return false;
  }

  const postedTooOld =
    Boolean(postedWithin) &&
    Boolean(row.posted_date_normalized) &&
    !isWithinDays(row.posted_date_normalized as string, postedWithin, "past");
  if (postedTooOld) return false;

  const closesTooLate =
    Boolean(deadlineWithin) &&
    Boolean(row.closing_date_normalized) &&
    !isWithinDays(row.closing_date_normalized as string, deadlineWithin, "future");
  return !closesTooLate;
}
/**
 * Groups rows by portal name and portal type, preserving first-seen order of both groups and rows.
 * Each group copies its header fields from the first row that opened it.
 */
function groupByPortal(rows: OpportunityRow[]) {
  const groups = new Map<
    string,
    {
      country_scope: string | null;
      portal_name: string | null;
      portal_type: string | null;
      snapshot_label: string;
      collected_at: string;
      opportunities: OpportunityRow[];
    }
  >();

  for (const row of rows) {
    const key = `${row.portal_name ?? "Unknown"}|${row.portal_type ?? "Unknown"}`;
    let group = groups.get(key);
    if (group === undefined) {
      group = {
        country_scope: row.country_scope,
        portal_name: row.portal_name,
        portal_type: row.portal_type,
        snapshot_label: row.snapshot_label,
        collected_at: row.collected_at,
        opportunities: [],
      };
      groups.set(key, group);
    }
    group.opportunities.push(row);
  }

  return Array.from(groups.values());
}
/**
 * Maps a country scope to the official procurement portal hosts used for `site:` searches.
 *
 * @param scope Country scope as supplied on the command line (matched case-insensitively).
 * @returns Portal host names, or an empty array when the scope is not recognized.
 */
function countryHints(scope: string): string[] {
  const normalized = scope.toLowerCase();
  if (normalized === "us" || normalized.includes("united states")) return ["sam.gov"];
  // Contracts Finder is served from contractsfinder.service.gov.uk (no hyphen); the hyphenated
  // host used previously does not match the real site, so that search could never hit it.
  if (normalized === "uk" || normalized.includes("united kingdom")) return ["contractsfinder.service.gov.uk", "find-tender.service.gov.uk"];
  if (normalized.includes("singapore")) return ["gebiz.gov.sg"];
  if (normalized.includes("canada")) return ["canadabuys.canada.ca"];
  if (normalized === "eu" || normalized.includes("europe")) return ["ted.europa.eu", "ec.europa.eu"];
  if (normalized.includes("australia")) return ["tenders.gov.au"];
  if (normalized.includes("new zealand")) return ["gets.govt.nz"];
  return [];
}
/**
 * Decides whether a URL points at an official procurement source worth scraping.
 *
 * The URL must be http(s), its HOST must be an official government domain or a known portal
 * domain, and host + path must contain a procurement keyword. Portal names are matched against
 * the host only: matching them against the path let unrelated sites through
 * (e.g. "example.com/find-tender-guide"), and substring matching on the host let look-alike
 * hosts through (e.g. "sam.gov.example.com").
 *
 * @param url Candidate URL; anything that does not parse as an absolute URL is rejected.
 */
function isLikelyProcurementSource(url: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return false;

  const host = parsed.hostname.toLowerCase();
  const path = parsed.pathname.toLowerCase();
  // The GOV.UK landing page for Contracts Finder is excluded explicitly.
  if (host === "www.gov.uk" && /^\/contracts-finder\/?$/.test(path)) return false;

  const officialSuffixes = [".gov", ".gov.uk", ".gov.sg", ".gc.ca", ".canada.ca", ".europa.eu", ".gov.au", ".govt.nz"];
  const portalDomains = [
    "sam.gov",
    "gebiz.gov.sg",
    "contractsfinder.service.gov.uk",
    "find-tender.service.gov.uk",
    "ted.europa.eu",
    "canadabuys.canada.ca",
    "tenders.gov.au",
    "gets.govt.nz",
  ];
  const officialHost = officialSuffixes.some((suffix) => host.endsWith(suffix));
  // Exact domain or a true subdomain of it, never a substring elsewhere in the host.
  const knownPortal = portalDomains.some((domain) => host === domain || host.endsWith(`.${domain}`));
  const procurementPath = /opp|opportunit|contract|tender|bid|rfp|rfq|solicitation|procure|notice|award/.test(host + path);
  return (officialHost || knownPortal) && procurementPath;
}
/**
 * Type guard: true when `url` parses as an absolute URL whose path contains a procurement keyword.
 * Null, empty and unparseable values are rejected.
 */
function isSpecificOpportunityUrl(url: string | null): url is string {
  if (url === null || url === "") return false;

  let pathname: string;
  try {
    pathname = new URL(url).pathname.toLowerCase();
  } catch {
    return false;
  }

  const mentionsProcurement = /opp|opportunit|contract|tender|bid|rfp|rfq|solicitation|procure|notice|award/.test(pathname);
  return mentionsProcurement && pathname.length > 1;
}
/**
 * Guesses a portal type from the page URL when the extraction did not provide one.
 *
 * @param url Source page URL.
 * @returns "federal" for the known national portals, "state/local" or "university" when the URL
 *          contains one of those keywords, otherwise the first requested --portal-types entry
 *          (null when that list is empty).
 */
function inferPortalType(url: string): string | null {
  const value = url.toLowerCase();
  // `contracts-?finder` covers the real Contracts Finder host (contractsfinder.service.gov.uk),
  // which the hyphen-only pattern never matched.
  if (/sam\.gov|contracts-?finder|find-tender|gebiz|ted\.europa|canadabuys|tenders\.gov\.au|gets\.govt\.nz/.test(value)) return "federal";
  if (/state|province|county|city|municipal|local/.test(value)) return "state/local";
  if (/university|college|school/.test(value)) return "university";
  return portalTypes[0] ?? null;
}
/**
 * Derives a notice type label from free text (the opportunity title in this file).
 * Rules are tried in order and the first match wins; returns null when none matches.
 */
function inferNoticeType(text: string): string | null {
  const value = text.toLowerCase();
  const rules: Array<[RegExp, string]> = [
    [/\brfp\b|request for proposal/, "RFP"],
    [/\brfq\b|request for quote|request for quotation/, "RFQ"],
    [/\bitt\b|invitation to tender/, "ITT"],
    [/tender/, "tender"],
    [/solicitation/, "solicitation"],
    [/award/, "award"],
  ];
  for (const [pattern, label] of rules) {
    if (pattern.test(value)) return label;
  }
  return null;
}
/**
 * True when the row's notice type, status or award status mentions an award and none of them
 * carries an active/open/solicitation marker.
 */
function isAwardLike(row: OpportunityRow): boolean {
  const text = [row.notice_type ?? "", row.status ?? "", row.award_status ?? ""].join(" ").toLowerCase();
  const mentionsAward = /\baward|awarded|contract award\b/.test(text);
  if (!mentionsAward) return false;
  const stillOpen = /\bactive|open|solicitation\b/.test(text);
  return !stillOpen;
}
/**
 * Folds the URLs of a duplicate row into `existing.secondary_source_urls` (mutating `existing`).
 * The two primary URLs of `existing` are never added as secondary sources.
 */
function mergeSecondarySources(existing: OpportunityRow, row: OpportunityRow): void {
  const candidates: Array<string | null> = [
    ...existing.secondary_source_urls,
    row.source_page_url,
    row.opportunity_url,
    ...row.secondary_source_urls,
  ];
  const additions = candidates.filter((url): url is string => {
    if (!url) return false;
    return url !== existing.source_page_url && url !== existing.opportunity_url;
  });
  existing.secondary_source_urls = unique(additions);
}
/** Splits a comma-separated flag value into cleaned, non-empty entries. */
function splitCsv(value: string): string[] {
  const entries: string[] = [];
  for (const part of value.split(",")) {
    const entry = clean(part);
    if (entry) entries.push(entry);
  }
  return entries;
}
/** Stringifies the value (null/undefined become ""), collapses whitespace runs to one space and trims. */
function clean(value: string): string {
  const text = value == null ? "" : String(value);
  const collapsed = text.replace(/\s+/g, " ");
  return collapsed.trim();
}
/**
 * Parses a flag value as an integer from 1 to 250.
 * Any other value prints an OUT_OF_SCOPE message naming the flag and exits with status 1.
 */
function parsePositiveInt(value: string, name: string): number {
  const parsed = Number(value);
  const inRange = Number.isInteger(parsed) && parsed >= 1 && parsed <= 250;
  if (inRange) return parsed;
  console.error(`OUT_OF_SCOPE: --${name} must be an integer from 1 to 250`);
  process.exit(1);
}
/**
 * Parses a flag value as "true" or "false" (case-insensitive).
 * Any other value prints an OUT_OF_SCOPE message naming the flag and exits with status 1.
 */
function parseBoolean(value: string, name: string): boolean {
  switch (value.toLowerCase()) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      console.error(`OUT_OF_SCOPE: --${name} must be true or false`);
      process.exit(1);
  }
}
/** Cleans every value, drops empty ones, and removes duplicates while keeping first-seen order. */
function unique(values: string[]): string[] {
  const seen = new Set<string>();
  for (const raw of values) {
    const value = clean(raw);
    if (value) seen.add(value);
  }
  return Array.from(seen);
}
/**
 * Returns the first http(s) URL found in a value, or null.
 *
 * A value that is itself an http(s) URL is returned in normalized form. Otherwise the first
 * embedded http(s) URL is returned as written, minus trailing sentence punctuation.
 * Only http and https are accepted: the URL parser also accepts strings such as
 * "Portal: https://…" or "mailto:…" (a custom scheme followed by an opaque path), and those
 * were previously returned as if they were links.
 *
 * @param value Any extracted value; it is stringified and whitespace-normalized first.
 */
function firstUrl(value: unknown): string | null {
  const text = String(value ?? "").replace(/\s+/g, " ").trim();
  if (!text) return null;
  try {
    const parsed = new URL(text);
    if (parsed.protocol === "http:" || parsed.protocol === "https:") return parsed.toString();
  } catch {
    // Not a bare URL; fall through and look for one embedded in the text.
  }
  const match = text.match(/https?:\/\/[^\s)]+/i);
  if (!match) return null;
  // Drop punctuation that belongs to the surrounding sentence rather than to the URL.
  const candidate = match[0].replace(/[.,;]+$/, "");
  return candidate || null;
}
/**
 * Turns an extracted list into de-duplicated URLs, keeping only entries firstUrl can resolve.
 * Anything that is not an array yields an empty list.
 */
function normalizeUrlList(values: unknown): string[] {
  if (!Array.isArray(values)) return [];
  const found: string[] = [];
  for (const value of values) {
    const url = firstUrl(value);
    if (url) found.push(url);
  }
  return unique(found);
}
/**
 * Derives a display name for the portal from a page URL.
 *
 * @param url Source page URL.
 * @returns A friendly name for the known portals, the host without a leading "www." for other
 *          URLs, or "Unknown Portal" when the URL cannot be parsed.
 */
function portalNameFromUrl(url: string): string {
  try {
    const host = new URL(url).hostname.replace(/^www\./, "");
    if (host.includes("sam.gov")) return "SAM.gov";
    // The real host is contractsfinder.service.gov.uk (no hyphen); the hyphenated form is still
    // accepted, but on its own it made this branch unreachable.
    if (/contracts-?finder/.test(host)) return "UK Contracts Finder";
    if (host.includes("find-tender")) return "UK Find a Tender";
    if (host.includes("gebiz.gov.sg")) return "GeBIZ";
    if (host.includes("ted.europa.eu")) return "Tenders Electronic Daily";
    if (host.includes("canadabuys")) return "CanadaBuys";
    return host;
  } catch {
    return "Unknown Portal";
  }
}
/**
 * Builds a lowercase, dash-separated key fragment from free text or a URL.
 * The first "http(s)://" is removed, non-alphanumeric runs become "-", one leading and one
 * trailing dash are stripped, and the result is cut to 160 characters ("unknown" when empty).
 */
function slug(value: string): string {
  const lowered = clean(value).toLowerCase();
  const withoutScheme = lowered.replace(/https?:\/\//, "");
  const dashed = withoutScheme.replace(/[^a-z0-9]+/g, "-");
  const trimmed = dashed.replace(/^-|-$/g, "");
  const limited = trimmed.slice(0, 160);
  return limited || "unknown";
}
/**
 * Normalizes a human-readable date to "YYYY-MM-DD", or null when no date can be read.
 *
 * The calendar date is taken as written in the text. Explicit forms (ISO "2026-07-31",
 * "31 July 2026", "July 31, 2026", with optional ordinals, "at" and times) are read
 * component-wise, so the result does not depend on the machine's time zone. Previously a
 * local-time parse was formatted through toISOString(), which shifts the day whenever the
 * local zone is not UTC. Impossible dates such as "31 February 2026" yield null rather than
 * rolling over into the next month.
 *
 * NOTE(review): purely numeric forms such as "03/07/2026" still go through Date.parse, whose
 * day/month order is engine-defined — confirm the expected order per portal.
 *
 * @param value Date text as extracted, or null.
 */
function normalizeDate(value: string | null): string | null {
  const text = String(value ?? "").replace(/\s+/g, " ").trim();
  if (!text) return null;
  const cleaned = text
    .replace(/\b(\d{1,2})(st|nd|rd|th)\b/gi, "$1")
    .replace(/\bat\b/gi, " ")
    .replace(/(\d)(am|pm)\b/gi, "$1 $2");

  const monthAbbreviations = ["jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"];
  // Matches full or abbreviated English month names; group 1 is the three-letter prefix.
  const monthName = "(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\\.?";

  // Builds the ISO date and rejects component combinations that would roll over.
  const toIsoDate = (year: number, monthIndex: number, day: number): string | null => {
    const date = new Date(Date.UTC(year, monthIndex, day));
    const valid =
      date.getUTCFullYear() === year && date.getUTCMonth() === monthIndex && date.getUTCDate() === day;
    return valid ? date.toISOString().slice(0, 10) : null;
  };

  const iso = cleaned.match(/(?<!\d)(\d{4})-(\d{2})-(\d{2})(?!\d)/);
  if (iso) return toIsoDate(Number(iso[1]), Number(iso[2]) - 1, Number(iso[3]));

  const dayFirst = cleaned.match(new RegExp(`\\b(\\d{1,2})\\s+${monthName},?\\s+(\\d{4})\\b`, "i"));
  if (dayFirst) {
    return toIsoDate(Number(dayFirst[3]), monthAbbreviations.indexOf(dayFirst[2].toLowerCase()), Number(dayFirst[1]));
  }

  const monthFirst = cleaned.match(new RegExp(`\\b${monthName}\\s+(\\d{1,2}),?\\s+(\\d{4})\\b`, "i"));
  if (monthFirst) {
    return toIsoDate(Number(monthFirst[3]), monthAbbreviations.indexOf(monthFirst[1].toLowerCase()), Number(monthFirst[2]));
  }

  // Last resort: let the engine parse it. Text without a zone is parsed as local time, so the
  // local calendar components are the date as written.
  const parsed = Date.parse(cleaned);
  if (Number.isNaN(parsed)) return null;
  const local = new Date(parsed);
  return toIsoDate(local.getFullYear(), local.getMonth(), local.getDate());
}
/**
 * Whole calendar days from `start` to `end`, comparing their UTC dates only (time of day is
 * ignored). Negative when `end` falls on an earlier UTC date than `start`.
 */
function daysBetween(start: Date, end: Date): number {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;
  const utcMidnight = (moment: Date): number =>
    Date.UTC(moment.getUTCFullYear(), moment.getUTCMonth(), moment.getUTCDate());
  const elapsedMs = utcMidnight(end) - utcMidnight(start);
  return Math.ceil(elapsedMs / MS_PER_DAY);
}
/**
 * Checks whether a YYYY-MM-DD date lies inside a window measured from today.
 *
 * @param dateText Normalized date (YYYY-MM-DD).
 * @param windowText Window such as "30 days"; when it cannot be parsed the check passes.
 * @param direction "future" accepts today through today + window; "past" accepts today - window through today.
 */
function isWithinDays(dateText: string, windowText: string, direction: "past" | "future"): boolean {
  const windowDays = parseWindowDays(windowText);
  if (windowDays === null) return true;

  const offset = daysBetween(new Date(), new Date(`${dateText}T00:00:00Z`));
  if (direction === "future") {
    return offset >= 0 && offset <= windowDays;
  }
  return offset <= 0 && Math.abs(offset) <= windowDays;
}
/**
 * Converts a time-window phrase into a number of days.
 *
 * The first number in the text is the amount; the unit is detected anywhere in the text.
 * Months count as 30 days and years as 365. Weeks (×7) and hours (rounded up to whole days)
 * are recognized as well — previously "2 weeks" was read as 2 days and "48 hours" as 48 days.
 * Text with no recognized unit is treated as days.
 *
 * @param value Window text such as "30 days", "2 weeks" or "3 months".
 * @returns The window in days, or null when the text contains no number.
 */
function parseWindowDays(value: string): number | null {
  const text = String(value ?? "");
  const match = text.match(/(\d+)/);
  if (!match) return null;
  const amount = Number(match[1]);
  if (!Number.isFinite(amount)) return null;
  if (/month/i.test(text)) return amount * 30;
  if (/year/i.test(text)) return amount * 365;
  if (/week/i.test(text)) return amount * 7;
  if (/hour/i.test(text)) return Math.ceil(amount / 24);
  return amount;
}
/**
 * Extracts the first numeric amount from a value text, applying a million/billion multiplier
 * when the text names one ("million", "mn", "billion", "bn", including forms attached to the
 * number such as "3bn").
 *
 * The multiplier words must not be the tail of another word: the previous patterns
 * (`/\bmillion|mn\b/`) matched any word ending in "mn" because of alternation precedence, so
 * "Column B total: £5,000" became 5,000,000,000. Multiplied amounts are rounded to two
 * decimals to remove floating-point noise (1.1 million → 1100000).
 *
 * @param value Value text as extracted, or null.
 * @returns The numeric amount, or null when the text contains no number.
 */
function normalizeMoney(value: string | null): number | null {
  const text = String(value ?? "").trim();
  if (!text) return null;
  const multiplier = /(?<![a-z])(?:millions?|mn)\b/i.test(text)
    ? 1_000_000
    : /(?<![a-z])(?:billions?|bn)\b/i.test(text)
      ? 1_000_000_000
      : 1;
  const match = text.replace(/,/g, "").match(/-?\d+(?:\.\d+)?/);
  if (!match) return null;
  const amount = Number(match[0]);
  if (multiplier === 1) return amount;
  return Math.round(amount * multiplier * 100) / 100;
}
/**
 * Infers an ISO currency code from a value text, falling back to the country scope.
 *
 * Order of evidence: an explicit ISO code as a standalone token, then a prefixed dollar sign
 * (US$, C$, S$, A$, NZ$), then £ / €, then a bare "$" resolved through the country scope
 * (USD when the scope has no dollar currency), and finally the scope alone.
 *
 * Fixes over the previous substring checks: "CAD" no longer matches inside "Academy", "EUR"
 * no longer matches inside "Europe", and "CAD $5,000" or "$5,000" under a Canada scope is no
 * longer reported as USD. Australia and New Zealand scopes now resolve to AUD and NZD.
 *
 * @param value Value text as extracted, or null.
 * @param scope Country scope as supplied on the command line.
 * @returns The currency code, or null when neither the text nor the scope identifies one.
 */
function inferCurrency(value: string | null, scope: string): string | null {
  const text = String(value ?? "");
  const normalizedScope = scope.toLowerCase();

  let scopeCurrency: string | null = null;
  if (normalizedScope === "us" || normalizedScope.includes("united states")) scopeCurrency = "USD";
  else if (normalizedScope === "uk" || normalizedScope.includes("united kingdom")) scopeCurrency = "GBP";
  else if (normalizedScope.includes("singapore")) scopeCurrency = "SGD";
  else if (normalizedScope.includes("canada")) scopeCurrency = "CAD";
  else if (normalizedScope === "eu" || normalizedScope.includes("europe")) scopeCurrency = "EUR";
  else if (normalizedScope.includes("australia")) scopeCurrency = "AUD";
  else if (normalizedScope.includes("new zealand")) scopeCurrency = "NZD";

  // Explicit ISO codes, only as standalone tokens ("EUR5000" counts, "Europe" does not).
  for (const code of ["USD", "GBP", "EUR", "CAD", "SGD", "AUD", "NZD"]) {
    if (new RegExp(`(?<![a-z])${code}(?![a-z])`, "i").test(text)) return code;
  }

  // Dollar signs that carry their own country prefix.
  if (/(?<![a-z])US\$/i.test(text)) return "USD";
  if (/(?<![a-z])NZ\$/i.test(text)) return "NZD";
  if (/(?<![a-z])CA?\$/i.test(text)) return "CAD";
  if (/(?<![a-z])S\$/i.test(text)) return "SGD";
  if (/(?<![a-z])AU?\$/i.test(text)) return "AUD";

  if (text.includes("£")) return "GBP";
  if (text.includes("€")) return "EUR";

  // A bare "$" is ambiguous: use the scope's dollar currency when it has one, otherwise USD.
  if (text.includes("$")) {
    const dollarCurrencies = ["USD", "CAD", "SGD", "AUD", "NZD"];
    return scopeCurrency !== null && dollarCurrencies.includes(scopeCurrency) ? scopeCurrency : "USD";
  }

  return scopeCurrency;
}
/** Clamps a confidence score into the range 0..1; anything that is not a real number becomes 0.5. */
function clampConfidence(value: number | null | undefined): number {
  const usable = typeof value === "number" && !Number.isNaN(value);
  if (!usable) return 0.5;
  if (value < 0) return 0;
  if (value > 1) return 1;
  return value;
}
// Run the collector. Any rejection from main is logged to stderr and reported through exit status 1.
main().catch((err) => {
console.error(err);
process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.