Government Procurement Opportunities collector facts

Publisher: bo-05 (@bo-05).

Version: 1. Last updated: 2026-07-04T06:31:50.111Z.

Run this collector on demand, as an API endpoint, or on a schedule with Firecrawl Prometheus.

Sample fields: status, location, buyer_name, portal_name, posted_date, closing_date, opportunity_url, source_page_url, opportunity_title.

Parameters: seed-urls (string, required), max-items (number, required), query (string, required).

Government Procurement Opportunities

v1 · Published

Active government procurement opportunities from the supplied official portal pages, returned as a simple bid table.

Output & API

Preview the latest data, download it, or call this collector as an API.

Author's sample data
| # | status | location | buyer_name | portal_name | posted_date | closing_date | opportunity_url | source_page_url | opportunity_title |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | Open | Philippines | PALLADIUM INTERNATIONAL LIMITED | Contracts Finder | 3 July 2026 | 27 July 2026, 12pm | https://www.contractsfinder.service.gov.uk/notice/f2258fd3-f905-4718-aa61-b9332e22a733?origin=SearchResults&p=1 | https://www.contractsfinder.service.gov.uk/Search/Results?searchText=software&status=Open&sort=Relevance | Carbon Market Opportunity Assessment |
| 1 | Open | BA21 4DR | Yeovil College | Contracts Finder | 3 July 2026 | 7 August 2026, 12pm | https://www.contractsfinder.service.gov.uk/notice/0324c226-276b-47aa-abe1-10c7229f9e44?origin=SearchResults&p=1 | https://www.contractsfinder.service.gov.uk/Search/Results?searchText=software&status=Open&sort=Relevance | CA18171 - Yeovil College ITT for Yeovil College Rebrand |
| 2 | Open | BT34 2QU | Newry, Mourne and Down District Council (NMD) | Contracts Finder | 2 July 2026 | 24 July 2026, 12pm | https://www.contractsfinder.service.gov.uk/notice/e2d8a2fe-2ac6-47f9-8c6b-bf41f1f04465?origin=SearchResults&p=1 | https://www.contractsfinder.service.gov.uk/Search/Results?searchText=software&status=Open&sort=Relevance | CA18163 - RFQ 2026/19 - Virtual fence technology cattle collar system |
Parameters
--seed-urls (string, required): Comma-separated official procurement portal URLs or procurement search result URLs to search. e.g. "https://www.contractsfinder.service.gov.uk/Search/Results?searchText=software&status=Open&sort=Relevance"
--max-items (number, required): Maximum number of active opportunities to return. e.g. 3
--query (string, required): Keyword or theme to match, such as software, cybersecurity, construction, cleaning, or AI. e.g. "software"

Marketplace

Publish this collector so others can deploy it — you keep ownership.

0 subscribers
bo-05 (@bo-05)
0 runs in 14d · published 1d ago

Versions

Every build and self-heal appends a version. Pin one to lock runs to it.

managed by author
v1 · built · approved · current · 1d ago
How this script collects data
import Firecrawl from "@mendable/firecrawl-js";
import { z } from "zod";
import { parseArgs } from "node:util";

// The Firecrawl API key comes from the environment. Without it no API call can
// succeed, so the script reports the problem on stderr and exits with status 1.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}

// Single SDK client shared by the search and scrape helpers below.
const firecrawl = new Firecrawl({ apiKey });

// Command-line flags. All three are declared as strings here; --max-items is
// converted to a number by parseMaxItems further down.
const { values: flags } = parseArgs({
  strict: true,
  options: {
    "seed-urls": { type: "string" },
    "max-items": { type: "string" },
    query: { type: "string" },
  },
});

// Missing flags fall back to "" so each one is rejected by its own validation
// below (parseMaxItems exits on "" because it is not an integer from 1 to 100).
const seedUrls = splitCsv(flags["seed-urls"] ?? "");
const query = clean(flags.query ?? "");
const maxItems = parseMaxItems(flags["max-items"] ?? "");

// Input validation: every failure is reported with an OUT_OF_SCOPE prefix on
// stderr and ends the process with status 1.
if (seedUrls.length === 0) {
  console.error("OUT_OF_SCOPE: --seed-urls is required and must contain at least one URL");
  process.exit(1);
}
if (!query) {
  console.error("OUT_OF_SCOPE: --query is required");
  process.exit(1);
}

// Seeds are parsed without a base URL, so each entry must be an absolute
// http(s) URL; normalizeHttpUrl returns null for anything else.
const normalizedSeedUrls = seedUrls.map((url) => normalizeHttpUrl(url, null));
for (const url of normalizedSeedUrls) {
  if (!url) {
    console.error("OUT_OF_SCOPE: every --seed-urls entry must be a valid http or https URL");
    process.exit(1);
  }
}

/**
 * Builds the schema used for every extracted field: a string that may also be
 * null or missing. It is a factory so that each field receives its own schema
 * instance, exactly as if the chain had been written out per field.
 */
const optionalText = () => z.string().nullable().optional();

/**
 * Shape of the structured JSON requested from each scraped page: an
 * `opportunities` array (defaulting to empty when absent) whose entries carry
 * nine optional, nullable text fields.
 */
const OpportunitySchema = z.object({
  opportunities: z
    .array(
      z.object({
        portal_name: optionalText(),
        opportunity_title: optionalText(),
        buyer_name: optionalText(),
        location: optionalText(),
        posted_date: optionalText(),
        closing_date: optionalText(),
        status: optionalText(),
        opportunity_url: optionalText(),
        source_page_url: optionalText(),
      }),
    )
    .default([]),
});

/** One raw entry of the `opportunities` array as validated by OpportunitySchema. */
type ExtractedOpportunity = z.infer<typeof OpportunitySchema>["opportunities"][number];

/**
 * One normalized output row. Every field is always present; a value that could
 * not be determined is null rather than missing.
 */
type OpportunityRow = {
  portal_name: string | null;
  opportunity_title: string | null;
  buyer_name: string | null;
  location: string | null;
  posted_date: string | null;
  closing_date: string | null;
  status: string | null;
  opportunity_url: string | null;
  source_page_url: string | null;
};

/**
 * Entry point: discovers candidate pages, extracts opportunities from each one
 * in turn, and prints up to `maxItems` open, de-duplicated rows to stdout as a
 * single JSON array. Progress messages go to stderr.
 *
 * @throws Error when no candidate page could be derived from the seed URLs.
 */
async function main() {
  const pages = await discoverCandidateUrls(normalizedSeedUrls as string[]);
  if (pages.length === 0) {
    throw new Error("no candidate procurement pages found from the supplied seed URLs");
  }

  const collected: OpportunityRow[] = [];
  const seenKeys = new Set<string>();

  for (const pageUrl of pages) {
    // Stop before scraping another page once enough rows have been gathered.
    if (collected.length >= maxItems) break;

    console.error(`Extracting opportunities from ${pageUrl}`);
    const items = await extractOpportunities(pageUrl);

    for (const item of items) {
      const row = normalizeOpportunity(item, pageUrl);
      if (row === null || !isOpenOpportunity(row)) continue;

      // Keep only the first occurrence of each notice.
      const key = dedupeKey(row);
      if (seenKeys.has(key)) continue;
      seenKeys.add(key);

      collected.push(row);
      if (collected.length >= maxItems) break;
    }
  }

  process.stdout.write(JSON.stringify(collected));
}

/**
 * Builds the ordered, de-duplicated list of pages to scrape.
 *
 * Every seed URL is a candidate. In addition, each seed's host is searched
 * with a `site:` query built from the user's keyword, and same-host results
 * whose URL looks procurement-related are appended.
 *
 * Seeds are always placed ahead of discovered URLs. The list is capped, and
 * the caller explicitly asked for the seeds, so a later seed must not be
 * pushed out of the list by pages discovered for an earlier one.
 *
 * @param seeds Normalized absolute http(s) seed URLs.
 * @returns Up to `min(12, max(3, maxItems * 2))` unique URLs, seeds first.
 */
async function discoverCandidateUrls(seeds: string[]): Promise<string[]> {
  const discovered: string[] = [];

  for (const seed of seeds) {
    const host = hostFromUrl(seed);
    if (!host) continue;

    const searchQuery = `site:${host} ${query} open procurement opportunity tender bid solicitation contract`;
    console.error(`Searching portal domain: ${searchQuery}`);

    // Ask for between 2 and 5 results per seed, scaled by the requested row count.
    const result = await firecrawl.search(searchQuery, {
      limit: Math.min(5, Math.max(2, maxItems)),
      integration: "prometheus",
    });

    // The result list is read from `web` when that is an array, otherwise from `data`.
    const searchItems = Array.isArray((result as any).web)
      ? (result as any).web
      : Array.isArray((result as any).data)
        ? (result as any).data
        : [];

    for (const item of searchItems) {
      const url = normalizeHttpUrl(item?.url ?? item?.metadata?.sourceURL, seed);
      if (!url) continue;
      // Stay on the portal the user pointed at, and skip unrelated pages.
      if (hostFromUrl(url) !== host) continue;
      if (!looksLikeProcurementUrl(url)) continue;
      discovered.push(url);
    }
  }

  const cap = Math.min(12, Math.max(3, maxItems * 2));
  return unique([...seeds, ...discovered]).slice(0, cap);
}

/**
 * Scrapes one page and asks Firecrawl for structured JSON describing the open
 * procurement opportunities on it that match the user's query.
 *
 * @param sourcePageUrl Page to scrape.
 * @returns The extracted opportunities, or an empty array when the returned
 *   JSON does not validate against OpportunitySchema (a message is logged to
 *   stderr in that case). Errors thrown by the scrape call itself propagate.
 */
async function extractOpportunities(sourcePageUrl: string): Promise<ExtractedOpportunity[]> {
  // The instruction sentences are joined with single spaces into one prompt.
  const prompt = [
    "Extract active or currently open government procurement opportunities from this official procurement page.",
    `Only include opportunities matching this keyword or theme: ${query}.`,
    "Return one row per active bid, tender, solicitation, RFP, RFQ, contract notice, or opportunity.",
    "Ignore closed, awarded, expired, archived, cancelled, or withdrawn notices when that status is visible.",
    "Do not invent dates, buyer names, status, locations, URLs, or portal names.",
    "Use null for fields that are not visible.",
    "For source_page_url, use the page URL where the row was found.",
  ].join(" ");

  // NOTE(review): waitFor and timeout are presumably milliseconds — confirm
  // against the Firecrawl SDK documentation.
  const doc = await firecrawl.scrape(sourcePageUrl, {
    formats: [{
      type: "json",
      prompt,
      schema: OpportunitySchema,
    }],
    onlyMainContent: true,
    waitFor: 2000,
    timeout: 60000,
    integration: "prometheus",
  });

  // Re-validate locally: the response is untyped, so a malformed payload is
  // treated as "no rows from this page" instead of aborting the run.
  const parsed = OpportunitySchema.safeParse((doc as any).json);
  if (!parsed.success) {
    console.error(`Skipping ${sourcePageUrl}: extracted JSON did not match the expected shape`);
    return [];
  }

  return parsed.data.opportunities;
}

/**
 * Converts one raw extracted item into an output row.
 *
 * The source page URL falls back to the page that was scraped; the opportunity
 * URL is resolved relative to that source page. Items that have neither a
 * title nor a usable opportunity URL are discarded.
 *
 * @param item Raw extracted opportunity.
 * @param fallbackSourceUrl URL of the page the item was extracted from.
 * @returns The normalized row, or null when the item cannot be identified.
 */
function normalizeOpportunity(item: ExtractedOpportunity, fallbackSourceUrl: string): OpportunityRow | null {
  const pageUrl = normalizeHttpUrl(item.source_page_url, fallbackSourceUrl) ?? fallbackSourceUrl;
  const noticeUrl = normalizeHttpUrl(item.opportunity_url, pageUrl);
  const heading = nullableText(item.opportunity_title);

  const identifiable = heading !== null || noticeUrl !== null;
  if (!identifiable) return null;

  const row: OpportunityRow = {
    portal_name: nullableText(item.portal_name) ?? portalNameFromUrl(pageUrl),
    opportunity_title: heading,
    buyer_name: nullableText(item.buyer_name),
    location: nullableText(item.location),
    posted_date: nullableText(item.posted_date),
    closing_date: nullableText(item.closing_date),
    status: nullableText(item.status),
    opportunity_url: noticeUrl,
    source_page_url: pageUrl,
  };
  return row;
}

/**
 * Decides whether a row should be reported as an open opportunity.
 *
 * A row is rejected when its status contains a closed-type word or when its
 * closing date parses to a moment before the start of today (UTC). A row with
 * no status text is accepted. Otherwise it is accepted when it has a parseable
 * closing date or its status contains an open-type word.
 *
 * @param row Normalized opportunity row.
 * @returns true when the row should be kept.
 */
function isOpenOpportunity(row: OpportunityRow): boolean {
  const closedWords = /\b(closed|expired|awarded|award notice|archived|cancelled|canceled|withdrawn|inactive)\b/;
  const openWords = /\b(open|active|current|published|accepting|live|opportunity)\b/;

  const status = clean(row.status ?? "").toLowerCase();
  if (closedWords.test(status)) return false;

  const deadline = parseVisibleDate(row.closing_date);
  const deadlinePassed = deadline !== null && deadline.getTime() < startOfTodayUtc().getTime();
  if (deadlinePassed) return false;

  if (status === "") return true;
  return deadline !== null || openWords.test(status);
}

/**
 * Builds the key used to detect duplicate rows.
 *
 * The key is `<portal>:<notice>`. The portal part is the slugged portal name,
 * or the source page's host when the name is missing. The notice part is the
 * opportunity URL with tracking parameters removed, or, when there is no URL,
 * the slugged title, buyer and closing date joined with "|".
 *
 * @param row Normalized opportunity row.
 * @returns The de-duplication key.
 */
function dedupeKey(row: OpportunityRow): string {
  const portalPart = slug(row.portal_name ?? hostFromUrl(row.source_page_url ?? "") ?? "");

  let noticePart: string;
  if (row.opportunity_url) {
    noticePart = stripTracking(row.opportunity_url);
  } else {
    const fields = [row.opportunity_title, row.buyer_name, row.closing_date];
    noticePart = fields.map((field) => slug(field ?? "")).join("|");
  }

  return portalPart + ":" + noticePart;
}

/**
 * Splits a comma-separated string into whitespace-normalized entries,
 * dropping entries that are empty after normalization.
 */
function splitCsv(value: string): string[] {
  const entries: string[] = [];
  for (const piece of value.split(",")) {
    const entry = clean(piece);
    if (entry) entries.push(entry);
  }
  return entries;
}

/**
 * Converts the --max-items flag to a number.
 *
 * Anything that is not an integer from 1 to 100 is reported on stderr and
 * terminates the process with status 1.
 *
 * @param value Raw flag text.
 * @returns The parsed item limit.
 */
function parseMaxItems(value: string): number {
  const count = Number(value);
  const withinBounds = Number.isInteger(count) && count >= 1 && count <= 100;
  if (!withinBounds) {
    console.error("OUT_OF_SCOPE: --max-items must be an integer from 1 to 100");
    process.exit(1);
  }
  return count;
}

/**
 * Turns a loosely typed value into a canonical absolute http(s) URL.
 *
 * @param value Candidate URL; it is stringified and whitespace-normalized first.
 * @param baseUrl Base used to resolve relative URLs, or null to require an
 *   absolute URL.
 * @returns The URL without its fragment, or null when the value is empty,
 *   cannot be parsed, or uses a scheme other than http/https.
 */
function normalizeHttpUrl(value: unknown, baseUrl: string | null): string | null {
  const raw = clean(String(value ?? ""));
  if (raw === "") return null;

  let resolved: URL;
  try {
    resolved = baseUrl ? new URL(raw, baseUrl) : new URL(raw);
  } catch {
    return null;
  }

  const isWeb = resolved.protocol === "http:" || resolved.protocol === "https:";
  if (!isWeb) return null;

  // Fragments never identify a different page, so they are dropped.
  resolved.hash = "";
  return resolved.toString();
}

/**
 * Extracts the lower-cased hostname of a URL with a leading "www." removed.
 *
 * @param value Absolute URL text.
 * @returns The bare hostname, or null when the text is not a parseable URL.
 */
function hostFromUrl(value: string): string | null {
  let hostname: string;
  try {
    hostname = new URL(value).hostname;
  } catch {
    return null;
  }
  const bare = hostname.startsWith("www.") ? hostname.slice("www.".length) : hostname;
  return bare.toLowerCase();
}

/**
 * Heuristic check for whether a URL is likely to point at procurement content.
 *
 * The URL is lower-cased and searched for a procurement keyword that stands
 * alone, i.e. is not directly preceded or followed by a letter or digit.
 * Singular and plural forms are accepted ("tender"/"tenders",
 * "opportunity"/"opportunities"), and "_" counts as a separator so slugs such
 * as "open_bids" match while words like "forbidden" do not.
 *
 * @param value URL text to inspect.
 * @returns true when a procurement keyword is present.
 */
function looksLikeProcurementUrl(value: string): boolean {
  const text = value.toLowerCase();
  // The bare "opportunit" stem is still accepted so that every URL matched by
  // the earlier, stricter pattern keeps matching.
  const keyword =
    /(?<![a-z0-9])(procurements?|tenders?|bids?|rfps?|rfqs?|solicitations?|contracts?|notices?|opportunit(?:y|ies)?|opps?|suppliers?|vendors?)(?![a-z0-9])/;
  return keyword.test(text);
}

/**
 * Derives a display name for the portal hosting a URL.
 *
 * Known portals are recognised by a substring of the host, checked in the
 * listed order; any other host is returned as-is.
 *
 * @param value Absolute URL text.
 * @returns The portal name, the bare host, or null when no host can be read.
 */
function portalNameFromUrl(value: string): string | null {
  const host = hostFromUrl(value);
  if (!host) return null;

  const knownPortals: ReadonlyArray<readonly [string, string]> = [
    ["sam.gov", "SAM.gov"],
    ["contractsfinder.service.gov.uk", "Contracts Finder"],
    ["find-tender.service.gov.uk", "Find a Tender"],
    ["ted.europa.eu", "Tenders Electronic Daily"],
    ["canadabuys.canada.ca", "CanadaBuys"],
    ["tenders.gov.au", "AusTender"],
    ["gets.govt.nz", "GETS"],
    ["gebiz.gov.sg", "GeBIZ"],
  ];

  const hit = knownPortals.find(([fragment]) => host.includes(fragment));
  return hit ? hit[1] : host;
}

/**
 * Stringifies and whitespace-normalizes a value, mapping an empty result
 * (including null and undefined input) to null.
 */
function nullableText(value: unknown): string | null {
  const normalized = clean(String(value ?? ""));
  return normalized.length > 0 ? normalized : null;
}

/**
 * Collapses every run of whitespace to a single space and removes leading and
 * trailing whitespace.
 */
function clean(value: string): string {
  const words = value.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}

/**
 * Returns the whitespace-normalized, non-empty values with duplicates removed,
 * keeping the position of each value's first occurrence.
 */
function unique(values: string[]): string[] {
  const kept = new Set<string>();
  for (const value of values) {
    const normalized = clean(value);
    if (normalized) kept.add(normalized);
  }
  return [...kept];
}

/**
 * Produces a lower-case, dash-separated token from free text for use inside
 * de-duplication keys.
 *
 * The first "http://" or "https://" is removed, every run of characters other
 * than a-z and 0-9 becomes one dash, a single leading and trailing dash are
 * stripped, and the result is limited to 180 characters.
 */
function slug(value: string): string {
  const lowered = clean(value).toLowerCase();
  const withoutScheme = lowered.replace(/https?:\/\//, "");
  const dashed = withoutScheme.replace(/[^a-z0-9]+/g, "-");
  const trimmed = dashed.replace(/^-|-$/g, "");
  return trimmed.slice(0, 180);
}

/**
 * Reduces a URL to the parts that identify a notice: origin, path, and a fixed
 * set of identifier query parameters. All other query parameters are dropped.
 *
 * @param value URL text.
 * @returns The reduced URL, or the slugged input when it cannot be processed
 *   as a URL.
 */
function stripTracking(value: string): string {
  const identifierParams = ["id", "noticeId", "notice-id", "opp", "opportunityId", "opportunity-id"];

  try {
    const original = new URL(value);
    const reduced = new URL(`${original.origin}${original.pathname}`);

    identifierParams.forEach((name) => {
      const kept = original.searchParams.get(name);
      // Empty or absent identifiers are not carried over.
      if (kept) reduced.searchParams.set(name, kept);
    });

    return reduced.toString();
  } catch {
    return slug(value);
  }
}

/**
 * Parses a human-readable date such as "27 July 2026, 12pm" or
 * "3rd July 2026 at 5:30pm".
 *
 * Before handing the text to `Date.parse` it is tidied up:
 * - runs of whitespace are collapsed and the ends trimmed;
 * - ordinal suffixes are removed ("3rd" -> "3");
 * - the word "at" is removed;
 * - 12-hour times are rewritten as "H:MM am/pm" ("12pm" -> "12:00 pm",
 *   "5.30pm" -> "5:30 pm"), because the compact forms used by portals are
 *   otherwise rejected and the closing date would be silently ignored.
 *
 * @param value Date text as shown on the page, or null.
 * @returns The parsed date, or null when the text is empty or unparseable.
 */
function parseVisibleDate(value: string | null): Date | null {
  const text = (value ?? "").replace(/\s+/g, " ").trim();
  if (!text) return null;

  const normalized = text
    .replace(/\b(\d{1,2})(st|nd|rd|th)\b/gi, "$1")
    .replace(/\bat\b/gi, " ")
    .replace(
      /\b(\d{1,2})(?:[:.](\d{2}))?\s*([ap]m)\b/gi,
      (_match: string, hour: string, minutes: string | undefined, meridiem: string) =>
        `${hour}:${minutes ?? "00"} ${meridiem}`,
    );

  const parsed = Date.parse(normalized);
  return Number.isNaN(parsed) ? null : new Date(parsed);
}

/** Returns the instant at which the current UTC calendar day began (00:00:00.000 UTC). */
function startOfTodayUtc(): Date {
  const midnight = new Date();
  midnight.setUTCHours(0, 0, 0, 0);
  return midnight;
}

// Run the collector. Any error that escapes main() is printed to stderr and
// the process exits with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
deploy to unlock

Deploy this collector to unlock schedules, the API endpoint, and destinations.

One person builds it. Everyone keeps it fresh.