Prometheus

Local Business Search collector facts

Publisher: sideguide (@sideguide).

Version: 1. Last updated: 2026-06-13T21:42:12.764Z.

Run this collector on demand, as an API endpoint, or on a schedule with Firecrawl Prometheus.

Sample fields: count, query, location, sourceUrl, businesses, name, phone, rating, address, website, category, reviewCount.

Parameters: query (string, required).

Local Business Search

v1 · Published

Find local businesses for a search query — name, address, phone, website, category, rating, and review count. Parameter: query.

Output & API

Preview the latest data, download it, or call this collector as an API.

Author's sample data
count: 30
query: coffee shops in Austin TX
location: Austin TX
sourceUrl: https://www.yellowpages.com/search?search_terms=coffee+shops&geo_location_terms=Austin+TX
businesses
searchTerms: coffee shops
Parameters
--query (string, required): The local-business search, ideally in the form '<what> in <where>', e.g. 'coffee shops in Austin TX' or 'plumbers in Denver CO'. Example: "coffee shops in Austin TX"

Marketplace

Publish this collector so others can deploy it — you keep ownership.

0 subscribers
sideguide (@sideguide)
0 runs in 14d · published 6h ago

Versions

Every build and self-heal appends a version. Pin one to lock runs to it.

managed by author
v1 · built · approved · current · 6h ago
How this script collects data
import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";
import { parseArgs } from "node:util";

/**
 * Local-business search → structured listings.
 *
 * Given a natural-language local-business query (e.g. "coffee shops in
 * Austin TX"), this scrapes the matching Yellow Pages search-results page and
 * extracts, for each business: name, full address, phone, website, primary
 * category, star rating, and number of reviews.
 *
 * Yellow Pages is used as the source because a single search-results page
 * already carries every requested field in clean, server-rendered HTML, so the
 * data can be picked out deterministically with CSS selectors (no LLM needed).
 *
 * The query is split into a "what" and a "where" on the last " in " token:
 *   "coffee shops in Austin TX" -> terms="coffee shops", location="Austin TX".
 * If there is no " in " token, the whole query is used as the search terms with
 * no location filter.
 *
 * Output (stdout, JSON only):
 *   {
 *     query: string,
 *     searchTerms: string,
 *     location: string | null,
 *     sourceUrl: string,
 *     count: number,
 *     businesses: Array<{
 *       name: string,
 *       address: string | null,
 *       phone: string | null,
 *       website: string | null,
 *       category: string | null,   // primary (first) category
 *       rating: number | null,     // stars, 0-5 scale
 *       reviewCount: number | null
 *     }>
 *   }
 */

/**
 * One business listing extracted from a search-results page.
 *
 * Every field except `name` is `null` when the listing does not provide it.
 */
interface Business {
  /** Display name; result rows without a name are skipped rather than emitted. */
  name: string;
  /** Street address and locality joined with ", " (whichever parts are present). */
  address: string | null;
  /** Text of the listing's primary phone element. */
  phone: string | null;
  /** `href` of the listing's "visit website" link. */
  website: string | null;
  /** Primary (first listed) category. */
  category: string | null;
  /** Star rating decoded from the rating element's class words, in half-star steps. */
  rating: number | null;
  /** Number of reviews, parsed from the count text next to the rating. */
  reviewCount: number | null;
}

// Maps the English number word found in a rating class name to its whole-star
// value. The words are listed in ascending order, so each word's position in
// the list is the number of stars it stands for.
const STAR_WORDS: Record<string, number> = Object.fromEntries(
  ["zero", "one", "two", "three", "four", "five"].map(
    (word, stars): [string, number] => [word, stars],
  ),
);

/**
 * Normalizes a piece of scraped text.
 *
 * Every run of whitespace is collapsed to a single space and the ends are
 * trimmed.
 *
 * @param v Raw text, possibly missing.
 * @returns The normalized text, or `null` when `v` is not a string or contains
 *   nothing but whitespace.
 */
function cleanText(v: string | undefined | null): string | null {
  if (typeof v === "string") {
    const collapsed = v.replace(/\s+/g, " ").trim();
    if (collapsed.length > 0) {
      return collapsed;
    }
  }
  return null;
}

/**
 * Decodes a star rating from the `class` attribute of a `.result-rating`
 * element.
 *
 * Yellow Pages encodes the rating as class words, e.g.
 * "result-rating three half" -> 3.5 and "result-rating five" -> 5.
 *
 * @param classAttr The element's full `class` attribute value, if any.
 * @returns The rating in stars, or `null` when the attribute is missing/empty
 *   or contains no recognized number word. If several number words appear, the
 *   last one wins.
 */
function parseRating(classAttr: string | undefined): number | null {
  if (!classAttr) return null;

  let base: number | null = null;
  let half = false;

  for (const word of classAttr.toLowerCase().split(/\s+/)) {
    if (word === "half") {
      half = true;
      continue;
    }
    // Own-property lookup only: the `in` operator also matches keys inherited
    // from Object.prototype (e.g. "constructor"), which would put a non-number
    // into `base` if such a token ever showed up in the class list.
    const stars = Object.prototype.hasOwnProperty.call(STAR_WORDS, word)
      ? STAR_WORDS[word]
      : undefined;
    if (typeof stars === "number") {
      base = stars;
    }
  }

  if (base === null) return null;
  return half ? base + 0.5 : base;
}

/**
 * Extracts a review count from text such as "(12)" or "(1,234)".
 *
 * The first run of digits (thousands separators allowed) is taken and the
 * commas are stripped before conversion.
 *
 * @param v Cleaned count text, or `null` when the element was absent.
 * @returns The count as a number, or `null` when `v` is empty/null or holds no
 *   digits.
 */
function parseReviewCount(v: string | null): number | null {
  const digits = v ? /\d[\d,]*/.exec(v) : null;
  if (digits === null) {
    return null;
  }
  const parsed = Number(digits[0].replace(/,/g, ""));
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  return null;
}

/**
 * Splits a local-business query into what to search for and where.
 *
 * The split happens on the LAST whitespace-delimited "in" (case-insensitive),
 * so "coffee shops in Austin TX" becomes terms "coffee shops" and location
 * "Austin TX". A query without such a token is returned whole as the terms.
 *
 * @param query The raw query; surrounding whitespace is ignored.
 * @returns The search terms and the location (`null` when none was found).
 */
function splitQuery(query: string): { terms: string; location: string | null } {
  const trimmed = query.trim();
  // The greedy first group pushes the match to the final " in " in the query.
  const parts = /^(.*\S)\s+in\s+(\S.*)$/i.exec(trimmed);
  if (parts === null) {
    return { terms: trimmed, location: null };
  }
  return { terms: parts[1].trim(), location: parts[2].trim() };
}

/**
 * CLI entry point.
 *
 * Reads `--query`, turns it into a Yellow Pages search URL, fetches that page's
 * HTML through Firecrawl, extracts each listing with CSS selectors, and writes
 * one JSON document to stdout.
 *
 * Exits with status 1 (after a message on stderr) when `--query` is missing or
 * blank, or when FIRECRAWL_API_KEY is not set. Throws when the scrape returns
 * no HTML, or when result blocks exist but none of them has a business name.
 */
async function main(): Promise<void> {
  const cli = parseArgs({
    strict: true,
    options: { query: { type: "string" } },
  });

  const query = cli.values.query;
  if (typeof query !== "string" || query.trim() === "") {
    console.error("Missing required parameter: --query=<local-business search>");
    process.exit(1);
  }

  const apiKey = process.env.FIRECRAWL_API_KEY;
  if (!apiKey) {
    console.error("FIRECRAWL_API_KEY environment variable is not set");
    process.exit(1);
  }

  const { terms, location } = splitQuery(query);

  // Percent-encode a query-string value, then show spaces as "+" so the
  // resulting URL stays readable.
  const plusEncode = (value: string): string =>
    encodeURIComponent(value).replace(/%20/g, "+");

  const params = [`search_terms=${plusEncode(terms)}`];
  if (location) {
    params.push(`geo_location_terms=${plusEncode(location)}`);
  }
  const sourceUrl = `https://www.yellowpages.com/search?${params.join("&")}`;

  // Single place that shapes and emits the JSON document, used for both the
  // empty and the populated outcome.
  const writeReport = (businesses: Business[]): void => {
    const report = {
      query,
      searchTerms: terms,
      location,
      sourceUrl,
      count: businesses.length,
      businesses,
    };
    process.stdout.write(JSON.stringify(report));
  };

  const firecrawl = new Firecrawl({ apiKey });
  const scraped = (await firecrawl.scrape(sourceUrl, {
    formats: ["html"],
    onlyMainContent: false,
    proxy: "auto",
    integration: "prometheus",
  } as Parameters<typeof firecrawl.scrape>[1])) as { html?: string };

  const html = scraped?.html;
  if (typeof html !== "string" || html.length === 0) {
    throw new Error(`no HTML returned for ${sourceUrl} (page may be bot-blocked or empty)`);
  }

  const $ = cheerio.load(html);
  const results = $("div.result");
  if (results.length === 0) {
    // In-scope page that simply yielded no listings (e.g. obscure query/location).
    writeReport([]);
    return;
  }

  const businesses: Business[] = [];
  for (const el of results.toArray()) {
    const card = $(el);

    const name = cleanText(card.find("a.business-name").first().text());
    if (!name) continue; // not a real listing row

    // Address is whichever of street / locality are present, comma-separated.
    const addressParts = [
      cleanText(card.find(".street-address").first().text()),
      cleanText(card.find(".locality").first().text()),
    ].filter(Boolean);
    const address = addressParts.length > 0 ? addressParts.join(", ") : null;

    const phone = cleanText(card.find(".phones.phone.primary").first().text());

    const href = card.find("a.track-visit-website").first().attr("href");
    const website = href ? href : null;

    const category = cleanText(card.find(".categories a").first().text());

    const rating = parseRating(card.find(".result-rating").first().attr("class"));
    const countText = cleanText(card.find(".ratings .count").first().text());
    const reviewCount = parseReviewCount(countText);

    businesses.push({ name, address, phone, website, category, rating, reviewCount });
  }

  if (businesses.length === 0) {
    throw new Error(`found ${results.length} result blocks but none had a business name (page structure may have changed)`);
  }

  writeReport(businesses);
}

// Run the collector. Any failure is reported on stderr as a single message
// line and the process ends with a non-zero exit status.
main().catch((err: unknown) => {
  const message = err instanceof Error ? err.message : String(err);
  console.error(message);
  process.exit(1);
});
deploy to unlock

Deploy this collector to unlock schedules, the API endpoint, and destinations.

One person builds it. Everyone keeps it fresh.
Local Business Search Data Collector | Firecrawl Prometheus