DoorDash Cafe Menus
v1 · Published — A DoorDash cafe menu dataset with venue details and categorized menu items for the sample cafe.
Output & API
Preview the latest data, download it, or call this collector as an API.
| venues |
|---|
Parameters
--url (string[]): DoorDash store page URLs to collect menus from; repeat this flag once per cafe. If omitted, the collector uses the tested sample cafe URL. Default: ["https://www.doordash.com/store/caffe-in-coffee-co-25001927/"]
Marketplace
Publish this collector so others can deploy it — you keep ownership.
0 runs in 14d · published 4d ago
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
v1 · built · approved · current · 4d ago
How this script collects data
import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";
import { parseArgs } from "node:util";
// Store page that is scraped when no --url flag is passed on the command line.
const DEFAULT_URLS = ["https://www.doordash.com/store/caffe-in-coffee-co-25001927/"];

// Menu section names that collectVenue leaves out of the collected items.
const SKIP_SECTIONS = new Set(["Featured Items", "Most Ordered", "Popular Items", "Reviews"]);

// The Firecrawl credential is mandatory; stop with exit code 1 before doing
// anything else when it is missing or empty.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (apiKey === undefined || apiKey === "") {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}

// --url may be repeated; strict mode makes parseArgs reject unknown flags.
const { values: flags } = parseArgs({
  options: { url: { type: "string", multiple: true } },
  strict: true,
});

// Fall back to the default list when no --url flag was supplied.
const urls = flags.url?.length ? flags.url : DEFAULT_URLS;
const firecrawl = new Firecrawl({ apiKey });
// Any value JSON.parse can produce; parsed JSON-LD is treated as this type
// until isRestaurantJsonLd narrows it.
type JsonValue = null | boolean | number | string | JsonValue[] | { [key: string]: JsonValue };
// Address fields that formatAddress reads from the Restaurant JSON-LD.
// Every field is optional.
type PostalAddress = {
streetAddress?: string;
addressLocality?: string;
addressRegion?: string;
postalCode?: string;
addressCountry?: string;
};
// A single menu entry; collectVenue reads name, description and offers.price.
type MenuItem = {
"@type"?: string;
name?: string;
description?: string;
offers?: {
// Declared as string or number; collectVenue converts it to a string via text().
price?: string | number;
};
};
// A named group of menu items. hasMenuItem may hold one item or a list of them.
type MenuSection = {
name?: string;
hasMenuItem?: MenuItem | MenuItem[];
};
// The JSON-LD object that findRestaurantJsonLd looks for in the page.
type RestaurantJsonLd = {
// Either a single type name or a list of type names.
"@type"?: string | string[];
name?: string;
address?: PostalAddress;
hasMenu?: {
// May be a single section, a list, or a list of lists; collectVenue
// normalizes it with flatten().
hasMenuSection?: MenuSection | MenuSection[] | MenuSection[][];
};
};
/**
 * Normalizes a "one, many, or nested many" value into a flat array.
 *
 * Only `null` and `undefined` are treated as "absent" and dropped (at any
 * nesting level). Other falsy values such as `0`, `""` and `false` are real
 * data and are kept; a plain truthiness check would silently discard them.
 *
 * @param value A single element, an array of elements, nested arrays, or nothing.
 * @returns A new flat array of the elements in their original order.
 */
function flatten<T>(value: T | T[] | T[][] | undefined | null): T[] {
  if (value === undefined || value === null) return [];
  // Recurse so that arrays nested inside arrays are flattened as well.
  if (Array.isArray(value)) return value.flatMap((entry) => flatten(entry as T | T[] | T[][]));
  return [value];
}
/**
 * Converts a loosely typed value to a trimmed string.
 *
 * @param value Any value; only strings and numbers produce output.
 * @returns The trimmed string form of a string or number, otherwise "".
 */
function text(value: unknown): string {
  switch (typeof value) {
    case "string":
      return value.trim();
    case "number":
      return String(value).trim();
    default:
      return "";
  }
}
/**
 * Checks that `url` is an http(s) DoorDash store page URL before it is scraped.
 *
 * A URL is accepted when it parses, uses the `http:` or `https:` scheme, its
 * hostname is `doordash.com` or a subdomain of it, and its path starts with
 * `/store/`.
 *
 * @param url The candidate URL, typically taken from a `--url` flag.
 * @throws Error with the `OUT_OF_SCOPE:` prefix when the URL cannot be parsed
 *   or is not a DoorDash store URL.
 */
function validateDoorDashStoreUrl(url: string): void {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error("OUT_OF_SCOPE: invalid URL");
  }
  // Only web schemes are scrapeable; reject ftp:, file:, etc. even if the
  // host and path look right.
  const isWebScheme = parsed.protocol === "https:" || parsed.protocol === "http:";
  // Anchored so that look-alike hosts such as "evildoordash.com" do not match.
  const isDoorDashHost = /(^|\.)doordash\.com$/i.test(parsed.hostname);
  const isStorePath = parsed.pathname.startsWith("/store/");
  if (!isWebScheme || !isDoorDashHost || !isStorePath) {
    throw new Error("OUT_OF_SCOPE: not a DoorDash store URL");
  }
}
/**
 * Type guard for a JSON-LD object that describes a restaurant with a menu.
 *
 * @param value A parsed JSON value.
 * @returns true when `value` is a plain object whose "@type" is, or contains,
 *   "Restaurant" and whose `hasMenu` property is truthy.
 */
function isRestaurantJsonLd(value: JsonValue): value is RestaurantJsonLd {
  // Primitives, null and arrays can never be a Restaurant object.
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return false;
  }
  const candidate = value as RestaurantJsonLd;
  const declaredType = candidate["@type"];
  // "@type" may be a single name or a list of names.
  const namesRestaurant = Array.isArray(declaredType)
    ? declaredType.includes("Restaurant")
    : declaredType === "Restaurant";
  return namesRestaurant && Boolean(candidate.hasMenu);
}
/**
 * Locates the first Restaurant JSON-LD object embedded in a page.
 *
 * Scans every `<script type="application/ld+json">` element in document
 * order. Each script body is parsed as JSON; a top-level array is searched
 * entry by entry. Empty scripts and scripts that fail to parse are skipped.
 *
 * @param rawHtml The raw HTML of the store page.
 * @returns The first value accepted by isRestaurantJsonLd.
 * @throws Error when no script contains a matching object.
 */
function findRestaurantJsonLd(rawHtml: string): RestaurantJsonLd {
  const $ = cheerio.load(rawHtml);
  const scriptNodes = $('script[type="application/ld+json"]').toArray();

  for (const node of scriptNodes) {
    const body = $(node).text().trim();
    if (body === "") continue;

    let decoded: JsonValue;
    try {
      decoded = JSON.parse(body) as JsonValue;
    } catch {
      // Ignore unrelated malformed JSON-LD blocks.
      continue;
    }

    const entries = Array.isArray(decoded) ? decoded : [decoded];
    const match = entries.find(isRestaurantJsonLd);
    if (match) return match;
  }

  throw new Error("no Restaurant JSON-LD menu found on DoorDash store page");
}
/**
 * Renders a JSON-LD postal address as a single comma-separated line:
 * street, then locality/region/postal code, then country.
 *
 * The address comes from parsed JSON-LD that is only cast to PostalAddress,
 * so individual fields are normalized before joining: strings and numbers are
 * trimmed, and any other value (for example an object-valued
 * `addressCountry`) is omitted instead of being rendered as
 * "[object Object]". Empty or whitespace-only parts are skipped.
 *
 * @param address The address object, or undefined when the page has none.
 * @returns The formatted address, or "" when nothing usable is present.
 */
function formatAddress(address: PostalAddress | undefined): string {
  if (!address) return "";

  // Same rule as the file's text() helper, kept local so this function does
  // not depend on field values actually being strings.
  const clean = (part: unknown): string =>
    typeof part === "string" || typeof part === "number" ? String(part).trim() : "";

  const cityLine = [address.addressLocality, address.addressRegion, address.postalCode]
    .map(clean)
    .filter(Boolean)
    .join(", ");

  return [clean(address.streetAddress), cityLine, clean(address.addressCountry)]
    .filter(Boolean)
    .join(", ");
}
/**
 * Scrapes one DoorDash store page and extracts the venue with its menu items.
 *
 * Steps: validate the URL, fetch the page's raw HTML through Firecrawl, find
 * the Restaurant JSON-LD, then walk its menu sections. Sections without a
 * name, without items, or listed in SKIP_SECTIONS are ignored, as are items
 * whose name or category is empty after normalization.
 *
 * @param url The store page URL to collect.
 * @returns The venue name, a formatted address, and the collected items.
 * @throws Error when the URL is out of scope, the scrape returns no raw HTML,
 *   no Restaurant JSON-LD is found, the venue name is missing, or no menu
 *   items remain.
 */
async function collectVenue(url: string) {
  validateDoorDashStoreUrl(url);

  const page = await firecrawl.v1.scrapeUrl(url, {
    formats: ["rawHtml"],
    onlyMainContent: false,
    integration: "prometheus",
    timeout: 30000,
  });
  if (!page.success || !page.rawHtml) {
    throw new Error("DoorDash scrape did not return raw HTML");
  }

  const restaurant = findRestaurantJsonLd(page.rawHtml);

  const items: { name: string; description: string; price: string; category: string }[] = [];
  for (const section of flatten(restaurant.hasMenu?.hasMenuSection)) {
    if (!section.name || !section.hasMenuItem || SKIP_SECTIONS.has(section.name)) continue;

    const category = text(section.name);
    // A section whose name normalizes to "" cannot contribute any item.
    if (!category) continue;

    for (const entry of flatten(section.hasMenuItem)) {
      const name = text(entry.name);
      if (!name) continue;
      items.push({
        name,
        description: text(entry.description),
        price: text(entry.offers?.price),
        category,
      });
    }
  }

  if (!restaurant.name) {
    throw new Error("Restaurant JSON-LD menu is missing venue name");
  }
  if (items.length === 0) {
    throw new Error("Restaurant JSON-LD menu contains no menu items");
  }

  return {
    name: restaurant.name,
    address: formatAddress(restaurant.address),
    items,
  };
}
/**
 * Collects every configured store URL one after another and writes the
 * result to stdout as a single JSON object of the form `{ "venues": [...] }`.
 * The first failing URL rejects the returned promise.
 */
async function main(): Promise<void> {
  const collected = [];
  for (const storeUrl of urls) {
    const venue = await collectVenue(storeUrl);
    collected.push(venue);
  }
  const payload = JSON.stringify({ venues: collected });
  process.stdout.write(payload);
}
// Entry point: run the collector; on any failure log it and exit with code 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.