Job Listings Search
v1 · Published. Find current job listings for a query and location — title, company, location, salary, posting date, and URL. Parameters: query, location.
Output & API
Preview the latest data, download it, or call this collector as an API.
| jobs | |
|---|---|
| count | 50 |
| query | software engineer |
| source | simplyhired.com |
| location | San Francisco, CA |
| retrievedAt | 2026-06-13T21:54:35.647Z |
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import { parseArgs } from "node:util";
import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";
// ---------------------------------------------------------------------------
// CLI parameters and environment
// ---------------------------------------------------------------------------

/** Report `message` on stderr and terminate the process with exit status 1. */
function exitWithError(message: string): never {
  console.error(message);
  process.exit(1);
}

// Parsing is strict, so flags other than the three declared here are rejected.
const { values } = parseArgs({
  options: {
    query: { type: "string" },
    location: { type: "string" },
    "max-results": { type: "string" },
  },
  strict: true,
});

// Both search terms are whitespace-trimmed; an absent flag becomes "".
const query = values.query?.trim() ?? "";
const location = values.location?.trim() ?? "";

// A missing, non-numeric, or zero --max-results falls back to 50, and any
// value below 1 is raised to 1.
const requestedMax = Number(values["max-results"] ?? "50");
const maxResults = Math.max(1, requestedMax || 50);

if (!query) {
  exitWithError("Missing required --query parameter");
}
if (!location) {
  exitWithError("Missing required --location parameter");
}

// The Firecrawl API key is read from the environment, never from the CLI.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
  exitWithError("Missing FIRECRAWL_API_KEY environment variable");
}

const firecrawl = new Firecrawl({ apiKey });

// Base origin used for both search URLs and the job links that are emitted.
const ORIGIN = "https://www.simplyhired.com";
/** One normalized job listing as emitted in the collector's output. */
interface Job {
  /** Listing title; the empty string when the source entry has no string title. */
  title: string;
  /** Hiring company, or `null` when the source entry has no string value. */
  company: string | null;
  /** Job location text, or `null` when the source entry has no string value. */
  location: string | null;
  /** Trimmed salary text, or `null` when absent or blank. */
  salary: string | null;
  /** Posting date as `YYYY-MM-DD`, or `null` when no usable timestamp exists. */
  datePosted: string | null;
  /** Absolute URL of the listing on the source site. */
  url: string;
}
/**
 * Build the SimplyHired search URL for the configured query and location.
 *
 * SimplyHired paginates with an opaque `cursor` token that the previous page
 * exposes in its embedded state; pass `null` to request the first page.
 *
 * @param cursor - Cursor token for the page to fetch, or `null` for page one.
 * @returns The absolute search URL with `q`, `l`, and (when given) `cursor`.
 */
function buildSearchUrl(cursor: string | null): string {
  const search = new URLSearchParams({ q: query, l: location });
  if (cursor) {
    search.append("cursor", cursor);
  }
  return [ORIGIN, "/search?", search.toString()].join("");
}
/** True when `value` is a non-null object whose properties can be read safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Normalize one raw entry of the page-state jobs array into a `Job`.
 *
 * @returns The normalized job, or `null` when the entry is not an object or
 *   carries neither a relative `botUrl` nor a `jobKey` to build a URL from.
 */
function toJob(raw: unknown): Job | null {
  if (!isRecord(raw)) return null;

  // botUrl is the clean, share-safe relative path (e.g. /job/<key>);
  // fall back to constructing it from the jobKey. With neither available
  // there is no real listing URL, so the entry is dropped instead of
  // emitting a fabricated ".../job/undefined" link.
  const { botUrl, jobKey } = raw;
  let path: string;
  if (typeof botUrl === "string" && botUrl.startsWith("/")) {
    path = botUrl;
  } else if (
    (typeof jobKey === "string" && jobKey !== "") ||
    typeof jobKey === "number"
  ) {
    path = `/job/${jobKey}`;
  } else {
    return null;
  }

  // dateOnIndeed is passed straight to the Date constructor; only the
  // calendar-date part of the resulting ISO timestamp is kept.
  let datePosted: string | null = null;
  const { dateOnIndeed } = raw;
  if (typeof dateOnIndeed === "number" && dateOnIndeed > 0) {
    const posted = new Date(dateOnIndeed);
    if (!Number.isNaN(posted.getTime())) {
      datePosted = posted.toISOString().slice(0, 10);
    }
  }

  const { salaryInfo } = raw;
  const salary =
    typeof salaryInfo === "string" && salaryInfo.trim()
      ? salaryInfo.trim()
      : null;

  return {
    title: typeof raw.title === "string" ? raw.title : "",
    company: typeof raw.company === "string" ? raw.company : null,
    location: typeof raw.location === "string" ? raw.location : null,
    salary,
    datePosted,
    url: `${ORIGIN}${path}`,
  };
}

/**
 * Extract the job listings and next-page cursor from a SimplyHired search page.
 *
 * SimplyHired ships the full result set as JSON inside the Next.js
 * `__NEXT_DATA__` <script> blob, so we parse that rather than scraping
 * rendered markup. The blob survives only in rawHtml (html strips <script>).
 *
 * Entries of the jobs array that are not objects, or for which no listing URL
 * can be built, are skipped rather than aborting the whole page.
 *
 * @param rawHtml - Unprocessed HTML of a search results page.
 * @returns The page's listings and the cursor of the following page, or a
 *   `null` cursor when the page state exposes none.
 * @throws Error when the blob is missing, is not valid JSON, or does not
 *   contain a jobs array.
 */
function parsePage(rawHtml: string): {
  jobs: Job[];
  nextCursor: string | null;
} {
  const $ = cheerio.load(rawHtml);
  const blob = $("#__NEXT_DATA__").first().html();
  if (!blob) {
    throw new Error(
      "no __NEXT_DATA__ blob found on SimplyHired search page",
    );
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(blob);
  } catch {
    throw new Error("failed to JSON.parse __NEXT_DATA__ blob");
  }

  // Walk props.pageProps defensively: any missing level ends up as
  // `undefined` and is reported by the jobs-array check below.
  const props = isRecord(parsed) ? parsed.props : undefined;
  const pageProps = isRecord(props) ? props.pageProps : undefined;
  const rawJobs: unknown = isRecord(pageProps) ? pageProps.jobs : undefined;
  if (!isRecord(pageProps) || !Array.isArray(rawJobs)) {
    throw new Error("no jobs array in SimplyHired page state");
  }

  const jobs: Job[] = [];
  for (const entry of rawJobs) {
    const job = toJob(entry);
    if (job !== null) jobs.push(job);
  }

  // The next page's cursor is keyed by page number in pageCursors.
  const currentPage = Number(pageProps.currentPageNumber ?? 1);
  const cursors = pageProps.pageCursors;
  const candidate = isRecord(cursors)
    ? cursors[String(currentPage + 1)]
    : undefined;
  const nextCursor = typeof candidate === "string" ? candidate : null;

  return { jobs, nextCursor };
}
/**
 * Scrape `url` through the Firecrawl client and return the page's raw HTML.
 *
 * Only the `rawHtml` format is requested, with `onlyMainContent` disabled.
 *
 * @param url - Absolute URL of the page to scrape.
 * @returns The non-empty raw HTML string from the scrape response.
 * @throws Error when the response carries no non-empty `rawHtml` string.
 */
async function scrapeRawHtml(url: string): Promise<string> {
  const response: any = await firecrawl.scrape(url, {
    formats: ["rawHtml"],
    onlyMainContent: false,
    integration: "prometheus",
  });

  // Accept rawHtml at the top level of the response or nested under `data`.
  const rawHtml: unknown = response?.rawHtml ?? response?.data?.rawHtml;
  if (typeof rawHtml === "string" && rawHtml !== "") {
    return rawHtml;
  }
  throw new Error("SimplyHired scrape returned no rawHtml content");
}
/**
 * Run the collector: page through the search results, de-duplicate listings
 * by URL, and write a single JSON document to stdout.
 *
 * Per-page progress goes to stderr so stdout carries only the JSON result.
 * Paging stops once `maxResults` unique listings are collected, when a page
 * exposes no next cursor, or when the page cap is reached.
 */
async function main() {
  const uniqueJobs: Job[] = [];
  const knownUrls = new Set<string>();

  // Safety cap on page fetches so a never-ending cursor chain can't loop.
  // NOTE(review): the divisor 20 presumably reflects the site's page size — confirm.
  const pageLimit = Math.min(15, Math.ceil(maxResults / 20) + 1);

  let pageCursor: string | null = null;
  let pageNumber = 0;
  while (pageNumber < pageLimit) {
    pageNumber += 1;

    const html = await scrapeRawHtml(buildSearchUrl(pageCursor));
    const result = parsePage(html);

    for (const job of result.jobs) {
      if (!knownUrls.has(job.url)) {
        knownUrls.add(job.url);
        uniqueJobs.push(job);
      }
    }

    console.error(
      `page ${pageNumber}: +${result.jobs.length} jobs (total ${uniqueJobs.length})`,
    );

    if (uniqueJobs.length >= maxResults || !result.nextCursor) {
      break;
    }
    pageCursor = result.nextCursor;
  }

  // The last page may overshoot the requested amount, so trim before emitting.
  const jobs = uniqueJobs.slice(0, maxResults);
  const payload = {
    source: "simplyhired.com",
    query,
    location,
    retrievedAt: new Date().toISOString(),
    count: jobs.length,
    jobs,
  };
  process.stdout.write(JSON.stringify(payload));
}
// Entry point: run the collector; on failure print the reason to stderr and
// exit with status 1.
main().catch((reason: unknown) => {
  let message: string;
  if (reason instanceof Error) {
    message = reason.message;
  } else {
    message = String(reason);
  }
  console.error(message);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.