"use strict";
// --- TypeScript-emitted runtime helpers (left untouched) ---------------------
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
// import puppeteer, { Browser, ElementHandle, Page } from 'puppeteer-core';
const puppeteer_1 = __importDefault(require("puppeteer"));
const currency_js_1 = __importDefault(require("currency.js"));
const fs_extra_1 = __importDefault(require("fs-extra"));

/**
 * Lookup table used to translate the category text scraped from a listing
 * page into the `type` value stored on the exported listing.
 */
const typesOfBusiness = [
    { name: 'Automotive', value: '1' },
    { name: 'Industrial Services', value: '2' },
    { name: 'Real Estate', value: '3' },
    { name: 'Uncategorized', value: '4' },
    { name: 'Retail', value: '5' },
    { name: 'Oilfield SVE and MFG.', value: '6' },
    { name: 'Service', value: '7' },
    { name: 'Advertising', value: '8' },
    { name: 'Agriculture', value: '9' },
    { name: 'Franchise', value: '10' },
    { name: 'Professional', value: '11' },
    { name: 'Manufacturing', value: '12' },
    { name: 'Food and Restaurant', value: '13' },
];

/**
 * Collects the direct text nodes that sit next to `elementHandle` inside its
 * parent element. If the parent has a `<pre>` child, the text nodes of that
 * `<pre>` are collected instead.
 *
 * @param {object|null} elementHandle - Puppeteer element handle, or a falsy value.
 * @returns {Promise<string|string[]|null>} `null` when there is no handle or
 *   no parent element; a single string when fewer than two text nodes were
 *   found (empty string for none); otherwise the array of text-node values.
 */
function getParentElementText(elementHandle) {
    return __awaiter(this, void 0, void 0, function* () {
        if (!elementHandle) {
            return null;
        }
        // NOTE: this callback is serialised and executed inside the browser page.
        const textContent = yield elementHandle.evaluate((el) => {
            const getText = (nodes) => {
                const result = [];
                for (const node of nodes) {
                    // Skip nodes that consist of nothing but a single line break.
                    if (node.nodeType === Node.TEXT_NODE && node.nodeValue !== "\n") {
                        // NOTE(review): a string pattern removes only the FIRST "\n";
                        // kept as-is because line breaks inside <pre> text may be wanted.
                        result.push(node.nodeValue.replace('\n', ''));
                    }
                }
                return result;
            };
            // A handle can wrap `undefined`; do not dereference it blindly.
            const parent = el ? el.parentElement : null;
            if (!parent) {
                return null;
            }
            const preResult = Array.from(parent.childNodes).find((e) => e.nodeName === 'PRE');
            return getText(Array.from((preResult || parent).childNodes));
        });
        if (!textContent) {
            return null;
        }
        return textContent.length < 2 ? textContent.join() : textContent;
    });
}

/**
 * Scrapes one listing detail page into a plain listing object.
 *
 * For every known label the matching `div.sub-title` element is located and
 * the text next to it is read; labels that are not present on the page are
 * stored as `'N/A'`.
 *
 * @param {object} page - Puppeteer page that has already navigated to the listing.
 * @returns {Promise<object|null>} The listing object, or `null` when the
 *   category cannot be mapped to `typesOfBusiness` or building the object fails.
 */
function extractListingData(page) {
    return __awaiter(this, void 0, void 0, function* () {
        const labels = {
            summaryLabel: 'Summary',
            descriptionLabel: 'Description',
            categoryLabel: 'Category:',
            locationLabel: 'Located in:',
            askingPriceLabel: 'Asking Price:',
            realEstateLabel: 'Real Estate Included:',
            salesRevenueLabel: 'Sales revenue:',
            cashflowLabel: 'Cash flow:',
            inventoryLabel: 'Inventory:',
            brokerLabel: 'Broker licensing:',
            reasonLabel: 'Reason for sale:',
            employeesLabel: 'Employees:',
        };
        const title = (yield page.$eval('div.title', (el) => el.textContent)).trim();

        // Raw scraped values, keyed by the label text shown on the page.
        const content = {};
        for (const label of Object.values(labels)) {
            try {
                const element = yield findElementWithText(page, 'div.sub-title', label);
                content[label] = element ? yield getParentElementText(element) : 'N/A';
            }
            catch (error) {
                // Best effort: one unreadable field must not discard the whole listing.
                console.log(`Fehler bei : ${label}`, error);
            }
        }

        // The category may come back as an array (several text nodes) or be
        // missing entirely; only a non-empty string can be matched by name.
        const rawCategory = content[labels.categoryLabel];
        let categoryType;
        if (typeof rawCategory === 'string' && rawCategory) {
            const wanted = rawCategory.trim().toLowerCase();
            categoryType = typesOfBusiness.find((t) => t.name.toLowerCase() === wanted);
        }
        else if (!rawCategory) {
            console.log(`---> No Category ...`);
        }
        if (!categoryType) {
            // Unknown category: skip the listing explicitly instead of relying
            // on a TypeError from `categoryType.value` further down.
            console.log(`---> ${rawCategory}`);
            console.log(`Fehler bei ${title}`);
            return null;
        }

        const asArray = (value) => (Array.isArray(value) ? value : [value]);
        try {
            const listing = {
                id: 'NA',
                userId: '1',
                listingsCategory: 'business',
                title: title,
                summary: asArray(content[labels.summaryLabel]),
                description: asArray(content[labels.descriptionLabel]),
                type: categoryType.value,
                location: content[labels.locationLabel],
                price: (0, currency_js_1.default)(content[labels.askingPriceLabel]).value,
                salesRevenue: (0, currency_js_1.default)(content[labels.salesRevenueLabel]).value,
                cashFlow: (0, currency_js_1.default)(content[labels.cashflowLabel]).value,
                brokerLicencing: content[labels.brokerLabel],
                established: null,
                realEstateIncluded: content[labels.realEstateLabel] === 'Yes',
                inventory: content[labels.inventoryLabel],
                employees: content[labels.employeesLabel],
                reasonForSale: content[labels.reasonLabel],
                internals: '',
            };
            return listing;
        }
        catch (error) {
            console.log(`Fehler bei ${title}`);
            return null;
        }
    });
}

/**
 * Finds the first element matching `selector` whose trimmed text content is
 * exactly `text`.
 *
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector of the candidate elements.
 * @param {string} text - Exact (trimmed) text the element must contain.
 * @returns {Promise<object|null>} An element handle, or `null` when nothing matches.
 */
function findElementWithText(page, selector, text) {
    return __awaiter(this, void 0, void 0, function* () {
        const handle = yield page.evaluateHandle((selector, text) => {
            const elements = Array.from(document.querySelectorAll(selector));
            return elements.find((element) => {
                var _a;
                return ((_a = element.textContent) === null || _a === void 0 ? void 0 : _a.trim()) === text;
            });
        }, selector, text);
        // `evaluateHandle` always resolves to a handle object, even when the
        // page function returned `undefined`, so the handle itself is always
        // truthy. Convert "no match" into a real `null` for the caller.
        const element = handle.asElement();
        if (!element) {
            yield handle.dispose();
            return null;
        }
        return element;
    });
}

/**
 * Opens `detailUrl` in a fresh tab, extracts the listing and always closes
 * the tab again, even when navigation or extraction throws.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @param {string} detailUrl - Absolute URL of the listing detail page.
 * @returns {Promise<object|null>} Result of `extractListingData`.
 */
function scrapeDetailPage(browser, detailUrl) {
    return __awaiter(this, void 0, void 0, function* () {
        const detailPage = yield browser.newPage();
        try {
            yield detailPage.goto(detailUrl, { waitUntil: 'domcontentloaded' });
            return yield extractListingData(detailPage);
        }
        finally {
            yield detailPage.close();
        }
    });
}

/**
 * Crawls a results page and every following page reachable through its
 * `a.next` link, appending each successfully extracted listing to `out`.
 *
 * Pages are processed one after another and each results tab is closed
 * before the next one is opened.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @param {string} url - URL of the first results page.
 * @param {object[]} out - Array that receives the listings (mutated in place).
 * @returns {Promise<void>}
 */
function processPage(browser, url, out) {
    return __awaiter(this, void 0, void 0, function* () {
        // In the browser, `a.href` is the fully resolved URL; only use it when
        // the attribute is actually present.
        const readHref = (el) => (el.getAttribute('href') ? el.href : null);
        // Guards against a "next" link that points back to a page already seen.
        const visited = new Set();
        let currentUrl = url;
        while (currentUrl && !visited.has(currentUrl)) {
            visited.add(currentUrl);
            let nextUrl = null;
            const page = yield browser.newPage();
            try {
                yield page.goto(currentUrl, { waitUntil: 'domcontentloaded' });
                const listings = yield page.$$('div.ResultsGridItem');
                for (const listing of listings) {
                    const detailLinkElement = yield listing.$('a.viewListing');
                    if (!detailLinkElement) {
                        continue;
                    }
                    const detailLink = yield detailLinkElement.evaluate(readHref);
                    if (!detailLink) {
                        continue;
                    }
                    try {
                        const listingData = yield scrapeDetailPage(browser, detailLink);
                        if (listingData) {
                            console.log(JSON.stringify(listingData));
                            out.push(listingData);
                        }
                    }
                    catch (error) {
                        // Best effort: a single broken listing must not abort the crawl.
                        console.error(`Fehler bei ${detailLink}`, error);
                    }
                }
                const nextPageElement = yield page.$('a.next');
                if (nextPageElement) {
                    nextUrl = yield nextPageElement.evaluate(readHref);
                }
            }
            finally {
                yield page.close();
            }
            currentUrl = nextUrl;
        }
    });
}

// Entry point: crawl all result pages and dump the listings to ./listings.json.
(() => __awaiter(void 0, void 0, void 0, function* () {
    const browser = yield puppeteer_1.default.launch({ headless: true, executablePath: '/snap/bin/chromium', devtools: true, slowMo: 50 });
    try {
        const out = [];
        yield processPage(browser, 'https://www.bizmatch.net/results', out);
        yield fs_extra_1.default.writeJson('./listings.json', out);
    }
    finally {
        // Never leave the Chromium process running, even after a failure.
        yield browser.close();
    }
}))().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
//# sourceMappingURL=index.js.map