From f973b87a2ddf7e483af2bda49b7ea406b20bdd58 Mon Sep 17 00:00:00 2001 From: Andreas Knuth Date: Sun, 9 Nov 2025 16:18:06 -0600 Subject: [PATCH] asd --- crawler/changeUserId.js | 40 --------- crawler/import.js | 27 ------ crawler/index.js | 179 ---------------------------------------- crawler/updateFields.js | 40 --------- 4 files changed, 286 deletions(-) delete mode 100644 crawler/changeUserId.js delete mode 100644 crawler/import.js delete mode 100644 crawler/index.js delete mode 100644 crawler/updateFields.js diff --git a/crawler/changeUserId.js b/crawler/changeUserId.js deleted file mode 100644 index d7b3fde..0000000 --- a/crawler/changeUserId.js +++ /dev/null @@ -1,40 +0,0 @@ -"use strict"; -var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { - function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } - return new (P || (P = Promise))(function (resolve, reject) { - function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } - function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } - function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } - step((generator = generator.apply(thisArg, _arguments || [])).next()); - }); -}; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; -Object.defineProperty(exports, "__esModule", { value: true }); -const yargs_1 = __importDefault(require("yargs")); -const helpers_1 = require("yargs/helpers"); -const argv = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv)).argv; -if (!argv.userId) { - console.log(' --userId [any valid userId]'); - process.exit(1); -} -(() => __awaiter(void 0, void 0, void 0, function* () { - console; - const response = yield fetch('http://localhost:3000/bizmatch/listings', { - method: 'GET', - headers: { 'Content-Type': 'application/json' }, - }); - const listings = yield response.json(); - for (const listing of listings) { - listing.userId = argv.userId; - listing.created = new Date(); - listing.updated = new Date(); - const response = yield fetch(`http://localhost:3000/bizmatch/listings/${listing.id}`, { - method: 'PUT', - body: JSON.stringify(listing), - headers: { 'Content-Type': 'application/json' }, - }); - } -}))(); -//# sourceMappingURL=changeUserId.js.map \ No newline at end of file diff --git a/crawler/import.js b/crawler/import.js deleted file mode 100644 index 33e8190..0000000 --- a/crawler/import.js +++ /dev/null @@ -1,27 +0,0 @@ -"use strict"; -var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { - function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } - return new (P || (P = Promise))(function (resolve, reject) { - function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } - function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } - function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } - step((generator = generator.apply(thisArg, _arguments || [])).next()); - }); -}; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; -Object.defineProperty(exports, "__esModule", { value: true }); -const fs_extra_1 = __importDefault(require("fs-extra")); -(() => __awaiter(void 0, void 0, void 0, function* () { - const listings = yield fs_extra_1.default.readJson('./listings.json'); - //listings.forEach(element => { - for (const listing of listings) { - const response = yield fetch('http://localhost:3000/bizmatch/listings', { - method: 'POST', - body: JSON.stringify(listing), - headers: { 'Content-Type': 'application/json' }, - }); - } -}))(); -//# sourceMappingURL=import.js.map \ No newline at end of file diff --git a/crawler/index.js b/crawler/index.js deleted file mode 100644 index d48b259..0000000 --- a/crawler/index.js +++ /dev/null @@ -1,179 +0,0 @@ -"use strict"; -var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { - function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } - return new (P || (P = Promise))(function (resolve, reject) { - function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } - function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } - function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } - step((generator = generator.apply(thisArg, _arguments || [])).next()); - }); -}; -var __importDefault = (this && this.__importDefault) || function (mod) { - return (mod && mod.__esModule) ? mod : { "default": mod }; -}; -Object.defineProperty(exports, "__esModule", { value: true }); -// import puppeteer, { Browser, ElementHandle, Page } from 'puppeteer-core'; -const puppeteer_1 = __importDefault(require("puppeteer")); -const currency_js_1 = __importDefault(require("currency.js")); -const fs_extra_1 = __importDefault(require("fs-extra")); -const typesOfBusiness = [ - { name: 'Automotive', value: '1' }, - { name: 'Industrial Services', value: '2' }, - { name: 'Real Estate', value: '3' }, - { name: 'Uncategorized', value: '4' }, - { name: 'Retail', value: '5' }, - { name: 'Oilfield SVE and MFG.', value: '6' }, - { name: 'Service', value: '7' }, - { name: 'Advertising', value: '8' }, - { name: 'Agriculture', value: '9' }, - { name: 'Franchise', value: '10' }, - { name: 'Professional', value: '11' }, - { name: 'Manufacturing', value: '12' }, - { name: 'Food and Restaurant', value: '13' }, -]; -function getParentElementText(elementHandle) { - return __awaiter(this, void 0, void 0, function* () { - const textContent = elementHandle - ? yield elementHandle.evaluate((el) => { - const getText = (nodes) => { - const result = []; - //debugger; - for (const node of nodes) { - if (node.nodeType === Node.TEXT_NODE && node.nodeValue !== "\n") { - result.push(node.nodeValue.replace('\n', '')); - } - } - return result; - }; - const parent = el.parentElement; - if (!parent) - return null; - let text = ''; - const preResult = Array.from(parent.childNodes).find((e) => e.nodeName === 'PRE'); - if (preResult) { - return getText(Array.from(preResult.childNodes)); - } - else { - return getText(Array.from(parent.childNodes)); - } - }) - : null; - return textContent ? (textContent.length < 2 ? textContent.join() : textContent) : null; - }); -} -function extractListingData(page) { - var _a, _b; - return __awaiter(this, void 0, void 0, function* () { - const labels = { - summaryLabel: 'Summary', - descriptionLabel: 'Description', - categoryLabel: 'Category:', - locationLabel: 'Located in:', - askingPriceLabel: 'Asking Price:', - realEstateLabel: 'Real Estate Included:', - salesRevenueLabel: 'Sales revenue:', - cashflowLabel: 'Cash flow:', - inventoryLabel: 'Inventory:', - brokerLabel: 'Broker licensing:', - reasonLabel: 'Reason for sale:', - employeesLabel: 'Employees:', - }; - const title = (yield page.$eval('div.title', (el) => el.textContent)).trim(); - const content = {}; - for (const key of Object.values(labels)) { - const element = yield findElementWithText(page, 'div.sub-title', key); - try { - content[key] = element ? yield getParentElementText(element) : 'N/A'; - } - catch (error) { - console.log(`Fehler bei : ${key}`); - } - } - let categoryType; - if (content['Category:']) { - categoryType = typesOfBusiness.find((t) => t.name.toLowerCase() === content['Category:'].toLowerCase()); - } - else { - console.log(`---> No Category ...`); - } - if (!categoryType) { - console.log(`---> ${content['Category:']}`); - } - try { - const listing = { - id: 'NA', - userId: '1', - listingsCategory: 'business', - title: title, - summary: Array.isArray(content[labels.summaryLabel]) ? content[labels.summaryLabel] : [content[labels.summaryLabel]], - description: Array.isArray(content[labels.descriptionLabel]) ? content[labels.descriptionLabel] : [content[labels.descriptionLabel]], - type: categoryType.value, - location: content[labels.locationLabel], - price: (0, currency_js_1.default)(content[labels.askingPriceLabel]).value, - salesRevenue: (_a = (0, currency_js_1.default)(content[labels.salesRevenueLabel])) === null || _a === void 0 ? void 0 : _a.value, - cashFlow: (_b = (0, currency_js_1.default)(content[labels.cashflowLabel])) === null || _b === void 0 ? void 0 : _b.value, - brokerLicencing: content[labels.brokerLabel], - established: null, - realEstateIncluded: content[labels.realEstateLabel] === 'Yes' ? true : false, - inventory: content[labels.inventoryLabel], - employees: content[labels.employeesLabel], - reasonForSale: content[labels.reasonLabel], - internals: '', - }; - return listing; - } - catch (error) { - console.log(`Fehler bei ${title}`); - return null; - } - }); -} -function findElementWithText(page, selector, text) { - return __awaiter(this, void 0, void 0, function* () { - const elementHandle = yield page.evaluateHandle((selector, text) => { - const elements = Array.from(document.querySelectorAll(selector)); - return elements.find((element) => { var _a; return ((_a = element.textContent) === null || _a === void 0 ? void 0 : _a.trim()) === text; }); - }, selector, text); - return elementHandle; - }); -} -function processPage(browser, url, out) { - return __awaiter(this, void 0, void 0, function* () { - const page = yield browser.newPage(); - yield page.goto(url, { waitUntil: 'domcontentloaded' }); - const listings = yield page.$$('div.ResultsGridItem'); - for (const listing of listings) { - const detailLinkElement = yield listing.$('a.viewListing'); - if (detailLinkElement) { - const detailLink = yield detailLinkElement.evaluate((el) => el.getAttribute('href')); - const detailPage = yield browser.newPage(); - yield detailPage.goto(detailLink, { waitUntil: 'domcontentloaded' }); - const listingData = yield extractListingData(detailPage); - if (listingData) { - console.log(JSON.stringify(listingData)); - out.push(listingData); - } - yield detailPage.close(); - } - } - const nextPageElement = yield page.$('a.next'); - if (nextPageElement) { - let nextPageLink = yield nextPageElement.evaluate((el) => el.getAttribute('href')); - if (!nextPageLink.startsWith('https')) { - const origin = yield page.evaluate(() => location.origin); - nextPageLink = `${origin}${nextPageLink}`; - } - yield processPage(browser, nextPageLink, out); - } - yield page.close(); - }); -} -(() => __awaiter(void 0, void 0, void 0, function* () { - const browser = yield puppeteer_1.default.launch({ headless: true, executablePath: '/snap/bin/chromium', devtools: true, slowMo: 50 }); - //const browser = await puppeteer.launch({devtools: true}); - const out = []; - yield processPage(browser, 'https://www.bizmatch.net/results', out); - yield fs_extra_1.default.writeJson('./listings.json', out); - yield browser.close(); -}))(); -//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/crawler/updateFields.js b/crawler/updateFields.js deleted file mode 100644 index fe0b13b..0000000 --- a/crawler/updateFields.js +++ /dev/null @@ -1,40 +0,0 @@ -"use strict"; -var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { - function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } - return new (P || (P = Promise))(function (resolve, reject) { - function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } - function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } - function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } - step((generator = generator.apply(thisArg, _arguments || [])).next()); - }); -}; -Object.defineProperty(exports, "__esModule", { value: true }); -//const argv = yargs(hideBin(process.argv)).argv -// if (!argv.userId){ -// console.log(' --userId [any valid userId]') -// process.exit(1) -// } -(() => __awaiter(void 0, void 0, void 0, function* () { - const selectOptionsResponse = yield fetch('http://localhost:3000/bizmatch/select-options', { - method: 'GET', - headers: { 'Content-Type': 'application/json' }, - }); - const selectOptions = yield selectOptionsResponse.json(); - const response = yield fetch('http://localhost:3000/bizmatch/listings', { - method: 'GET', - headers: { 'Content-Type': 'application/json' }, - }); - const listings = yield response.json(); - for (const listing of listings) { - const option = selectOptions.locations.find(l => l.name.toLowerCase() === listing.location.toLowerCase()); - if (option) { - listing.location = option.value; - } - const response = yield fetch(`http://localhost:3000/bizmatch/listings/${listing.id}`, { - method: 'PUT', - body: JSON.stringify(listing), - headers: { 'Content-Type': 'application/json' }, - }); - } -}))(); -//# sourceMappingURL=updateFields.js.map \ No newline at end of file