asd
This commit is contained in:
parent
995468fa30
commit
f973b87a2d
|
|
@ -1,40 +0,0 @@
|
|||
"use strict";
|
||||
/**
 * TypeScript-emitted helper that runs a generator function as an async function.
 *
 * An already-installed `this.__awaiter` is reused when present; otherwise this
 * implementation is used.
 *
 * @param {*} thisArg - `this` value the generator is invoked with.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator (defaults to `[]`).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Resolves with the generator's return value, rejects with any uncaught error.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-P values so `.then` can be called on every yielded value.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the settled value; a synchronous throw rejects the outer promise.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        // Re-throw a rejection inside the generator so its own try/catch can handle it.
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
||||
/**
 * TypeScript-emitted interop helper for default imports.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it carries a truthy `__esModule` flag, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
const yargs_1 = __importDefault(require("yargs"));
|
||||
const helpers_1 = require("yargs/helpers");
|
||||
// Parse CLI flags; `--userId` is required because every listing is reassigned to it below.
const argv = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv)).argv;
if (!argv.userId) {
    console.log(' --userId [any valid userId]');
    process.exit(1);
}
/**
 * Fetches all listings from the local API, sets `userId` to the value passed on the
 * command line, resets `created`/`updated` to the current time and PUTs each listing back.
 *
 * A failed initial GET aborts the run. A failed PUT is reported on stderr and the
 * remaining listings are still processed; the process then exits with a non-zero code.
 */
(() => __awaiter(void 0, void 0, void 0, function* () {
    const response = yield fetch('http://localhost:3000/bizmatch/listings', {
        method: 'GET',
        headers: { 'Content-Type': 'application/json' },
    });
    if (!response.ok) {
        throw new Error(`GET listings failed with HTTP ${response.status}`);
    }
    const listings = yield response.json();
    for (const listing of listings) {
        listing.userId = argv.userId;
        listing.created = new Date();
        listing.updated = new Date();
        const updateResponse = yield fetch(`http://localhost:3000/bizmatch/listings/${listing.id}`, {
            method: 'PUT',
            body: JSON.stringify(listing),
            headers: { 'Content-Type': 'application/json' },
        });
        if (!updateResponse.ok) {
            // Keep going: the update is best-effort per listing, but make the failure visible.
            console.error(`PUT listing ${listing.id} failed with HTTP ${updateResponse.status}`);
            process.exitCode = 1;
        }
    }
}))().catch((error) => {
    // Without this handler a network/JSON error would surface only as an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
});
|
||||
//# sourceMappingURL=changeUserId.js.map
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
"use strict";
|
||||
/**
 * TypeScript-emitted helper that runs a generator function as an async function.
 *
 * An already-installed `this.__awaiter` is reused when present; otherwise this
 * implementation is used.
 *
 * @param {*} thisArg - `this` value the generator is invoked with.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator (defaults to `[]`).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Resolves with the generator's return value, rejects with any uncaught error.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-P values so `.then` can be called on every yielded value.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the settled value; a synchronous throw rejects the outer promise.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        // Re-throw a rejection inside the generator so its own try/catch can handle it.
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
||||
/**
 * TypeScript-emitted interop helper for default imports.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it carries a truthy `__esModule` flag, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
const fs_extra_1 = __importDefault(require("fs-extra"));
|
||||
/**
 * Reads `./listings.json` and POSTs every entry to the local listings endpoint,
 * one request at a time.
 *
 * A rejected POST (non-2xx status) is reported on stderr and the remaining listings
 * are still sent; the process then exits with a non-zero code.
 */
(() => __awaiter(void 0, void 0, void 0, function* () {
    const listings = yield fs_extra_1.default.readJson('./listings.json');
    for (const listing of listings) {
        const response = yield fetch('http://localhost:3000/bizmatch/listings', {
            method: 'POST',
            body: JSON.stringify(listing),
            headers: { 'Content-Type': 'application/json' },
        });
        if (!response.ok) {
            // Best-effort import: report the rejected listing instead of dropping it silently.
            console.error(`POST listing "${listing.title}" failed with HTTP ${response.status}`);
            process.exitCode = 1;
        }
    }
}))().catch((error) => {
    // Covers unreadable/invalid JSON input and network failures.
    console.error(error);
    process.exitCode = 1;
});
|
||||
//# sourceMappingURL=import.js.map
|
||||
179
crawler/index.js
179
crawler/index.js
|
|
@ -1,179 +0,0 @@
|
|||
"use strict";
|
||||
/**
 * TypeScript-emitted helper that runs a generator function as an async function.
 *
 * An already-installed `this.__awaiter` is reused when present; otherwise this
 * implementation is used.
 *
 * @param {*} thisArg - `this` value the generator is invoked with.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator (defaults to `[]`).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Resolves with the generator's return value, rejects with any uncaught error.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-P values so `.then` can be called on every yielded value.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the settled value; a synchronous throw rejects the outer promise.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        // Re-throw a rejection inside the generator so its own try/catch can handle it.
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
||||
/**
 * TypeScript-emitted interop helper for default imports.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it carries a truthy `__esModule` flag, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
// import puppeteer, { Browser, ElementHandle, Page } from 'puppeteer-core';
|
||||
const puppeteer_1 = __importDefault(require("puppeteer"));
|
||||
const currency_js_1 = __importDefault(require("currency.js"));
|
||||
const fs_extra_1 = __importDefault(require("fs-extra"));
|
||||
/**
 * Lookup table of business categories.
 * `name` is compared case-insensitively against the scraped "Category:" text and
 * `value` (a numeric id kept as a string) is stored as the listing's `type`.
 */
const typesOfBusiness = [
    { name: 'Automotive', value: '1' },
    { name: 'Industrial Services', value: '2' },
    { name: 'Real Estate', value: '3' },
    { name: 'Uncategorized', value: '4' },
    { name: 'Retail', value: '5' },
    { name: 'Oilfield SVE and MFG.', value: '6' },
    { name: 'Service', value: '7' },
    { name: 'Advertising', value: '8' },
    { name: 'Agriculture', value: '9' },
    { name: 'Franchise', value: '10' },
    { name: 'Professional', value: '11' },
    { name: 'Manufacturing', value: '12' },
    { name: 'Food and Restaurant', value: '13' },
];
|
||||
/**
 * Collects the text nodes that are siblings of the given element (children of its parent).
 * When the parent has a direct `PRE` child, the text nodes of that `PRE` are used instead.
 * Text nodes consisting of a single "\n" are skipped and every newline is stripped from the rest.
 *
 * @param {object|null} elementHandle - Handle exposing `evaluate(fn)`; `fn` receives the DOM element.
 * @returns {Promise<string|string[]|null>} `null` when no handle is given or the element has no
 *   parent; a string when fewer than two text nodes were collected (empty string for none);
 *   otherwise the array of collected strings.
 */
function getParentElementText(elementHandle) {
    return __awaiter(this, void 0, void 0, function* () {
        if (!elementHandle) {
            return null;
        }
        // The callback relies on DOM globals (`Node`) and is handed to `evaluate`,
        // so it must stay self-contained and not close over anything from this module.
        const textContent = yield elementHandle.evaluate((el) => {
            const getText = (nodes) => {
                const result = [];
                for (const node of nodes) {
                    if (node.nodeType === Node.TEXT_NODE && node.nodeValue !== "\n") {
                        // A string pattern in `replace` removes only the first match; strip all newlines.
                        result.push(node.nodeValue.replace(/\n/g, ''));
                    }
                }
                return result;
            };
            const parent = el.parentElement;
            if (!parent) {
                return null;
            }
            const preResult = Array.from(parent.childNodes).find((e) => e.nodeName === 'PRE');
            return getText(Array.from(preResult ? preResult.childNodes : parent.childNodes));
        });
        if (!textContent) {
            return null;
        }
        return textContent.length < 2 ? textContent.join() : textContent;
    });
}
|
||||
/**
 * Scrapes one listing detail page into a listing record.
 *
 * For every known label the matching `div.sub-title` element is looked up and the text
 * next to it is collected ('N/A' when the label element is not found). The "Category:"
 * text is mapped to an entry of `typesOfBusiness`; listings whose category cannot be
 * mapped are logged and skipped.
 *
 * @param {object} page - Page object exposing `$eval`; also passed to `findElementWithText`.
 * @returns {Promise<object|null>} The listing record, or `null` when the category is unknown
 *   or building the record fails.
 */
function extractListingData(page) {
    return __awaiter(this, void 0, void 0, function* () {
        const labels = {
            summaryLabel: 'Summary',
            descriptionLabel: 'Description',
            categoryLabel: 'Category:',
            locationLabel: 'Located in:',
            askingPriceLabel: 'Asking Price:',
            realEstateLabel: 'Real Estate Included:',
            salesRevenueLabel: 'Sales revenue:',
            cashflowLabel: 'Cash flow:',
            inventoryLabel: 'Inventory:',
            brokerLabel: 'Broker licensing:',
            reasonLabel: 'Reason for sale:',
            employeesLabel: 'Employees:',
        };
        const title = (yield page.$eval('div.title', (el) => el.textContent)).trim();
        const content = {};
        for (const key of Object.values(labels)) {
            const element = yield findElementWithText(page, 'div.sub-title', key);
            try {
                content[key] = element ? yield getParentElementText(element) : 'N/A';
            }
            catch (error) {
                // Keep scraping the remaining fields, but do not discard the cause.
                console.log(`Fehler bei : ${key}`, error);
            }
        }
        // Always wrap multi-paragraph fields in an array.
        const asList = (value) => (Array.isArray(value) ? value : [value]);
        const toAmount = (raw) => (0, currency_js_1.default)(raw).value;
        const categoryName = content[labels.categoryLabel];
        let categoryType;
        if (categoryName) {
            // The scraped value is an array when several text nodes were found; only a plain
            // string can match a category name (calling toLowerCase on an array would throw).
            if (typeof categoryName === 'string') {
                const wanted = categoryName.toLowerCase();
                categoryType = typesOfBusiness.find((t) => t.name.toLowerCase() === wanted);
            }
        }
        else {
            console.log(`---> No Category ...`);
        }
        if (!categoryType) {
            // Explicit skip instead of relying on a TypeError from `categoryType.value`.
            console.log(`---> ${categoryName}`);
            console.log(`Fehler bei ${title}`);
            return null;
        }
        try {
            return {
                id: 'NA',
                userId: '1',
                listingsCategory: 'business',
                title: title,
                summary: asList(content[labels.summaryLabel]),
                description: asList(content[labels.descriptionLabel]),
                type: categoryType.value,
                location: content[labels.locationLabel],
                price: toAmount(content[labels.askingPriceLabel]),
                salesRevenue: toAmount(content[labels.salesRevenueLabel]),
                cashFlow: toAmount(content[labels.cashflowLabel]),
                brokerLicencing: content[labels.brokerLabel],
                established: null,
                realEstateIncluded: content[labels.realEstateLabel] === 'Yes',
                inventory: content[labels.inventoryLabel],
                employees: content[labels.employeesLabel],
                reasonForSale: content[labels.reasonLabel],
                internals: '',
            };
        }
        catch (error) {
            console.log(`Fehler bei ${title}`, error);
            return null;
        }
    });
}
|
||||
/**
 * Finds the first element matching `selector` whose trimmed text content equals `text`.
 *
 * `page.evaluateHandle` yields a handle even when nothing matched, so the raw handle is
 * always truthy. The handle is therefore narrowed with `asElement()` and `null` is
 * returned for "not found", which lets callers use a plain truthiness check.
 *
 * @param {object} page - Page object exposing `evaluateHandle`.
 * @param {string} selector - CSS selector of the candidate elements.
 * @param {string} text - Exact (trimmed) text content to look for.
 * @returns {Promise<object|null>} Element handle of the first match, or `null`.
 */
function findElementWithText(page, selector, text) {
    return __awaiter(this, void 0, void 0, function* () {
        const handle = yield page.evaluateHandle((selector, text) => {
            const elements = Array.from(document.querySelectorAll(selector));
            return elements.find((element) => {
                const content = element.textContent;
                return (content === null || content === void 0 ? void 0 : content.trim()) === text;
            });
        }, selector, text);
        const element = handle.asElement();
        if (!element) {
            // Release the handle that merely wraps `undefined`.
            yield handle.dispose();
            return null;
        }
        return element;
    });
}
|
||||
/**
 * Crawls a result page and all pages reachable through its `a.next` link.
 *
 * For every `div.ResultsGridItem` with an `a.viewListing` link the detail page is opened,
 * scraped with `extractListingData`, and the resulting record (if any) is logged as JSON
 * and appended to `out`.
 *
 * Result pages are walked in a loop and each page is closed before the next one is opened;
 * detail pages are closed even when scraping throws. Link targets are resolved against the
 * URL of the page they were found on, so relative and absolute hrefs both work.
 *
 * @param {object} browser - Browser object exposing `newPage()`.
 * @param {string} url - URL of the first result page.
 * @param {Array<object>} out - Array that receives the scraped listing records (mutated).
 * @returns {Promise<void>}
 */
function processPage(browser, url, out) {
    return __awaiter(this, void 0, void 0, function* () {
        let currentUrl = url;
        while (currentUrl) {
            const page = yield browser.newPage();
            let nextUrl = null;
            try {
                yield page.goto(currentUrl, { waitUntil: 'domcontentloaded' });
                const listings = yield page.$$('div.ResultsGridItem');
                for (const listing of listings) {
                    const detailLinkElement = yield listing.$('a.viewListing');
                    if (!detailLinkElement) {
                        continue;
                    }
                    const detailLink = yield detailLinkElement.evaluate((el) => el.getAttribute('href'));
                    if (!detailLink) {
                        continue;
                    }
                    const detailPage = yield browser.newPage();
                    try {
                        yield detailPage.goto(new URL(detailLink, page.url()).href, { waitUntil: 'domcontentloaded' });
                        const listingData = yield extractListingData(detailPage);
                        if (listingData) {
                            console.log(JSON.stringify(listingData));
                            out.push(listingData);
                        }
                    }
                    finally {
                        yield detailPage.close();
                    }
                }
                const nextPageElement = yield page.$('a.next');
                if (nextPageElement) {
                    const nextPageLink = yield nextPageElement.evaluate((el) => el.getAttribute('href'));
                    if (nextPageLink) {
                        nextUrl = new URL(nextPageLink, page.url()).href;
                    }
                }
            }
            finally {
                yield page.close();
            }
            currentUrl = nextUrl;
        }
    });
}
|
||||
/**
 * Crawler entry point: launches the browser, crawls all result pages starting at
 * https://www.bizmatch.net/results and writes the collected listings to `./listings.json`.
 *
 * The browser is closed even when crawling or writing fails, and failures are reported
 * with a non-zero exit code instead of an unhandled rejection.
 */
(() => __awaiter(void 0, void 0, void 0, function* () {
    // NOTE(review): `headless: true` is combined with `devtools: true` and a hard-coded
    // snap Chromium path; confirm against the Puppeteer launch options that this is intended.
    const browser = yield puppeteer_1.default.launch({ headless: true, executablePath: '/snap/bin/chromium', devtools: true, slowMo: 50 });
    try {
        const out = [];
        yield processPage(browser, 'https://www.bizmatch.net/results', out);
        yield fs_extra_1.default.writeJson('./listings.json', out);
    }
    finally {
        yield browser.close();
    }
}))().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
||||
//# sourceMappingURL=index.js.map
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
"use strict";
|
||||
/**
 * TypeScript-emitted helper that runs a generator function as an async function.
 *
 * An already-installed `this.__awaiter` is reused when present; otherwise this
 * implementation is used.
 *
 * @param {*} thisArg - `this` value the generator is invoked with.
 * @param {Array|undefined} _arguments - Arguments forwarded to the generator (defaults to `[]`).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Resolves with the generator's return value, rejects with any uncaught error.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-P values so `.then` can be called on every yielded value.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the settled value; a synchronous throw rejects the outer promise.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        // Re-throw a rejection inside the generator so its own try/catch can handle it.
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
//const argv = yargs(hideBin(process.argv)).argv
|
||||
// if (!argv.userId){
|
||||
// console.log(' --userId [any valid userId]')
|
||||
// process.exit(1)
|
||||
// }
|
||||
/**
 * Normalises the `location` of every listing: when a select option with the same name
 * (case-insensitive) exists in `selectOptions.locations`, the listing's location is
 * replaced by that option's `value`. Every listing is then PUT back to the local API.
 *
 * Failed GETs abort the run. A failed PUT is reported on stderr and the remaining
 * listings are still processed; the process then exits with a non-zero code.
 */
(() => __awaiter(void 0, void 0, void 0, function* () {
    const selectOptionsResponse = yield fetch('http://localhost:3000/bizmatch/select-options', {
        method: 'GET',
        headers: { 'Content-Type': 'application/json' },
    });
    if (!selectOptionsResponse.ok) {
        throw new Error(`GET select-options failed with HTTP ${selectOptionsResponse.status}`);
    }
    const selectOptions = yield selectOptionsResponse.json();
    const response = yield fetch('http://localhost:3000/bizmatch/listings', {
        method: 'GET',
        headers: { 'Content-Type': 'application/json' },
    });
    if (!response.ok) {
        throw new Error(`GET listings failed with HTTP ${response.status}`);
    }
    const listings = yield response.json();
    for (const listing of listings) {
        // Listings without a textual location cannot be matched and are sent back unchanged.
        if (typeof listing.location === 'string') {
            const wanted = listing.location.toLowerCase();
            const option = selectOptions.locations.find((l) => l.name.toLowerCase() === wanted);
            if (option) {
                listing.location = option.value;
            }
        }
        const updateResponse = yield fetch(`http://localhost:3000/bizmatch/listings/${listing.id}`, {
            method: 'PUT',
            body: JSON.stringify(listing),
            headers: { 'Content-Type': 'application/json' },
        });
        if (!updateResponse.ok) {
            console.error(`PUT listing ${listing.id} failed with HTTP ${updateResponse.status}`);
            process.exitCode = 1;
        }
    }
}))().catch((error) => {
    // Without this handler a network/JSON error would surface only as an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
});
|
||||
//# sourceMappingURL=updateFields.js.map
|
||||
Loading…
Reference in New Issue