diff --git a/README.md b/README.md
index 90bab31..8d0ff16 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,17 @@
+
# InScraper
### A playwright based LinkedIn based scraper
-This is (currently) a small library built in typescript in order to scrape LinkedIn profiles using the vanity URL (slug or custom URL) using Playwright and Cheerio.
+This is (currently) a small library written in TypeScript that scrapes LinkedIn profiles by their vanity URL (slug or custom URL) using Playwright ~~and Cheerio~~.
+
+I'm trying to stick to semver but I'm not sure if I'm doing it right, so please be aware that this library is still in development and the API may change.
+
+## Why?
+I was looking for a way to get some information from LinkedIn profiles and found that the existing libraries for this are no longer maintained and use Puppeteer, which is a bit slow and heavy. I wanted to try Playwright, a newer library that can drive Chromium, Firefox and WebKit, and which I find faster and lighter than Puppeteer. I also wanted to use TypeScript, so I decided to build this library. I hope you find it useful.
+
+## Disclaimer
+This library is provided as is, without any warranty. I am not responsible for any misuse of this library. Please be aware that web scraping may be against LinkedIn's terms of service; consider using a side account, because scraping may get your account banned (I haven't seen that happen yet, but it could).
## Installation
@@ -15,21 +24,18 @@ Using Yarn:
To use the library, you will need to provide a valid LinkedIn cookie. You can obtain this by logging into LinkedIn and inspecting the cookies in your browser, search for the one called `li_at`. Once you have the cookie, you can pass it to the `createClient` function, which will return an instance of the `Client` class.
import { createClient } from 'inscraper';
-
+
const cookieString = 'YOUR_COOKIE_HERE';
const client = await createClient(cookieString);
The `Client` class has the following methods:
-- `getProfile(profileSlug: string)`: Returns the profile information of the user with the given profile slug, including their name, headline, about and experience sections.
-
-- `getExperience(profileSlug: string)`: Returns the experience of the user with the given profile slug.
-
-- `getBrowser(): Browser`: Returns the Playwright browser instance.
+- `getProfile(slug: string)`: Returns the profile information of the user with the given profile slug, including their name, headline, about and experience sections.
-- `getContext(): BrowserContext`: Returns the Playwright context instance.
-
-- `close()`: Closes the Playwright browser.
+- `getExperience(slug: string)`: Returns only the experience of the user with the given profile slug.
+
+- `getScreenshot(options)`: You can use this method to get a screenshot of the scraped page. This is useful if you want to see what the page looks like after you have performed some actions. It is not called on the client itself: it is a function on the object returned by `getProfile` or `getExperience` (for example `profile.getScreenshot(options)`), so you must call one of those methods first. It's built on top of Playwright's `screenshot` method, so you can pass the same options to it. See [Playwright's documentation](https://playwright.dev/docs/api/class-page#page-screenshot) for more information.
+
Get some profile info:
@@ -41,19 +47,29 @@ const experience = await client.getExperience('profile-slug);
console.log(experience);
```
-You can also use the `getBrowser()` and `getContext()` methods to perform other actions with Playwright and the `close()` method to close the browser when you are done scraping. Note that if the provided cookie is invalid, the library will throw an error, 'Cookies error'
+Note that if the provided cookie is invalid, the library will throw an error, 'Cookies error'.
## Full Example
```
-import { createClient } from 'linkedin-scraper';
+import fs from "fs";
+import { PageScreenshotOptions } from 'playwright';
+import { createClient } from "inscraper/client";
const cookieString = 'YOUR_COOKIE_HERE';
const client = await createClient(cookieString);
-const profile = await client.getProfile('profile-slug');
-console.log(profile);
+const slug = 'profile-slug';
-const experience = await client.getExperience('profile-slug');
+const profile = await client.getProfile(slug);
+console.log(profile);
+const options: PageScreenshotOptions = {
+ type: "png",
+ fullPage: true,
+}
+const buffer = await profile.getScreenshot(options)
+fs.writeFileSync(`screenshots/${slug}.png`, buffer);
+
+const experience = await client.getExperience(slug);
console.log(experience);
await client.close()
```
@@ -61,7 +77,7 @@ await client.close()
This library uses Playwright, which is compatible with Chromium, Firefox and Webkit. For this implementation, Chromium is being used.
## Dependencies
-This library depends on playwright and cheerio.
+This library depends on playwright ~~and cheerio~~.
## Contributions
Your contributions are always welcome! Please feel free to submit a pull request or open an issue.
@@ -69,6 +85,11 @@ Your contributions are always welcome! Please feel free to submit a pull request
## Features
- [x] Work with Cookies
- [x] Get basic info from Profile
+- [x] Get Experience from Profile
+- [ ] Get Education from Profile
+- [ ] Get Skills from Profile
+- [ ] Get Recommendations from Profile
+- [x] Get Screenshots of a visited profile
- [ ] Extend it to try and use voyager API (see if that's still a thing)
- [ ] Add a test suite
- [ ] Add more features to this list
@@ -79,7 +100,3 @@ This library is provided under the [MIT License](https://opensource.org/licenses
## Contact
Please feel free to contact me if you have any questions or issues.
-## Additional notes
-Please be aware that web scraping may be against the terms of service of LinkedIn, try to use a side account because it may get you banned (haven't see that yet but could be).
-
-This is a work in progress and only with educational purposes, correct information handling is not a joke.
\ No newline at end of file
diff --git a/dist/client.js b/dist/client.js
index 197648d..700e7ce 100644
--- a/dist/client.js
+++ b/dist/client.js
@@ -1,136 +1,38 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.createClient = exports.Client = void 0;
+const experience_scraper_1 = require("./scrapers/experience.scraper");
const playwright_1 = require("playwright");
-const errors_1 = require("./errors");
+const profile_scraper_1 = require("./scrapers/profile.scraper");
class Client {
- browser;
+ #browser;
context;
- page;
slug;
constructor(browser, context) {
- this.browser = browser;
+ this.#browser = browser;
this.context = context;
- this.page = null;
this.slug = null;
}
async close() {
- await this.browser?.close();
+ await this.#browser?.close();
}
- getContext() {
- return this.context;
+ async getProfile(slug) {
+ this.#prepareToScrape(slug);
+ const scraper = new profile_scraper_1.ProfileScraper(this);
+ return await scraper.scrape();
}
- getBrowser() {
- return this.browser;
- }
- async setPage(url) {
- if (!this.page) {
- this.page = await this.context.newPage();
- }
- if (url) {
- await this.page.goto(url);
- }
- }
- async getProfile(profileSlug) {
- if (!this.page && !profileSlug) {
- throw new Error("No page or slug specified");
- }
- if (profileSlug) {
- if (this.page)
- await this.page.close();
- await this.setPage(`https://www.linkedin.com/in/${profileSlug}`);
- this.slug = profileSlug;
- }
- if (!this.page) {
- /*
- This should never happen but typescript is complaining.
-
- not page and not slug = error
- not page and slug = set page
- page and not slug = page already set
- page and slug = close page and set new page
- */
- throw new Error("No page specified");
- }
- if (await this.page.$("meta[content='auth_wall_desktop_profile']")) {
- throw new errors_1.CookiesError();
- }
- const about = await this.page.locator("#about ~ div.display-flex.ph5.pv3 .inline-show-more-text span[aria-hidden='true']").innerText();
- const name = await this.page.innerText("main section:nth-child(1) h1");
- const headline = await this.page.innerText("main section:nth-child(1) .text-body-medium.break-words");
- const experienceViewMore = await this.page.$("#experience ~ .pvs-list__outer-container .pvs-list__footer-wrapper");
- let experience;
- if (experienceViewMore) {
- experience = await this.getExperience(this.slug);
- }
- else {
- const experienceLocator = await this.page.locator("#experience ~ .pvs-list__outer-container li.pvs-list__item--line-separated div.display-flex.flex-column.full-width.align-self-center").all();
- experience = await Promise.all(experienceLocator.map(async (item) => {
- let description = null;
- const descriptionLocator = item.locator("> div.pvs-list__outer-container > ul.pvs-list");
- if (await descriptionLocator.isVisible()) {
- description = await descriptionLocator.locator("li span[aria-hidden=true]").allInnerTexts().then((list) => list.join(" | "));
- description?.replace(/\r?\n|\r/g, ' ');
- }
- const locationLocator = item.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
- return {
- title: await item.locator("span.mr1.t-bold > span:nth-child(1)").innerText(),
- company: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText(),
- date: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText(),
- location: await locationLocator.isVisible() ? await locationLocator.innerText() : null,
- description,
- };
- }));
- }
- return {
- name,
- headline,
- about,
- experience,
- };
+ async getExperience(slug) {
+ this.#prepareToScrape(slug);
+ const scraper = new experience_scraper_1.ExperienceScraper(this);
+ return await scraper.scrapeAlone();
}
- async getScreenshot(profileSlug) {
- if (!this.page && !profileSlug) {
- throw new Error("No page or slug specified");
+ #prepareToScrape(slug) {
+ if (!slug && !this.slug) {
+ throw new Error("No slug specified");
}
- if (!this.page) {
- this.page = await this.context.newPage();
- await this.page.goto(`https://www.linkedin.com/in/${profileSlug}`, {
- waitUntil: "domcontentloaded",
- });
+ if (slug) {
+ this.slug = slug;
}
- const html = await this.page.content();
- if (html.includes("auth_wall_desktop_profile")) {
- throw new errors_1.CookiesError();
- }
- const buffer = await this.page.screenshot({
- type: "png",
- fullPage: true,
- });
- return buffer;
- }
- async getExperience(slug) {
- const page = await this.context.newPage();
- await page.goto(`https://www.linkedin.com/in/${slug}/details/experience/`);
- const locator = await page.locator(".pvs-list__container ul.pvs-list li.pvs-list__paged-list-item").all();
- const experience = await Promise.all(locator.map(async (item) => {
- let description = null;
- const descriptionLocator = item.locator("div > div > div.align-self-center > div:nth-child(2)");
- if (await descriptionLocator.isVisible()) {
- description = await descriptionLocator.locator("li.pvs-list__item--with-top-padding span:nth-child(1)").innerText();
- description = description.replace(/\r?\n|\r/g, ' ');
- }
- const location = item.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
- return {
- title: await item.locator(".flex-column .align-items-center span > span:nth-child(1)").innerText(),
- company: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText(),
- date: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText(),
- location: await location.isVisible ? await location.innerText() : null,
- description,
- };
- }));
- await page.close();
- return experience;
}
}
exports.Client = Client;
diff --git a/dist/client.js.map b/dist/client.js.map
index f74f47b..e4cd3e0 100644
--- a/dist/client.js.map
+++ b/dist/client.js.map
@@ -1 +1 @@
-{"version":3,"file":"client.js","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":";;;AACA,2CAAsC;AAEtC,qCAAwC;AAExC,MAAa,MAAM;IACT,OAAO,CAAU;IACjB,OAAO,CAAiB;IACxB,IAAI,CAAc;IAClB,IAAI,CAAgB;IAE5B,YAAY,OAAgB,EAAE,OAAuB;QACnD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC;IAC9B,CAAC;IAED,UAAU;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,UAAU;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,GAAY;QACxB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACd,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;SAC1C;QACD,IAAI,GAAG,EAAE;YACP,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAC3B;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAAoB;QACnC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE;YAC9B,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAC9C;QAED,IAAI,WAAW,EAAE;YACf,IAAG,IAAI,CAAC,IAAI;gBAAE,MAAM,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAEtC,MAAM,IAAI,CAAC,OAAO,CAAC,+BAA+B,WAAW,EAAE,CAAC,CAAC;YACjE,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC;SACzB;QAED,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACd;;;;;;;cAOE;YACF,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;SACtC;QAED,IAAI,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,2CAA2C,CAAC,EAAE;YAClE,MAAM,IAAI,qBAAY,EAAE,CAAC;SAC1B;QAED,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,mFAAmF,CAAC,CAAC,SAAS,EAAE,CAAC;QACvI,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,8BAA8B,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,yDAAyD,CAAC,CAAC;QAEtG,MAAM,kBAAkB,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,oEAAoE,CAAC,CAAC;QAEnH,IAAI,UAAwB,CAAC;QAC7B,IAAI,kBAAkB,EAAE;YACtB,UAAU,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,IAAK,CAAC,CAAC;SACnD;aAAM;YACL,MAAM,iBAAiB,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,sIAAsI,CAAC,CAAC,GAAG,EAAE,CAAC;YAChM,UAAU,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;gBAClE,IAAI,WAAW,GAAkB,IAAI,CAAC;gBACtC,MAAM,kBAAkB,GAAG,IAAI,CAAC,OAAO
,CAAC,+CAA+C,CAAC,CAAC;gBACzF,IAAI,MAAM,kBAAkB,CAAC,SAAS,EAAE,EAAE;oBACxC,WAAW,GAAG,MAAM,kBAAkB,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC,IAAmB,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;oBAC5I,WAAW,EAAE,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;iBACxC;gBACD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC;gBACxH,OAAO;oBACL,KAAK,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC,SAAS,EAAE;oBAC5E,OAAO,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE;oBAC1H,IAAI,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE;oBACvH,QAAQ,EAAE,MAAM,eAAe,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,MAAM,eAAe,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI;oBACtF,WAAW;iBACZ,CAAC;YACJ,CAAC,CAAC,CAAC,CAAC;SACL;QAED,OAAO;YACL,IAAI;YACJ,QAAQ;YACR,KAAK;YACL,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,WAAoB;QACtC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE;YAC9B,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;SAC9C;QACD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACd,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACzC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,+BAA+B,WAAW,EAAE,EAAE;gBACjE,SAAS,EAAE,kBAAkB;aAC9B,CAAC,CAAC;SACJ;QAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,QAAQ,CAAC,2BAA2B,CAAC,EAAE;YAC9C,MAAM,IAAI,qBAAY,EAAE,CAAC;SAC1B;QACD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC;YACxC,IAAI,EAAE,KAAK;YACX,QAAQ,EAAE,IAAI;SACf,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,IAAY;QACtC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC1C,MAAM,IAAI,CAAC,IAAI,CAAC,+BAA+B,IAAI,sBAAsB,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,+DAA+D,CAAC,CAAC,GAAG,EAAE,CAAC;QAC1G,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;YAC9D,IAAI,WAAW,GAAkB,IAAI,CAAC;YACtC,MAAM,kBAAkB,GAAG,IAAI,CAAC,OAAO,CAAC,sDAAsD,CAAC,CAAC;YAChG,IAAI,MAAM,kBAAkB,CAAC,SAAS,EAAE,EAAE;gBACxC,WAAW,GAAG,MAAM,kBAAkB,CAAC,OAAO,CAAC,uDAAuD,CAAC,CAAC,SAAS,EAAE,CAAA;gBACnH,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;a
ACrD;YAED,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC;YACjH,OAAO;gBACL,KAAK,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,2DAA2D,CAAC,CAAC,SAAS,EAAE;gBAClG,OAAO,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE;gBAC1H,IAAI,EAAE,MAAM,IAAI,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE;gBACvH,QAAQ,EAAE,MAAM,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI;gBACtE,WAAW;aACZ,CAAC;QACJ,CAAC,CAAC,CAAC,CAAC;QAEJ,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,UAAU,CAAC;IACpB,CAAC;CACF;AAnJD,wBAmJC;AAEM,MAAM,YAAY,GAAG,KAAK,EAAE,YAAoB,EAAmB,EAAE;IAC1E,MAAM,MAAM,GAAW;QACrB,IAAI,EAAE,OAAO;QACb,KAAK,EAAE,YAAY;QACnB,MAAM,EAAE,mBAAmB;QAC3B,IAAI,EAAE,GAAG;QACT,OAAO,EAAE,CAAC,CAAC;QACX,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,IAAI;QACZ,QAAQ,EAAE,MAAM;KACjB,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,qBAAQ,CAAC,MAAM,EAAE,CAAC;IACxC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;IAC3C,MAAM,OAAO,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAEnC,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AACtC,CAAC,CAAC;AAjBW,QAAA,YAAY,gBAiBvB"}
\ No newline at end of file
+{"version":3,"file":"client.js","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":";;;AAAA,sEAAkE;AAElE,2CAAsC;AACtC,gEAA4D;AAG5D,MAAa,MAAM;IACjB,QAAQ,CAAU;IAClB,OAAO,CAAiB;IACjB,IAAI,CAAgB;IAE3B,YAAY,OAAgB,EAAE,OAAuB;QACnD,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QACxB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,IAAa;QAC5B,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAC5B,MAAM,OAAO,GAAG,IAAI,gCAAc,CAAC,IAAI,CAAC,CAAC;QACzC,OAAO,MAAM,OAAO,CAAC,MAAM,EAAE,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,IAAa;QAC/B,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAC5B,MAAM,OAAO,GAAG,IAAI,sCAAiB,CAAC,IAAI,CAAC,CAAC;QAC5C,OAAO,MAAM,OAAO,CAAC,WAAW,EAAE,CAAC;IACrC,CAAC;IAED,gBAAgB,CAAC,IAAa;QAC5B,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACvB,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;SACtC;QAED,IAAI,IAAI,EAAE;YACR,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;SAClB;IACH,CAAC;CACF;AApCD,wBAoCC;AAEM,MAAM,YAAY,GAAG,KAAK,EAAE,YAAoB,EAAmB,EAAE;IAC1E,MAAM,MAAM,GAAW;QACrB,IAAI,EAAE,OAAO;QACb,KAAK,EAAE,YAAY;QACnB,MAAM,EAAE,mBAAmB;QAC3B,IAAI,EAAE,GAAG;QACT,OAAO,EAAE,CAAC,CAAC;QACX,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,IAAI;QACZ,QAAQ,EAAE,MAAM;KACjB,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,qBAAQ,CAAC,MAAM,EAAE,CAAC;IACxC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;IAC3C,MAAM,OAAO,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAEnC,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AACtC,CAAC,CAAC;AAjBW,QAAA,YAAY,gBAiBvB"}
\ No newline at end of file
diff --git a/dist/scrapers/experience.scraper.js b/dist/scrapers/experience.scraper.js
new file mode 100644
index 0000000..57966eb
--- /dev/null
+++ b/dist/scrapers/experience.scraper.js
@@ -0,0 +1,75 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ExperienceScraper = void 0;
+const errors_1 = require("../errors");
+const scraper_1 = require("./scraper");
+class ExperienceScraper extends scraper_1.Scraper { // Scrapes a profile's "/details/experience/" page into a list of experience records.
+ constructor(client) { // client is forwarded unchanged to the Scraper base class.
+ super(client);
+ }
+ get url() { // Details-page URL for the current slug; throws Error("No slug specified") when the slug is falsy.
+ if (!this.slug) // slug is not set in this class — presumably provided by the Scraper base; TODO confirm.
+ throw new Error("No slug specified");
+ return `https://www.linkedin.com/in/${this.slug}/details/experience/`;
+ }
+ createUrlBySlug(slug) { // Stores the slug via updateSlug() (defined outside this class), then returns the resulting URL.
+ this.updateSlug(slug);
+ return this.url;
+ }
+ async scrapeAlone() { // Stand-alone entry point: wraps scrape() as { experience, getScreenshot }.
+ const experience = await this.scrape();
+ return {
+ experience,
+ getScreenshot: (options) => this.getScreenshot(options) // delegates to getScreenshot() from outside this class, passing options through.
+ };
+ }
+ async scrape() { // Opens the details page and returns a flat array of experience records.
+ const page = await this.newPage(this.url);
+ if (await page.$("meta[content='auth_wall_desktop_profile']")) { // This meta tag is treated as a cookie failure.
+ throw new errors_1.CookiesError();
+ }
+ let experience;
+ try {
+ const locators = await page.locator("section > .pvs-list__container > div > div > ul.pvs-list > li.pvs-list__paged-list-item").all();
+ const experienceArray = await Promise.all(locators.map(async (locator) => { // Items are processed concurrently.
+ const isMultiple = await locator.locator(".scaffold-finite-scroll__content").isVisible(); // presumably marks a company entry holding several positions — TODO confirm.
+ return isMultiple
+ ? await this.#getMultipleRecords(locator)
+ : await this.#getSingleRecord(locator);
+ }));
+ experience = experienceArray.flat(); // #getMultipleRecords returns an array per item, so flatten one level.
+ }
+ catch (error) { // NOTE(review): any extraction error is logged and swallowed; callers receive [].
+ console.error(error);
+ return [];
+ }
+ return experience;
+ }
+ async #getMultipleRecords(locator) { // Returns one record per nested list item; all share the same company and location.
+ const company = await locator.locator(".pvs-list__item--no-padding-when-nested > div:nth-child(2) > div > a span.hoverable-link-text span:nth-child(1)").innerText();
+ const location = await locator.locator(".pvs-list__item--no-padding-when-nested > div:nth-child(2) > div > a span:nth-child(3) span:nth-child(1)").innerText();
+ const expLocators = await locator.locator("> div > div > div:nth-child(2).align-self-center li.pvs-list__paged-list-item").all();
+ const experienceList = await Promise.all(expLocators.map(async (expLocator) => {
+ const title = await expLocator.locator(">div > div > div:nth-child(2).align-self-center a .align-items-center span[aria-hidden='true']").innerText();
+ const date = await expLocator.locator("a span.t-normal span:nth-child(1)").innerText();
+ const description = await expLocator.locator("div.pvs-list__outer-container span:nth-child(1)").allInnerTexts().then((list) => list.join(" | ").replace(/\r?\n|\r/g, ' ')); // Texts joined with " | "; line breaks become spaces.
+ return { title, company, date, location, description };
+ }));
+ return experienceList;
+ }
+ async #getSingleRecord(locator) { // Returns one record; location and description are null when their element is not visible.
+ const title = await locator.locator(".flex-column .align-items-center span > span:nth-child(1)").innerText();
+ const company = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText();
+ const date = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText();
+ const location = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)").isVisible()
+ ? await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)").innerText()
+ : null;
+ const description = await locator.locator("> div > div > div:nth-child(2).align-self-center > div:nth-child(2).pvs-list__outer-container")
+ .isVisible()
+ ? await locator.locator("li.pvs-list__item--with-top-padding span:nth-child(1)").allInnerTexts().then((list) => list.join(" | ").replace(/\r?\n|\r/g, ' '))
+ : null;
+ return { title, company, date, location, description };
+ }
+}
+exports.ExperienceScraper = ExperienceScraper;
+//# sourceMappingURL=experience.scraper.js.map
\ No newline at end of file
diff --git a/dist/scrapers/experience.scraper.js.map b/dist/scrapers/experience.scraper.js.map
new file mode 100644
index 0000000..ef5c761
--- /dev/null
+++ b/dist/scrapers/experience.scraper.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"experience.scraper.js","sourceRoot":"","sources":["../../src/scrapers/experience.scraper.ts"],"names":[],"mappings":";;;AAGA,sCAAyC;AAEzC,uCAAoC;AAEpC,MAAa,iBAAkB,SAAQ,iBAAO;IAC5C,YAAY,MAAc;QACxB,KAAK,CAAC,MAAM,CAAC,CAAC;IAChB,CAAC;IAED,IAAY,GAAG;QACb,IAAI,CAAC,IAAI,CAAC,IAAI;YAAE,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,CAAC;QACrD,OAAO,+BAA+B,IAAI,CAAC,IAAI,sBAAsB,CAAC;IACxE,CAAC;IAED,eAAe,CAAC,IAAY;QAC1B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACtB,OAAO,IAAI,CAAC,GAAG,CAAC;IAClB,CAAC;IAED,KAAK,CAAC,WAAW;QACf,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;QACvC,OAAO;YACL,UAAU;YACV,aAAa,EAAE,CAAC,OAA+B,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;SAChF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM;QACV,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,MAAM,IAAI,CAAC,CAAC,CAAC,2CAA2C,CAAC,EAAE;YAC7D,MAAM,IAAI,qBAAY,EAAE,CAAC;SAC1B;QAED,IAAI,UAAwB,CAAC;QAC7B,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,yFAAyF,CAAC,CAAC,GAAG,EAAE,CAAC;YACrI,MAAM,eAAe,GAAI,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;gBACxE,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,kCAAkC,CAAC,CAAC,SAAS,EAAE,CAAC;gBACzF,OAAO,UAAU;oBACf,CAAC,CAAC,MAAM,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC;oBACzC,CAAC,CAAC,MAAM,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC,CAAC,CAAC,CAAC;YAEJ,UAAU,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC;SACrC;QAAC,OAAO,KAAK,EAAE;YACd,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACrB,OAAO,EAAE,CAAC;SACX;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,mBAAmB,CAAC,OAAgB;QACxC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,iHAAiH,CAAC,CAAC,SAAS,EAAE,CAAC;QACrK,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,0GAA0G,CAAC,CAAC,SAAS,EAAE,CAAC;QAC/J,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,+EAA+E,CAAC,CAAC,GAAG,EAAE,CAAC;QACjI,MAAM,cAAc,GAAiB,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,UAAmB,EAAE,EAAE;YACnG,MAAM,KAAK,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,gGAAgG,CAAC,CAAC,SAAS,EAAE,CAAC;YACrJ,MAAM,IAAI,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,mCAAmC,CAAC,CAAC,SAAS,EAAE,CAAC;YACvF,MAAM,WAAW,GAAG,MA
AM,UAAU,CAAC,OAAO,CAAC,iDAAiD,CAAC,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC,IAAc,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC;YACrL,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;QACzD,CAAC,CAAC,CAAC,CAAC;QACJ,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,OAAgB;QACrC,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,2DAA2D,CAAC,CAAC,SAAS,EAAE,CAAC;QAC7G,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE,CAAC;QACrI,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE,CAAC;QAClI,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE;YAC7H,CAAC,CAAC,MAAM,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE;YACtH,CAAC,CAAC,IAAI,CAAC;QACf,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,+FAA+F,CAAC;aACjI,SAAS,EAAE;YACZ,CAAC,CAAC,MAAM,OAAO,CAAC,OAAO,CAAC,uDAAuD,CAAC,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC,IAAc,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;YACrK,CAAC,CAAC,IAAI,CAAC;QACf,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;IACzD,CAAC;CACF;AA1ED,8CA0EC"}
\ No newline at end of file
diff --git a/dist/scrapers/profile.scraper.js b/dist/scrapers/profile.scraper.js
new file mode 100644
index 0000000..4b5d0c9
--- /dev/null
+++ b/dist/scrapers/profile.scraper.js
@@ -0,0 +1,94 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ProfileScraper = void 0;
+const errors_1 = require("../errors");
+const experience_scraper_1 = require("./experience.scraper");
+const scraper_1 = require("./scraper");
+class ProfileScraper extends scraper_1.Scraper { // Scrapes name, headline, about and experience from a profile's main page.
+ constructor(client) { // client is forwarded unchanged to the Scraper base class.
+ super(client);
+ }
+ get url() { // NOTE(review): no guard for a missing slug here; a null/undefined slug would be interpolated into the URL as text.
+ return `https://www.linkedin.com/in/${this.slug}`;
+ }
+ createUrlBySlug(slug) { // Stores the slug via updateSlug() (defined outside this class), then returns the resulting URL.
+ this.updateSlug(slug);
+ return this.url;
+ }
+ async scrape() { // Returns { name, headline, about, experience, getScreenshot } for the current slug.
+ const page = await this.newPage(this.url);
+ if (await page.$("meta[content='auth_wall_desktop_profile']")) { // This meta tag is treated as a cookie failure.
+ throw new errors_1.CookiesError();
+ }
+ const experienceViewMore = page.locator("#experience ~ .pvs-list__outer-container .pvs-list__footer-wrapper");
+ let experiencePromise;
+ if (await experienceViewMore.isVisible()) { // Footer visible: the experience list is taken from the details page via ExperienceScraper instead.
+ const experienceScraper = new experience_scraper_1.ExperienceScraper(this.client);
+ experiencePromise = experienceScraper.scrape();
+ }
+ else {
+ // I need to make sure the experience section is loaded before I get the locators
+ await page.waitForSelector("#experience ~ .pvs-list__outer-container > ul > li", { state: "attached" });
+ const experienceItemsLocator = await page.locator("#experience ~ .pvs-list__outer-container > ul > li").all();
+ experiencePromise = this.#getExperience(experienceItemsLocator);
+ }
+ const experience = await experiencePromise;
+ const about = await page.locator("#about ~ div.display-flex.ph5.pv3 .inline-show-more-text span[aria-hidden='true']")
+ .innerText()
+ .then((text) => text.replace(/\r?\n|\r/g, ' ')); // Line breaks in the about text become spaces.
+ const name = await page.innerText("main section:nth-child(1) h1");
+ const headline = await page.innerText("main section:nth-child(1) .text-body-medium.break-words");
+ return {
+ name,
+ headline,
+ about,
+ experience,
+ getScreenshot: (options) => this.getScreenshot(options) // delegates to getScreenshot() from outside this class, passing options through.
+ };
+ }
+ async #getExperience(locators) { // Maps each experience list item to one or more records and flattens the result.
+ const experienceList = await Promise.all(locators.map(async (locator) => {
+ const isMultiple = await locator.locator("ul ul ul li.pvs-list__item--with-top-padding").count(); // A count, used for truthiness: 0 selects the single-record path.
+ return isMultiple
+ ? await this.#getExperienceMultipleRecords(locator)
+ : await this.#getExperiencSingleRecord(locator); // NOTE(review): "Experienc" is misspelled; the name is private, so it can be renamed in src.
+ }));
+ return experienceList.flat();
+ }
+ async #getExperiencSingleRecord(locator) { // Returns one record; location and description are null when their element is not visible.
+ try {
+ const locationLocator = locator.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
+ const descriptionLocator = locator.locator("> div > div > div > ul.pvs-list");
+ const title = await locator.locator("span.mr1.t-bold > span:nth-child(1)").innerText();
+ const company = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText();
+ const date = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText();
+ const location = await locationLocator.isVisible() ? await locationLocator.innerText() : null;
+ const description = await descriptionLocator.isVisible() ? await descriptionLocator.locator("li span[aria-hidden=true]").allInnerTexts().then((list) => list.join(" | ").replace(/\r?\n|\r/g, ' ')) : null;
+ return { title, company, date, location, description };
+ }
+ catch (error) { // Logged, then rethrown to the caller.
+ console.error(error);
+ throw error;
+ }
+ }
+ async #getExperienceMultipleRecords(locator) { // Returns one record per nested list item; all share the same company and location.
+ try {
+ const company = await locator.locator("> div > div > div > a > div > .mr1 > span:nth-child(1)").innerText();
+ const location = await locator.locator("> div > div > div > a > .t-black--light > span:nth-child(1)").innerText();
+ const locators = await locator.locator("> div > div > .pvs-list__outer-container > ul > li").all();
+ return await Promise.all(locators.map(async (innerLocator) => {
+ const descriptionLocator = innerLocator.locator(".align-self-center > .pvs-list__outer-container > ul");
+ const title = await innerLocator.locator(".align-self-center > .justify-space-between a span:nth-child(1) > span:nth-child(1)").innerText();
+ const date = await innerLocator.locator(".align-self-center > .justify-space-between a span:nth-child(2) > span:nth-child(1)").innerText();
+ const description = await descriptionLocator.isVisible() ? await descriptionLocator.locator("li span[aria-hidden=true]").allInnerTexts().then((list) => list.join(" | ").replace(/\r?\n|\r/g, ' ')) : null; // null when the description list is not visible.
+ return { title, company, date, location, description };
+ }));
+ }
+ catch (error) { // Logged, then rethrown to the caller.
+ console.error(error);
+ throw error;
+ }
+ }
+}
+exports.ProfileScraper = ProfileScraper;
+//# sourceMappingURL=profile.scraper.js.map
\ No newline at end of file
diff --git a/dist/scrapers/profile.scraper.js.map b/dist/scrapers/profile.scraper.js.map
new file mode 100644
index 0000000..30f1c8d
--- /dev/null
+++ b/dist/scrapers/profile.scraper.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"profile.scraper.js","sourceRoot":"","sources":["../../src/scrapers/profile.scraper.ts"],"names":[],"mappings":";;;AAEA,sCAAyC;AAEzC,6DAAyD;AACzD,uCAAoC;AAEpC,MAAa,cAAe,SAAQ,iBAAO;IACzC,YAAY,MAAc;QACxB,KAAK,CAAC,MAAM,CAAC,CAAC;IAChB,CAAC;IAED,IAAY,GAAG;QACb,OAAO,+BAA+B,IAAI,CAAC,IAAI,EAAE,CAAC;IACpD,CAAC;IAED,eAAe,CAAC,IAAY;QAC1B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACtB,OAAO,IAAI,CAAC,GAAG,CAAC;IAClB,CAAC;IAED,KAAK,CAAC,MAAM;QACV,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC1C,IAAI,MAAM,IAAI,CAAC,CAAC,CAAC,2CAA2C,CAAC,EAAE;YAC7D,MAAM,IAAI,qBAAY,EAAE,CAAC;SAC1B;QAED,MAAM,kBAAkB,GAAG,IAAI,CAAC,OAAO,CAAC,oEAAoE,CAAC,CAAC;QAE9G,IAAI,iBAAwC,CAAC;QAE7C,IAAI,MAAM,kBAAkB,CAAC,SAAS,EAAE,EAAE;YACxC,MAAM,iBAAiB,GAAG,IAAI,sCAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC7D,iBAAiB,GAAG,iBAAiB,CAAC,MAAM,EAAE,CAAC;SAChD;aAAM;YACL,iFAAiF;YACjF,MAAM,IAAI,CAAC,eAAe,CAAC,oDAAoD,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;YACxG,MAAM,sBAAsB,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,oDAAoD,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9G,iBAAiB,GAAG,IAAI,CAAC,cAAc,CAAC,sBAAsB,CAAC,CAAC;SACjE;QAED,MAAM,UAAU,GAAG,MAAM,iBAAiB,CAAC;QAE3C,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,mFAAmF,CAAC;aAClH,SAAS,EAAE;aACX,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC;QAClD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,8BAA8B,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,yDAAyD,CAAC,CAAC;QAEjG,OAAO;YACL,IAAI;YACJ,QAAQ;YACR,KAAK;YACL,UAAU;YACV,aAAa,EAAE,CAAC,OAA+B,EAAE,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;SAChF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,QAAmB;QACtC,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;YACtE,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,8CAA8C,CAAC,CAAC,KAAK,EAAE,CAAC;YACjG,OAAO,UAAU;gBACb,CAAC,CAAC,MAAM,IAAI,CAAC,6BAA6B,CAAC,OAAO,CAAC;gBACnD,CAAC,CAAC,MAAM,IAAI,CAAC,yBAAyB,CAAC,OAAO,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC,CAAC;QAEJ,OAAO,cAAc,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,yBAAyB,CAAC,OAAgB;QAC9C,IAA
I;YACF,MAAM,eAAe,GAAG,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC;YAC3H,MAAM,kBAAkB,GAAG,OAAO,CAAC,OAAO,CAAC,iCAAiC,CAAC,CAAC;YAE9E,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC,SAAS,EAAE,CAAC;YACvF,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE,CAAC;YACrI,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,iFAAiF,CAAC,CAAC,SAAS,EAAE,CAAC;YAClI,MAAM,QAAQ,GAAG,MAAM,eAAe,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,MAAM,eAAe,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;YAC7F,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,MAAM,kBAAkB,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC,IAAmB,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1N,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;SACxD;QAAC,OAAO,KAAK,EAAE;YACd,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACrB,MAAM,KAAK,CAAC;SACb;IACH,CAAC;IAED,KAAK,CAAC,6BAA6B,CAAC,OAAgB;QAClD,IAAI;YACF,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,wDAAwD,CAAC,CAAC,SAAS,EAAE,CAAC;YAC5G,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,6DAA6D,CAAC,CAAC,SAAS,EAAE,CAAC;YAClH,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,oDAAoD,CAAC,CAAC,GAAG,EAAE,CAAC;YAEnG,OAAO,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,YAAqB,EAAE,EAAE;gBACpE,MAAM,kBAAkB,GAAG,YAAY,CAAC,OAAO,CAAC,sDAAsD,CAAC,CAAC;gBAExG,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,OAAO,CAAC,qFAAqF,CAAC,CAAC,SAAS,EAAE,CAAC;gBAC5I,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,OAAO,CAAC,qFAAqF,CAAC,CAAC,SAAS,EAAE,CAAC;gBAC3I,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,MAAM,kBAAkB,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC,IAAmB,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC1N,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,CAAC;YACzD,CAAC,CAAC,CAAC,CAAC;SACL;QAAC,OAAO,KAAK,EAAE;YACd,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACrB,MAAM,KAAK,CAAC;SACb;IACH,CAAC;CACF;AAlGD,wCAkGC"}
\ No newline at end of file
diff --git a/dist/scrapers/scraper.js b/dist/scrapers/scraper.js
new file mode 100644
index 0000000..debd1f2
--- /dev/null
+++ b/dist/scrapers/scraper.js
@@ -0,0 +1,31 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.Scraper = void 0;
+class Scraper {
+ client;
+ slug;
+ page;
+ constructor(client) {
+ this.client = client;
+ this.slug = client.slug;
+ this.page = null;
+ }
+ async newPage(url) {
+ this.page = await this.client.context.newPage();
+ if (url) {
+ await this.page.goto(url);
+ }
+ return this.page;
+ }
+ updateSlug(slug) {
+ this.slug = slug;
+ }
+ async getScreenshot(options) {
+ if (!this.page) {
+ throw new Error("Page not initialized");
+ }
+ return await this.page.screenshot(options);
+ }
+}
+exports.Scraper = Scraper;
+//# sourceMappingURL=scraper.js.map
\ No newline at end of file
diff --git a/dist/scrapers/scraper.js.map b/dist/scrapers/scraper.js.map
new file mode 100644
index 0000000..bae284a
--- /dev/null
+++ b/dist/scrapers/scraper.js.map
@@ -0,0 +1 @@
+{"version":3,"file":"scraper.js","sourceRoot":"","sources":["../../src/scrapers/scraper.ts"],"names":[],"mappings":";;;AAGA,MAAsB,OAAO;IACjB,MAAM,CAAS;IACf,IAAI,CAAgB;IACpB,IAAI,CAAc;IAE5B,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC;QACxB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAID,KAAK,CAAC,OAAO,CAAC,GAAY;QACxB,IAAI,CAAC,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAChD,IAAI,GAAG,EAAE;YACP,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAC3B;QAED,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAES,UAAU,CAAC,IAAY;QAC/B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,OAA+B;QACjD,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;SACzC;QAED,OAAO,MAAM,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;CACF;AAjCD,0BAiCC"}
\ No newline at end of file
diff --git a/dist/types/client.d.ts b/dist/types/client.d.ts
index 4bdd972..808d819 100644
--- a/dist/types/client.d.ts
+++ b/dist/types/client.d.ts
@@ -1,18 +1,12 @@
-/// <reference types="node" />
 import { Browser, BrowserContext } from "playwright";
-import { Profile } from "./types";
+import { JobExperience, Profile } from "./types";
 export declare class Client {
-    private browser;
-    private context;
-    private page;
-    private slug;
+    #private;
+    context: BrowserContext;
+    slug: string | null;
     constructor(browser: Browser, context: BrowserContext);
     close(): Promise<void>;
-    getContext(): BrowserContext;
-    getBrowser(): Browser;
-    setPage(url?: string): Promise<void>;
-    getProfile(profileSlug?: string): Promise<Profile>;
-    getScreenshot(profileSlug?: string): Promise<Buffer>;
-    private getExperience;
+    getProfile(slug?: string): Promise<Profile>; // slug may be omitted once one is stored on the client
+    getExperience(slug?: string): Promise<JobExperience>; // slug may be omitted once one is stored on the client
 }
 export declare const createClient: (cookieString: string) => Promise<Client>;
diff --git a/dist/types/scrapers/experience.scraper.d.ts b/dist/types/scrapers/experience.scraper.d.ts
new file mode 100644
index 0000000..c5e4ed1
--- /dev/null
+++ b/dist/types/scrapers/experience.scraper.d.ts
@@ -0,0 +1,16 @@
+import { JobExperience } from './../types';
+import { Client } from "../client";
+import { Experience } from "../types";
+import { Scraper } from "./scraper";
+/** Scraper for the `/details/experience/` page of a LinkedIn profile. */
+export declare class ExperienceScraper extends Scraper {
+    #private;
+    constructor(client: Client);
+    private get url();
+    /** Stores `slug` on the scraper and returns the experience page URL built from it. */
+    createUrlBySlug(slug: string): string;
+    /** Runs `scrape()` and returns its result together with a `getScreenshot` callback. */
+    scrapeAlone(): Promise<JobExperience>;
+    /** Opens the experience page and returns the roles extracted from it. */
+    scrape(): Promise<Experience[]>;
+}
diff --git a/dist/types/scrapers/profile.scraper.d.ts b/dist/types/scrapers/profile.scraper.d.ts
new file mode 100644
index 0000000..0892622
--- /dev/null
+++ b/dist/types/scrapers/profile.scraper.d.ts
@@ -0,0 +1,13 @@
+import { Client } from "../client";
+import { Profile } from "../types";
+import { Scraper } from "./scraper";
+/** Scraper for the main page of a LinkedIn profile. */
+export declare class ProfileScraper extends Scraper {
+    #private;
+    constructor(client: Client);
+    private get url();
+    /** Stores `slug` on the scraper and returns the profile URL built from it. */
+    createUrlBySlug(slug: string): string;
+    /** Opens the profile page and returns its name, headline, about text and experience. */
+    scrape(): Promise<Profile>;
+}
diff --git a/dist/types/scrapers/scraper.d.ts b/dist/types/scrapers/scraper.d.ts
new file mode 100644
index 0000000..4d57408
--- /dev/null
+++ b/dist/types/scrapers/scraper.d.ts
@@ -0,0 +1,18 @@
+/// <reference types="node" />
+import { Page, PageScreenshotOptions } from "playwright";
+import { Client } from "../client";
+/** Base class shared by the scrapers. */
+export declare abstract class Scraper {
+    protected client: Client;
+    protected slug: string | null;
+    protected page: Page | null;
+    constructor(client: Client);
+    /** Implemented by each scraper to build its target URL for `slug`. */
+    abstract createUrlBySlug(slug: string): string;
+    /** Opens a page in the client's browser context and navigates to `url` when one is given. */
+    newPage(url?: string): Promise<Page>;
+    /** Replaces the slug stored on the scraper. */
+    protected updateSlug(slug: string): void;
+    /** Screenshots the current page; rejects when no page has been opened. */
+    getScreenshot(options?: PageScreenshotOptions): Promise<Buffer>;
+}
diff --git a/dist/types/types.d.ts b/dist/types/types.d.ts
index 141f862..da26f74 100644
--- a/dist/types/types.d.ts
+++ b/dist/types/types.d.ts
@@ -1,3 +1,5 @@
+/// <reference types="node" />
+import { PageScreenshotOptions } from "playwright";
export interface Experience {
title: string;
company: string;
@@ -10,4 +12,13 @@ export interface Profile {
     headline: string;
     about: string;
     experience: Experience[];
+    /** Takes a screenshot of the page the profile was scraped from. */
+    getScreenshot: (options?: PageScreenshotOptions) => Promise<Buffer>;
+}
+/** Result of scraping only the experience page of a profile. */
+export interface JobExperience {
+    /** Roles extracted from the experience page. */
+    experience: Experience[];
+    /** Takes a screenshot of the page the experience was scraped from. */
+    getScreenshot: (options?: PageScreenshotOptions) => Promise<Buffer>;
 }
diff --git a/package.json b/package.json
index 1b1d4b3..2230a44 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "inscraper",
- "version": "1.1.0",
+ "version": "1.2.0",
"description": "A playwright based scraper for linked in profiles (for now)",
"main": "dist/main.js",
"types": "dist/types/main.d.ts",
@@ -10,11 +10,11 @@
"scripts": {
"start": "nodemon --watch src --exec \"ts-node\" src/main.ts",
"build": "tsc --declaration",
- "test": "echo \"Error: no test specified\" && exit 1"
+ "test": "echo \"Error: no test specified\" && exit 1",
+ "local": "nodemon local/main.ts"
},
"dependencies": {
"@types/node": "^18.11.18",
- "cheerio": "^1.0.0-rc.12",
"nodemon": "^2.0.20",
"playwright": "^1.30.0"
},
diff --git a/src/client.ts b/src/client.ts
index 2453a3f..2be2657 100644
--- a/src/client.ts
+++ b/src/client.ts
@@ -1,154 +1,44 @@
-import { Browser, BrowserContext, Cookie, Page } from "playwright";
+import { ExperienceScraper } from './scrapers/experience.scraper';
+import { Browser, BrowserContext, Cookie, Page, PageScreenshotOptions } from "playwright";
import { chromium } from "playwright";
-import { Experience, Profile } from "./types";
-import { CookiesError } from "./errors";
+import { ProfileScraper } from "./scrapers/profile.scraper";
+import { Experience, JobExperience, Profile } from "./types";
export class Client {
- private browser: Browser;
- private context: BrowserContext;
- private page: Page | null;
- private slug: string | null;
+ #browser: Browser;
+ context: BrowserContext;
+ public slug: string | null;
constructor(browser: Browser, context: BrowserContext) {
- this.browser = browser;
+ this.#browser = browser;
this.context = context;
- this.page = null;
this.slug = null;
}
async close() {
- await this.browser?.close();
+ await this.#browser?.close();
}
- getContext(): BrowserContext {
- return this.context;
+  async getProfile(slug?: string): Promise<Profile> {
+    this.#prepareToScrape(slug); // stores the slug when given; throws if none was ever provided
+    const scraper = new ProfileScraper(this);
+    return await scraper.scrape();
   }
- getBrowser(): Browser {
- return this.browser;
+  async getExperience(slug?: string): Promise<JobExperience> {
+    this.#prepareToScrape(slug); // stores the slug when given; throws if none was ever provided
+    const scraper = new ExperienceScraper(this);
+    return await scraper.scrapeAlone();
   }
- async setPage(url?: string): Promise {
- if (!this.page) {
- this.page = await this.context.newPage();
+ #prepareToScrape(slug?: string) {
+ if (!slug && !this.slug) {
+ throw new Error("No slug specified");
}
- if (url) {
- await this.page.goto(url);
- }
- }
-
- async getProfile(profileSlug?: string): Promise {
- if (!this.page && !profileSlug) {
- throw new Error("No page or slug specified");
- }
-
- if (profileSlug) {
- if(this.page) await this.page.close();
-
- await this.setPage(`https://www.linkedin.com/in/${profileSlug}`);
- this.slug = profileSlug;
- }
-
- if (!this.page) {
- /*
- This should never happen but typescript is complaining.
-
- not page and not slug = error
- not page and slug = set page
- page and not slug = page already set
- page and slug = close page and set new page
- */
- throw new Error("No page specified");
- }
-
- if (await this.page.$("meta[content='auth_wall_desktop_profile']")) {
- throw new CookiesError();
- }
-
- const about = await this.page.locator("#about ~ div.display-flex.ph5.pv3 .inline-show-more-text span[aria-hidden='true']").innerText();
- const name = await this.page.innerText("main section:nth-child(1) h1");
- const headline = await this.page.innerText("main section:nth-child(1) .text-body-medium.break-words");
-
- const experienceViewMore = await this.page.$("#experience ~ .pvs-list__outer-container .pvs-list__footer-wrapper");
- let experience: Experience[];
- if (experienceViewMore) {
- experience = await this.getExperience(this.slug!);
- } else {
- const experienceLocator = await this.page.locator("#experience ~ .pvs-list__outer-container li.pvs-list__item--line-separated div.display-flex.flex-column.full-width.align-self-center").all();
- experience = await Promise.all(experienceLocator.map(async (item) => {
- let description: string | null = null;
- const descriptionLocator = item.locator("> div.pvs-list__outer-container > ul.pvs-list");
- if (await descriptionLocator.isVisible()) {
- description = await descriptionLocator.locator("li span[aria-hidden=true]").allInnerTexts().then((list: Array) => list.join(" | "));
- description?.replace(/\r?\n|\r/g, ' ');
- }
- const locationLocator = item.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
- return {
- title: await item.locator("span.mr1.t-bold > span:nth-child(1)").innerText(),
- company: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText(),
- date: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText(),
- location: await locationLocator.isVisible() ? await locationLocator.innerText() : null,
- description,
- };
- }));
+ if (slug) {
+ this.slug = slug;
}
-
- return {
- name,
- headline,
- about,
- experience,
- };
- }
-
- async getScreenshot(profileSlug?: string): Promise {
- if (!this.page && !profileSlug) {
- throw new Error("No page or slug specified");
- }
- if (!this.page) {
- this.page = await this.context.newPage();
- await this.page.goto(`https://www.linkedin.com/in/${profileSlug}`, {
- waitUntil: "domcontentloaded",
- });
- }
-
- const html = await this.page.content();
- if (html.includes("auth_wall_desktop_profile")) {
- throw new CookiesError();
- }
- const buffer = await this.page.screenshot({
- type: "png",
- fullPage: true,
- });
-
- return buffer;
- }
-
- private async getExperience(slug: string): Promise {
- const page = await this.context.newPage();
- await page.goto(`https://www.linkedin.com/in/${slug}/details/experience/`);
- const locator = await page.locator(".pvs-list__container ul.pvs-list li.pvs-list__paged-list-item").all();
- const experience = await Promise.all(locator.map(async (item) => {
- let description: string | null = null;
- const descriptionLocator = item.locator("div > div > div.align-self-center > div:nth-child(2)");
- if (await descriptionLocator.isVisible()) {
- description = await descriptionLocator.locator("li.pvs-list__item--with-top-padding span:nth-child(1)").innerText()
- description = description.replace(/\r?\n|\r/g, ' ');
- }
-
- const location = item.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
- return {
- title: await item.locator(".flex-column .align-items-center span > span:nth-child(1)").innerText(),
- company: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText(),
- date: await item.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText(),
- location: await location.isVisible ? await location.innerText() : null,
- description,
- };
- }));
-
- await page.close();
- return experience;
}
}
diff --git a/src/scrapers/experience.scraper.ts b/src/scrapers/experience.scraper.ts
new file mode 100644
index 0000000..d2ba365
--- /dev/null
+++ b/src/scrapers/experience.scraper.ts
@@ -0,0 +1,107 @@
+import { JobExperience } from './../types';
+import { Locator, Page, PageScreenshotOptions } from "playwright";
+import { Client } from "../client";
+import { CookiesError } from "../errors";
+import { Experience } from "../types";
+import { Scraper } from "./scraper";
+
+/** Joins scraped text fragments with " | " and flattens line breaks to spaces. */
+function joinFragments(fragments: string[]): string {
+  return fragments.join(" | ").replace(/\r?\n|\r/g, ' ');
+}
+
+/**
+ * Scrapes the `/details/experience/` page of a profile into a flat list of
+ * records. An entry that holds several nested records yields one
+ * `Experience` per nested record.
+ */
+export class ExperienceScraper extends Scraper {
+  constructor(client: Client) {
+    super(client);
+  }
+
+  /** Experience page URL for the current slug; throws when no slug is set. */
+  private get url(): string {
+    if (!this.slug) throw new Error("No slug specified");
+    return `https://www.linkedin.com/in/${this.slug}/details/experience/`;
+  }
+
+  /** Stores `slug` on the scraper and returns the experience page URL for it. */
+  createUrlBySlug(slug: string): string {
+    this.updateSlug(slug);
+    return this.url;
+  }
+
+  /**
+   * Scrapes the experience page on its own.
+   *
+   * @returns The records plus a `getScreenshot` callback bound to this scraper's page.
+   */
+  async scrapeAlone(): Promise<JobExperience> {
+    const experience = await this.scrape();
+    return {
+      experience,
+      getScreenshot: (options?: PageScreenshotOptions) => this.getScreenshot(options),
+    };
+  }
+
+  /**
+   * Opens the experience page and extracts every record listed on it.
+   *
+   * @returns The extracted records, or an empty list when extraction fails.
+   * @throws CookiesError when the page carries the `auth_wall_desktop_profile` meta tag.
+   * @throws Error when no slug is set.
+   */
+  async scrape(): Promise<Experience[]> {
+    const page = await this.newPage(this.url);
+    if (await page.$("meta[content='auth_wall_desktop_profile']")) {
+      throw new CookiesError();
+    }
+
+    try {
+      const locators = await page.locator("section > .pvs-list__container > div > div > ul.pvs-list > li.pvs-list__paged-list-item").all();
+      const records = await Promise.all(locators.map(async (locator) => {
+        const isMultiple = await locator.locator(".scaffold-finite-scroll__content").isVisible();
+        return isMultiple
+          ? await this.#getMultipleRecords(locator)
+          : await this.#getSingleRecord(locator);
+      }));
+
+      return records.flat();
+    } catch (error) {
+      // Deliberate best effort: a failed extraction is logged and reported as an empty list.
+      console.error(error);
+      return [];
+    }
+  }
+
+  /** Extracts one `Experience` per nested record of an entry that holds several of them. */
+  async #getMultipleRecords(locator: Locator): Promise<Experience[]> {
+    // Company and location are read once from the entry itself and reused for every nested record.
+    const company = await locator.locator(".pvs-list__item--no-padding-when-nested > div:nth-child(2) > div > a span.hoverable-link-text span:nth-child(1)").innerText();
+    const location = await locator.locator(".pvs-list__item--no-padding-when-nested > div:nth-child(2) > div > a span:nth-child(3) span:nth-child(1)").innerText();
+    const expLocators = await locator.locator("> div > div > div:nth-child(2).align-self-center li.pvs-list__paged-list-item").all();
+
+    return Promise.all(expLocators.map(async (expLocator: Locator) => {
+      const title = await expLocator.locator(">div > div > div:nth-child(2).align-self-center a .align-items-center span[aria-hidden='true']").innerText();
+      const date = await expLocator.locator("a span.t-normal span:nth-child(1)").innerText();
+      const description = joinFragments(await expLocator.locator("div.pvs-list__outer-container span:nth-child(1)").allInnerTexts());
+      return { title, company, date, location, description };
+    }));
+  }
+
+  /** Extracts the single record of an entry; location and description are `null` when not visible. */
+  async #getSingleRecord(locator: Locator): Promise<Experience> {
+    const locationLocator = locator.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
+    const descriptionContainer = locator.locator("> div > div > div:nth-child(2).align-self-center > div:nth-child(2).pvs-list__outer-container");
+
+    const title = await locator.locator(".flex-column .align-items-center span > span:nth-child(1)").innerText();
+    const company = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText();
+    const date = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText();
+    const location = (await locationLocator.isVisible()) ? await locationLocator.innerText() : null;
+    const description = (await descriptionContainer.isVisible())
+      ? joinFragments(await locator.locator("li.pvs-list__item--with-top-padding span:nth-child(1)").allInnerTexts())
+      : null;
+    return { title, company, date, location, description };
+  }
+}
\ No newline at end of file
diff --git a/src/scrapers/profile.scraper.ts b/src/scrapers/profile.scraper.ts
new file mode 100644
index 0000000..4b785c1
--- /dev/null
+++ b/src/scrapers/profile.scraper.ts
@@ -0,0 +1,134 @@
+import { Locator, PageScreenshotOptions } from "playwright";
+import { Client } from "../client";
+import { CookiesError } from "../errors";
+import { Experience, Profile } from "../types";
+import { ExperienceScraper } from "./experience.scraper";
+import { Scraper } from "./scraper";
+
+/** Joins scraped text fragments with " | " and flattens line breaks to spaces. */
+function joinFragments(fragments: string[]): string {
+  return fragments.join(" | ").replace(/\r?\n|\r/g, ' ');
+}
+
+/**
+ * Scrapes a profile's main page: name, headline, about text and experience.
+ * When the experience section shows its footer wrapper, the dedicated
+ * experience page is scraped instead of the inline list.
+ */
+export class ProfileScraper extends Scraper {
+  constructor(client: Client) {
+    super(client);
+  }
+
+  /** Profile URL for the current slug; throws when no slug is set. */
+  private get url(): string {
+    // Same guard as ExperienceScraper, so we never navigate to ".../in/null".
+    if (!this.slug) throw new Error("No slug specified");
+    return `https://www.linkedin.com/in/${this.slug}`;
+  }
+
+  /** Stores `slug` on the scraper and returns the profile URL for it. */
+  createUrlBySlug(slug: string): string {
+    this.updateSlug(slug);
+    return this.url;
+  }
+
+  /**
+   * Opens the profile page and extracts its data.
+   *
+   * @returns The profile plus a `getScreenshot` callback bound to this scraper's page.
+   * @throws CookiesError when the page carries the `auth_wall_desktop_profile` meta tag.
+   * @throws Error when no slug is set.
+   */
+  async scrape(): Promise<Profile> {
+    const page = await this.newPage(this.url);
+    if (await page.$("meta[content='auth_wall_desktop_profile']")) {
+      throw new CookiesError();
+    }
+
+    const experienceViewMore = page.locator("#experience ~ .pvs-list__outer-container .pvs-list__footer-wrapper");
+
+    let experience: Experience[];
+    if (await experienceViewMore.isVisible()) {
+      const experienceScraper = new ExperienceScraper(this.client);
+      // The new scraper copies the client's slug; point it at ours in case they differ.
+      if (this.slug) experienceScraper.createUrlBySlug(this.slug);
+      experience = await experienceScraper.scrape();
+    } else {
+      // Make sure the experience section is loaded before collecting its locators.
+      await page.waitForSelector("#experience ~ .pvs-list__outer-container > ul > li", { state: "attached" });
+      const experienceItems = await page.locator("#experience ~ .pvs-list__outer-container > ul > li").all();
+      experience = await this.#getExperience(experienceItems);
+    }
+
+    const aboutText = await page.locator("#about ~ div.display-flex.ph5.pv3 .inline-show-more-text span[aria-hidden='true']").innerText();
+    const about = aboutText.replace(/\r?\n|\r/g, ' ');
+    const name = await page.innerText("main section:nth-child(1) h1");
+    const headline = await page.innerText("main section:nth-child(1) .text-body-medium.break-words");
+
+    return {
+      name,
+      headline,
+      about,
+      experience,
+      getScreenshot: (options?: PageScreenshotOptions) => this.getScreenshot(options),
+    };
+  }
+
+  /** Extracts the records of every top-level experience entry into one flat list. */
+  async #getExperience(locators: Locator[]): Promise<Experience[]> {
+    const experienceList = await Promise.all(locators.map(async (locator) => {
+      // An entry is treated as holding several records when it contains triple-nested list items.
+      const nestedItems = await locator.locator("ul ul ul li.pvs-list__item--with-top-padding").count();
+      return nestedItems > 0
+        ? await this.#getExperienceMultipleRecords(locator)
+        : await this.#getExperienceSingleRecord(locator);
+    }));
+
+    return experienceList.flat();
+  }
+
+  /** Extracts the single record of an entry; location and description are `null` when not visible. */
+  async #getExperienceSingleRecord(locator: Locator): Promise<Experience> {
+    try {
+      const locationLocator = locator.locator("div.display-flex.flex-column.full-width > span:nth-child(4) > span:nth-child(1)");
+      const descriptionLocator = locator.locator("> div > div > div > ul.pvs-list");
+
+      const title = await locator.locator("span.mr1.t-bold > span:nth-child(1)").innerText();
+      const company = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(2) > span:nth-child(1)").innerText();
+      const date = await locator.locator("div.display-flex.flex-column.full-width > span:nth-child(3) > span:nth-child(1)").innerText();
+      const location = (await locationLocator.isVisible()) ? await locationLocator.innerText() : null;
+      const description = (await descriptionLocator.isVisible())
+        ? joinFragments(await descriptionLocator.locator("li span[aria-hidden=true]").allInnerTexts())
+        : null;
+      return { title, company, date, location, description };
+    } catch (error) {
+      console.error(error);
+      throw error;
+    }
+  }
+
+  /** Extracts one `Experience` per nested record of an entry that holds several of them. */
+  async #getExperienceMultipleRecords(locator: Locator): Promise<Experience[]> {
+    try {
+      // Company and location are read once from the entry itself and reused for every nested record.
+      const company = await locator.locator("> div > div > div > a > div > .mr1 > span:nth-child(1)").innerText();
+      const location = await locator.locator("> div > div > div > a > .t-black--light > span:nth-child(1)").innerText();
+      const locators = await locator.locator("> div > div > .pvs-list__outer-container > ul > li").all();
+
+      return await Promise.all(locators.map(async (innerLocator: Locator) => {
+        const descriptionLocator = innerLocator.locator(".align-self-center > .pvs-list__outer-container > ul");
+
+        const title = await innerLocator.locator(".align-self-center > .justify-space-between a span:nth-child(1) > span:nth-child(1)").innerText();
+        const date = await innerLocator.locator(".align-self-center > .justify-space-between a span:nth-child(2) > span:nth-child(1)").innerText();
+        const description = (await descriptionLocator.isVisible())
+          ? joinFragments(await descriptionLocator.locator("li span[aria-hidden=true]").allInnerTexts())
+          : null;
+        return { title, company, date, location, description };
+      }));
+    } catch (error) {
+      console.error(error);
+      throw error;
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/scrapers/scraper.ts b/src/scrapers/scraper.ts
new file mode 100644
index 0000000..90f94f2
--- /dev/null
+++ b/src/scrapers/scraper.ts
@@ -0,0 +1,59 @@
+import { Page, PageScreenshotOptions } from "playwright";
+import { Client } from "../client";
+
+/**
+ * Base class for the page scrapers. Holds the owning client, the slug copied
+ * from it at construction time, and the page opened by `newPage`.
+ */
+export abstract class Scraper {
+  protected client: Client;
+  protected slug: string | null;
+  /** Page opened by the most recent `newPage` call; `null` until then. */
+  protected page: Page | null;
+
+  constructor(client: Client) {
+    this.client = client;
+    this.slug = client.slug;
+    this.page = null;
+  }
+
+  /** Implemented by each scraper to build its target URL for `slug`. */
+  abstract createUrlBySlug(slug: string): string;
+
+  /**
+   * Opens a new page in the client's browser context and keeps it on the
+   * scraper so `getScreenshot` can use it later.
+   *
+   * @param url - When given, the page navigates there before being returned.
+   * @returns The newly opened page.
+   */
+  async newPage(url?: string): Promise<Page> {
+    const page = await this.client.context.newPage();
+    this.page = page;
+    if (url) {
+      await page.goto(url);
+    }
+
+    return page;
+  }
+
+  /** Replaces the slug this scraper targets. */
+  protected updateSlug(slug: string): void {
+    this.slug = slug;
+  }
+
+  /**
+   * Takes a screenshot of the page opened by `newPage`.
+   *
+   * @param options - Passed through to Playwright's `page.screenshot`.
+   * @returns The screenshot bytes.
+   * @throws Error when no page has been opened yet.
+   */
+  async getScreenshot(options?: PageScreenshotOptions): Promise<Buffer> {
+    if (!this.page) {
+      throw new Error("Page not initialized");
+    }
+
+    return await this.page.screenshot(options);
+  }
+}
diff --git a/src/types.ts b/src/types.ts
index 6c7de3f..6c73751 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,3 +1,5 @@
+import { PageScreenshotOptions } from "playwright";
+
export interface Experience {
title: string;
company: string;
@@ -11,4 +13,14 @@ export interface Profile {
   headline: string;
   about: string;
   experience: Experience[];
+  /** Takes a screenshot of the page the profile was scraped from. */
+  getScreenshot: (options?: PageScreenshotOptions) => Promise<Buffer>;
+}
+
+/** Result of scraping only the experience page of a profile. */
+export interface JobExperience {
+  /** Roles extracted from the experience page. */
+  experience: Experience[];
+  /** Takes a screenshot of the page the experience was scraped from. */
+  getScreenshot: (options?: PageScreenshotOptions) => Promise<Buffer>;
 }
diff --git a/yarn.lock b/yarn.lock
index 50ef859..05ee405 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -30,11 +30,6 @@ binary-extensions@^2.0.0:
resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.2.0.tgz#75f502eeaf9ffde42fc98829645be4ea76bd9e2d"
integrity sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==
-boolbase@^1.0.0:
- version "1.0.0"
- resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
- integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==
-
brace-expansion@^1.1.7:
version "1.1.11"
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
@@ -50,31 +45,6 @@ braces@~3.0.2:
dependencies:
fill-range "^7.0.1"
-cheerio-select@^2.1.0:
- version "2.1.0"
- resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
- integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
- dependencies:
- boolbase "^1.0.0"
- css-select "^5.1.0"
- css-what "^6.1.0"
- domelementtype "^2.3.0"
- domhandler "^5.0.3"
- domutils "^3.0.1"
-
-cheerio@^1.0.0-rc.12:
- version "1.0.0-rc.12"
- resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
- integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
- dependencies:
- cheerio-select "^2.1.0"
- dom-serializer "^2.0.0"
- domhandler "^5.0.3"
- domutils "^3.0.1"
- htmlparser2 "^8.0.1"
- parse5 "^7.0.0"
- parse5-htmlparser2-tree-adapter "^7.0.0"
-
chokidar@^3.5.2:
version "3.5.3"
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd"
@@ -95,22 +65,6 @@ concat-map@0.0.1:
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==
-css-select@^5.1.0:
- version "5.1.0"
- resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6"
- integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==
- dependencies:
- boolbase "^1.0.0"
- css-what "^6.1.0"
- domhandler "^5.0.2"
- domutils "^3.0.1"
- nth-check "^2.0.1"
-
-css-what@^6.1.0:
- version "6.1.0"
- resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4"
- integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==
-
debug@^3.2.7:
version "3.2.7"
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
@@ -118,41 +72,6 @@ debug@^3.2.7:
dependencies:
ms "^2.1.1"
-dom-serializer@^2.0.0:
- version "2.0.0"
- resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53"
- integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==
- dependencies:
- domelementtype "^2.3.0"
- domhandler "^5.0.2"
- entities "^4.2.0"
-
-domelementtype@^2.3.0:
- version "2.3.0"
- resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d"
- integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==
-
-domhandler@^5.0.1, domhandler@^5.0.2, domhandler@^5.0.3:
- version "5.0.3"
- resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31"
- integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==
- dependencies:
- domelementtype "^2.3.0"
-
-domutils@^3.0.1:
- version "3.0.1"
- resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.0.1.tgz#696b3875238338cb186b6c0612bd4901c89a4f1c"
- integrity sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q==
- dependencies:
- dom-serializer "^2.0.0"
- domelementtype "^2.3.0"
- domhandler "^5.0.1"
-
-entities@^4.2.0, entities@^4.3.0, entities@^4.4.0:
- version "4.4.0"
- resolved "https://registry.yarnpkg.com/entities/-/entities-4.4.0.tgz#97bdaba170339446495e653cfd2db78962900174"
- integrity sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA==
-
fill-range@^7.0.1:
version "7.0.1"
resolved "https://registry.yarnpkg.com/fill-range/-/fill-range-7.0.1.tgz#1919a6a7c75fe38b2c7c77e5198535da9acdda40"
@@ -177,16 +96,6 @@ has-flag@^3.0.0:
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-3.0.0.tgz#b5d454dc2199ae225699f3467e5a07f3b955bafd"
integrity sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==
-htmlparser2@^8.0.1:
- version "8.0.1"
- resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.1.tgz#abaa985474fcefe269bc761a779b544d7196d010"
- integrity sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA==
- dependencies:
- domelementtype "^2.3.0"
- domhandler "^5.0.2"
- domutils "^3.0.1"
- entities "^4.3.0"
-
ignore-by-default@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/ignore-by-default/-/ignore-by-default-1.0.1.tgz#48ca6d72f6c6a3af00a9ad4ae6876be3889e2b09"
@@ -256,28 +165,6 @@ normalize-path@^3.0.0, normalize-path@~3.0.0:
resolved "https://registry.yarnpkg.com/normalize-path/-/normalize-path-3.0.0.tgz#0dcd69ff23a1c9b11fd0978316644a0388216a65"
integrity sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==
-nth-check@^2.0.1:
- version "2.1.1"
- resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d"
- integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==
- dependencies:
- boolbase "^1.0.0"
-
-parse5-htmlparser2-tree-adapter@^7.0.0:
- version "7.0.0"
- resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
- integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==
- dependencies:
- domhandler "^5.0.2"
- parse5 "^7.0.0"
-
-parse5@^7.0.0:
- version "7.1.2"
- resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32"
- integrity sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==
- dependencies:
- entities "^4.4.0"
-
picomatch@^2.0.4, picomatch@^2.2.1:
version "2.3.1"
resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42"