PipedreamHQ · luancazarine · May 23, 2025 · May 19, 2025 · May 19, 2025 · May 19, 2025
diff --git a/components/scrapeless/actions/get-scrape-result/get-scrape-result.mjs b/components/scrapeless/actions/get-scrape-result/get-scrape-result.mjs
@@ -0,0 +1,52 @@
+import scrapeless from "../../scrapeless.app.mjs";
+
+/**
+ * Pipedream action that retrieves the result of a previously submitted
+ * scrape job by its ID.
+ */
+export default {
+  key: "scrapeless-get-scrape-result",
+  name: "Get Scrape Result",
+  description: "Retrieve the result of a completed scraping job. [See the documentation](https://apidocs.scrapeless.com/api-11949853)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapeless,
+    scrapeJobId: {
+      type: "string",
+      label: "Scrape Job ID",
+      description: "The ID of the scrape job you want to retrieve results for. This ID is provided when you submit a scrape job.",
+    },
+  },
+  /**
+   * Fetches the job result through the app client and exports a summary.
+   *
+   * @param {object} ctx - Run context; `$` is passed to the request and used to export the summary.
+   * @returns {Promise<object>} The API response on success, or
+   *   `{ success: false, ...errorBody }` when the API replied with an error.
+   * @throws Rethrows the caught error when it has no `response` attached.
+   */
+  async run({ $ }) {
+    try {
+      const response = await this.scrapeless.getScrapeResult({
+        $,
+        scrapeJobId: this.scrapeJobId,
+      });
+
+      $.export("$summary", `Successfully retrieved scrape results for job ID ${this.scrapeJobId}`);
+      return response;
+    } catch (error) {
+      // Only errors carrying an HTTP response have a body to report back.
+      // Anything else is rethrown as-is rather than failing on
+      // `response.data` and hiding the real cause.
+      if (!error || !error.response) {
+        throw error;
+      }
+      $.export("$summary", `Successfully retrieved scrape result with error for job ID ${this.scrapeJobId}`);
+      return {
+        success: false,
+        ...error.response.data,
+      };
+    }
+  },
+};
diff --git a/components/scrapeless/actions/submit-scrape-job/submit-scrape-job.mjs b/components/scrapeless/actions/submit-scrape-job/submit-scrape-job.mjs
@@ -0,0 +1,96 @@
+import { ConfigurationError } from "@pipedream/platform";
+import { ACTOR_OPTIONS } from "../../common/constants.mjs";
+import { parseObject } from "../../common/utils.mjs";
+import scrapeless from "../../scrapeless.app.mjs";
+
+/**
+ * Pipedream action that submits a scrape job for a chosen actor, either
+ * synchronously or in async mode.
+ */
+export default {
+  key: "scrapeless-submit-scrape-job",
+  name: "Submit Scrape Job",
+  description: "Submit a new web scraping job with specified target URL and extraction rules. [See the documentation](https://apidocs.scrapeless.com/api-11949852)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapeless,
+    actor: {
+      type: "string",
+      label: "Actor",
+      description: "The actor (scraper) to use for the scrape job, such as Google Search or Amazon Product.",
+      options: ACTOR_OPTIONS,
+    },
+    inputUrl: {
+      type: "string",
+      label: "Input URL",
+      description: "Target URL to scrape. This is the URL of the web page you want to extract data from.",
+      optional: true,
+    },
+    proxyCountry: {
+      type: "string",
+      label: "Proxy Country",
+      description: "The country to route the request through. This can help in bypassing geo-restrictions.",
+      optional: true,
+    },
+    additionalInput: {
+      type: "object",
+      label: "Additional Input",
+      description: "Additional input parameters if you need to pass a specific configuration based on the actor. [See the documentation](https://apidocs.scrapeless.com/) for further details.",
+      optional: true,
+    },
+    asyncMode: {
+      type: "boolean",
+      label: "Async Mode",
+      description: "Whether to run the scrape job in asynchronous mode. If set to true, the job will be processed in the background.",
+    },
+  },
+  /**
+   * Builds the request payload from the props and submits it.
+   *
+   * @param {object} ctx - Run context; `$` is passed to the request and used to export the summary.
+   * @returns {Promise<object>} The API response.
+   * @throws {ConfigurationError} When the API replies with an error response.
+   * @throws Rethrows the caught error when it has no `response` attached.
+   */
+  async run({ $ }) {
+    try {
+      const data = {
+        actor: this.actor,
+        // `parseObject` returns undefined when no additional input is given;
+        // default to an object so `url` can be set below.
+        input: parseObject(this.additionalInput) || {},
+      };
+
+      if (this.asyncMode) {
+        data.async = this.asyncMode;
+      }
+      if (this.inputUrl) {
+        data.input.url = this.inputUrl;
+      }
+      if (this.proxyCountry) {
+        data.proxy = {
+          country: this.proxyCountry,
+        };
+      }
+
+      const response = await this.scrapeless.submitScrapeJob({
+        $,
+        data,
+      });
+
+      $.export("$summary", this.asyncMode
+        ? `Successfully submitted scrape job with ID: ${response.taskId}`
+        : "Successfully scraped the target configuration.");
+      return response;
+    } catch (error) {
+      // Errors without an HTTP response have no API message to surface;
+      // rethrow them unchanged instead of failing on `response.data`.
+      if (!error || !error.response) {
+        throw error;
+      }
+      const { data: errorBody } = error.response;
+      throw new ConfigurationError((errorBody && errorBody.message) || error.message);
+    }
+  },
+};
diff --git a/components/scrapeless/common/constants.mjs b/components/scrapeless/common/constants.mjs
@@ -0,0 +1,142 @@
+/**
+ * Selectable actors: each entry pairs a display label with the actor
+ * identifier used as the option value.
+ */
+export const ACTOR_OPTIONS = [
+  {
+    label: "Shopee",
+    value: "scraper.shopee",
+  },
+  {
+    label: "BR Sites - Solucoes cnpjreva",
+    value: "scraper.solucoes",
+  },
+  {
+    label: "BR Sites - Solucoes certidaointernet",
+    value: "scraper.solucoes.certidaointernet",
+  },
+  {
+    label: "BR Sites - Servicos receita",
+    value: "scraper.servicos.receita",
+  },
+  {
+    label: "BR Sites - Consopt",
+    value: "scraper.consopt",
+  },
+  {
+    label: "Avnet",
+    value: "scraper.avnet",
+  },
+  {
+    label: "Arrow",
+    value: "scraper.arrow",
+  },
+  {
+    label: "Airline Iberia",
+    value: "scraper.iberia",
+  },
+  {
+    label: "Airline Expedia",
+    value: "scraper.expedia",
+  },
+  {
+    label: "Airline Kayak",
+    value: "scraper.kayak",
+  },
+  {
+    label: "Amazon Product",
+    value: "scraper.amazon.product",
+  },
+  {
+    label: "Amazon Seller",
+    value: "scraper.amazon.seller",
+  },
+  {
+    label: "Amazon Keywords",
+    value: "scraper.amazon.keywords",
+  },
+  {
+    label: "Temu",
+    value: "scraper.temu.mobile.detail",
+  },
+  {
+    label: "Google Search",
+    value: "scraper.google.search",
+  },
+  {
+    label: "Google Trends",
+    value: "scraper.google.trends",
+  },
+  {
+    label: "Google Flights",
+    value: "scraper.google.flights",
+  },
+  {
+    label: "Google Flights Chart",
+    value: "scraper.google.flights.chart",
+  },
+  {
+    label: "Google Maps",
+    value: "scraper.google.maps",
+  },
+  {
+    label: "Google Scholar",
+    value: "scraper.google.scholar",
+  },
+  {
+    label: "Google Jobs",
+    value: "scraper.google.jobs",
+  },
+  {
+    label: "Google Shopping",
+    value: "scraper.google.shopping",
+  },
+  {
+    label: "Google Hotels",
+    value: "scraper.google.hotels",
+  },
+  {
+    label: "Google News",
+    value: "scraper.google.news",
+  },
+  {
+    label: "Google Lens",
+    value: "scraper.google.lens",
+  },
+  {
+    label: "Google Finance",
+    value: "scraper.google.finance",
+  },
+  {
+    label: "Google Product",
+    value: "scraper.google.product",
+  },
+  {
+    label: "Google Play Games",
+    value: "scraper.google.play.games",
+  },
+  {
+    label: "Google Play Books",
+    value: "scraper.google.play.books",
+  },
+  {
+    label: "Google Play Movies",
+    value: "scraper.google.play.movies",
+  },
+  {
+    label: "Google Play Product",
+    value: "scraper.google.play.product",
+  },
+  {
+    label: "Google Play Apps",
+    value: "scraper.google.play",
+  },
+  {
+    label: "Google Ads",
+    value: "scraper.google.ads",
+  },
+  {
+    label: "Mouser",
+    value: "scraper.mouser",
+  },
+];
diff --git a/components/scrapeless/common/utils.mjs b/components/scrapeless/common/utils.mjs
@@ -0,0 +1,31 @@
+/**
+ * Parses `value` as JSON when it is a string; returns it unchanged when it
+ * is not a string or is not valid JSON.
+ */
+const parseJsonOrKeep = (value) => {
+  if (typeof value !== "string") {
+    return value;
+  }
+  try {
+    return JSON.parse(value);
+  } catch (err) {
+    return value;
+  }
+};
+
+/**
+ * Normalizes a value that may hold JSON text.
+ *
+ * @param {*} obj - Value to normalize.
+ * @returns {*} `undefined` for falsy input; an array with each string item
+ *   JSON-parsed where possible; a parsed value for a JSON string; otherwise
+ *   the input unchanged.
+ */
+export const parseObject = (obj) => {
+  if (!obj) {
+    return undefined;
+  }
+  return Array.isArray(obj)
+    ? obj.map((entry) => parseJsonOrKeep(entry))
+    : parseJsonOrKeep(obj);
+};
diff --git a/components/scrapeless/package.json b/components/scrapeless/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/scrapeless",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream Scrapeless Components",
   "main": "scrapeless.app.mjs",
   "keywords": [
@@ -11,5 +11,8 @@
   "author": "Pipedream <[email protected]> (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3"
   }
-}
+}
diff --git a/components/scrapeless/scrapeless.app.mjs b/components/scrapeless/scrapeless.app.mjs
@@ -1,11 +1,59 @@
+import { axios } from "@pipedream/platform";
+
 export default {
   type: "app",
   app: "scrapeless",
-  propDefinitions: {},
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    /** Returns the base URL every API path is appended to. */
+    _baseUrl() {
+      return "https://api.scrapeless.com/api/v1";
+    },
+    /** Returns the headers carrying the connected account's API key. */
+    _headers() {
+      return {
+        "x-api-token": `${this.$auth.api_key}`,
+      };
+    },
+    /**
+     * Sends a request through the platform `axios` helper.
+     *
+     * @param {object} args
+     * @param {object} [args.$=this] - Context handed to `axios`; defaults to the app.
+     * @param {string} args.path - Path appended to the base URL.
+     * Any other keys are spread into the `axios` config after `url` and
+     * `headers`, so they can override both.
+     */
+    _makeRequest({
+      $ = this, path, ...opts
+    }) {
+      return axios($, {
+        url: this._baseUrl() + path,
+        headers: this._headers(),
+        ...opts,
+      });
+    },
+    /** Submits a scrape job via `POST /scraper/request`; `opts` is forwarded to `_makeRequest`. */
+    submitScrapeJob(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/scraper/request",
+        ...opts,
+      });
+    },
+    /**
+     * Fetches the result of a scrape job.
+     *
+     * @param {object} args
+     * @param {string} args.scrapeJobId - ID of the job to look up.
+     * Any other keys (for example `$`) are forwarded to `_makeRequest`.
+     */
+    getScrapeResult({
+      scrapeJobId, ...opts
+    }) {
+      return this._makeRequest({
+        path: `/scraper/result/${scrapeJobId}`,
+        ...opts,
+      });
     },
   },
 };
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml