diff --git a/README.md b/README.md index 6091c0a..280e194 100644 --- a/README.md +++ b/README.md @@ -299,6 +299,7 @@ scraper = cloudscraper.create_scraper(interpreter='nodejs') - **[2captcha](https://www.2captcha.com/)** - **[anticaptcha](https://www.anti-captcha.com/)** +- **[Captcha AI](https://www.captchaai.io/)** - **[CapMonster Cloud](https://capmonster.cloud/)** - **[deathbycaptcha](https://www.deathbycaptcha.com/)** - **[9kw](https://www.9kw.eu/)** @@ -336,7 +337,6 @@ if proxies are set you can disable sending the proxies to 2captcha by setting `n ```python scraper = cloudscraper.create_scraper( - interpreter='nodejs', captcha={ 'provider': '2captcha', 'api_key': 'your_2captcha_api_key' @@ -364,7 +364,6 @@ if proxies are set you can disable sending the proxies to anticaptcha by setting ```python scraper = cloudscraper.create_scraper( - interpreter='nodejs', captcha={ 'provider': 'anticaptcha', 'api_key': 'your_anticaptcha_api_key' @@ -374,6 +373,29 @@ scraper = cloudscraper.create_scraper( ------ +#### Captcha AI + +##### Required `captcha` Parameters + +|Parameter|Value|Required|Default| +|-------------|:-------------:|:-----:|:-----:| +|provider|(string) `captchaai`|yes|| +|api_key|(string)|yes|| + + +##### Example + +```python +scraper = cloudscraper.create_scraper( + captcha={ + 'provider': 'captchaai', + 'api_key': 'your_captchaai_api_key' + } +) +``` + +------ + #### CapMonster Cloud ##### Required `captcha` Parameters @@ -392,7 +414,6 @@ if proxies are set you can disable sending the proxies to CapMonster by setting ```python scraper = cloudscraper.create_scraper( - interpreter='nodejs', captcha={ 'provider': 'capmonster', 'clientKey': 'your_capmonster_clientKey' @@ -416,7 +437,6 @@ scraper = cloudscraper.create_scraper( ```python scraper = cloudscraper.create_scraper( - interpreter='nodejs', captcha={ 'provider': 'deathbycaptcha', 'username': 'your_deathbycaptcha_username', @@ -441,7 +461,6 @@ scraper = cloudscraper.create_scraper( ```python scraper 
from __future__ import absolute_import

import requests

from ..exceptions import (
    CaptchaServiceUnavailable,
    CaptchaAPIError,
    CaptchaTimeout,
    CaptchaParameter,
    CaptchaBadJobID
)


try:
    import polling2
except ImportError:
    raise ImportError("Please install the python module 'polling2' via pip")

from . import Captcha


class captchaSolver(Captcha):
    """Captcha provider that talks to the CaptchaAI HTTP API.

    A solve is a two step exchange against ``self.host``:

    1. ``POST /createTask`` submits the site URL and site key and yields a
       task id (see ``requestSolve``).
    2. ``POST /getTaskResult`` is polled with that task id until a
       ``gRecaptchaResponse`` token shows up (see ``requestJob``).

    Both steps poll every 5 seconds for at most 180 seconds, with a 30 second
    timeout on each individual HTTP request.
    """

    def __init__(self):
        self.host = 'https://api.captchaai.io'
        self.session = requests.Session()
        # Populated by getCaptchaAnswer(); defined here so the attribute
        # always exists on the instance.
        self.api_key = None
        # NOTE(review): presumably the base class registers this instance
        # under the provider name 'captchaai' -- confirm in Captcha.__init__.
        super(captchaSolver, self).__init__('captchaai')

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def _jsonPayload(response):
        """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
        try:
            payload = response.json()
        except ValueError:
            # Body is not valid JSON; callers treat that as "nothing useful yet".
            return {}
        return payload if isinstance(payload, dict) else {}

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def _solutionToken(payload):
        """Return ``payload['solution']['gRecaptchaResponse']`` or ``None`` if absent."""
        solution = payload.get('solution')
        if isinstance(solution, dict):
            return solution.get('gRecaptchaResponse')
        return None

    # ------------------------------------------------------------------------------- #

    @staticmethod
    def checkErrorStatus(response, request_type):
        """Raise if ``response`` signals a server failure or an API level error.

        Args:
            response: The HTTP response returned by the CaptchaAI endpoint.
            request_type: Endpoint label used in the error message
                (``'createTask'`` or ``'getTaskResult'``).

        Raises:
            CaptchaServiceUnavailable: The status code is 500 or 502.
            CaptchaAPIError: The JSON body carries an ``errorDescription``
                that does not contain ``'Current system busy'``.
        """
        if response.status_code in [500, 502]:
            raise CaptchaServiceUnavailable(f'CaptchaAI: Server Side Error {response.status_code}')

        try:
            rPayload = response.json()
        except ValueError:
            # No JSON body means there is no error description to inspect.
            return

        if not isinstance(rPayload, dict):
            return

        errorDescription = rPayload.get('errorDescription', False)
        # A "system busy" description is not raised, so the surrounding poll
        # loop simply tries again on its next step.
        if errorDescription and 'Current system busy' not in str(errorDescription):
            raise CaptchaAPIError(
                f"CaptchaAI: {request_type} -> {errorDescription}"
            )

    # ------------------------------------------------------------------------------- #

    def requestJob(self, jobID):
        """Poll ``/getTaskResult`` until the task identified by ``jobID`` is solved.

        Args:
            jobID: Task id previously returned by ``requestSolve``.

        Returns:
            The ``gRecaptchaResponse`` token taken from the ``solution``
            object of the response.

        Raises:
            CaptchaBadJobID: ``jobID`` is empty.
            CaptchaTimeout: Polling finished without a usable token.
            polling2.TimeoutException: No successful response within 180 seconds.
            CaptchaServiceUnavailable, CaptchaAPIError: See ``checkErrorStatus``.
        """
        if not jobID:
            raise CaptchaBadJobID("CaptchaAI: Error bad job id to request task result.")

        def _checkRequest(response):
            # Error checking happens first so API failures abort the poll
            # instead of being retried.
            self.checkErrorStatus(response, 'getTaskResult')
            if response.ok and self._solutionToken(self._jsonPayload(response)):
                return True
            return None

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/getTaskResult',
                json={
                    'clientKey': self.api_key,
                    'taskId': jobID
                },
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            token = self._solutionToken(self._jsonPayload(response))
            if token:
                return token

        raise CaptchaTimeout(
            "CaptchaAI: Error failed to solve Captcha."
        )

    # ------------------------------------------------------------------------------- #

    def requestSolve(self, captchaType, url, siteKey):
        """Create a solving task through ``/createTask`` and return its task id.

        Args:
            captchaType: Captcha type supplied by the caller.
                NOTE(review): this value is not used; the task type sent is
                always 'HCaptchaTaskProxyless' -- confirm this is intended.
            url: Sent as ``websiteURL``.
            siteKey: Sent as ``websiteKey``.

        Returns:
            The ``taskId`` value from the API response.

        Raises:
            CaptchaBadJobID: Polling finished without a ``taskId``.
            polling2.TimeoutException: No successful response within 180 seconds.
            CaptchaServiceUnavailable, CaptchaAPIError: See ``checkErrorStatus``.
        """
        def _checkRequest(response):
            self.checkErrorStatus(response, 'createTask')
            if response.ok and self._jsonPayload(response).get("taskId", False):
                return True
            return None

        response = polling2.poll(
            lambda: self.session.post(
                f'{self.host}/createTask',
                json={
                    'clientKey': self.api_key,
                    'appId': '9E717405-8C70-49B3-B277-7C2F2196484B',
                    'task': {
                        'type': 'HCaptchaTaskProxyless',
                        'websiteURL': url,
                        'websiteKey': siteKey
                    }
                },
                allow_redirects=False,
                timeout=30
            ),
            check_success=_checkRequest,
            step=5,
            timeout=180
        )

        if response:
            taskId = self._jsonPayload(response).get('taskId')
            if taskId:
                return taskId

        raise CaptchaBadJobID(
            'CaptchaAI: Error no job id was returned.'
        )

    # ------------------------------------------------------------------------------- #

    def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams):
        """Solve a captcha end to end and return the response token.

        Args:
            captchaType: Forwarded to ``requestSolve``.
            url: Page URL the captcha belongs to.
            siteKey: Site key of the captcha.
            captchaParams: Provider settings; must contain a non-empty
                ``'api_key'`` entry.

        Returns:
            The token returned by ``requestJob``.

        Raises:
            CaptchaParameter: ``captchaParams`` has no ``api_key``.
            CaptchaTimeout: Creating the task or fetching its result timed out.
        """
        if not captchaParams.get('api_key'):
            raise CaptchaParameter("CaptchaAI: Missing api_key parameter.")

        self.api_key = captchaParams.get('api_key')

        # Bound before the try block: if requestSolve() itself times out, the
        # handler below must still be able to format the message.
        jobID = None
        try:
            jobID = self.requestSolve(captchaType, url, siteKey)
            return self.requestJob(jobID)
        except polling2.TimeoutException:
            raise CaptchaTimeout(
                f"captchaAI: Captcha solve (task ID: {jobID}) took to long."
            )


# ------------------------------------------------------------------------------- #

captchaSolver()