#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup
import click
from enum import Enum
# Tool version; interpolated into the default User-Agent header below.
VERSION = '0.1.0'
# Module-level requests session object.
sess = requests.Session()
# Default request headers: ask for an (X)HTML document and identify the tool.
# hide_ua() overwrites the 'User-Agent' entry with None.
base_headers = {
    # request (x)html form
    'Accept': 'text/html,application/xhtml+xml',
    'User-Agent': 'htformtool/{version}'.format(version=VERSION),
}
# Headers asking for a plain-text response.
# NOTE(review): presumably sent together with base_headers when the form is
# submitted -- the code that uses them is not visible here; confirm.
post_headers = {
    # request confirmation code
    'Accept': 'text/plain',
}
def hide_ua(ctx, param, value):
    """Blank out the default ``User-Agent`` header when the flag is set.

    The ``(ctx, param, value)`` signature has the shape of a click parameter
    callback (presumably registered on an option; the registration is not
    visible here).

    Nothing happens when *value* is falsy or when ``ctx.resilient_parsing``
    is true. Otherwise ``base_headers['User-Agent']`` is set to ``None``.
    Always returns ``None``; *param* is unused.
    """
    if value and not ctx.resilient_parsing:
        base_headers['User-Agent'] = None
def split_on_ascii_whitespace(inp):
    """Split *inp* into tokens separated by runs of ASCII whitespace.

    Only TAB, LF, FF, CR and SPACE count as separators; other whitespace
    characters (e.g. vertical tab or no-break space) stay inside tokens.
    Leading and trailing separators are ignored, so no empty tokens are
    produced.

    Returns a list of slices of *inp*, in order of appearance.
    """
    separators = '\x09\x0A\x0C\x0D\x20'
    length = len(inp)
    pieces = []
    cursor = 0
    while True:
        # Skip the separator run in front of the next token (if any).
        while cursor < length and inp[cursor] in separators:
            cursor += 1
        if cursor >= length:
            return pieces
        # Consume one token: everything up to the next separator or the end.
        token_start = cursor
        while cursor < length and inp[cursor] not in separators:
            cursor += 1
        pieces.append(inp[token_start:cursor])
def ascii_lowercase(s):
    """Return *s* with the ASCII letters ``A``-``Z`` mapped to ``a``-``z``.

    Unlike ``str.lower()``, characters outside the ASCII uppercase range are
    left untouched.
    """
    from string import ascii_lowercase as lowercase_letters
    from string import ascii_uppercase as uppercase_letters
    # Code point -> code point table covering exactly the 26 ASCII capitals.
    table = {
        ord(upper): ord(lower)
        for upper, lower in zip(uppercase_letters, lowercase_letters)
    }
    return s.translate(table)
def get_encoding(label):
    """Resolve an encoding *label* to a codec; only UTF-8 is supported.

    The label is matched after stripping leading/trailing ASCII whitespace
    and ASCII-lowercasing it. All labels that the WHATWG Encoding Standard
    assigns to UTF-8 are accepted.

    Returns the ``codecs.CodecInfo`` object for UTF-8.
    Raises NotImplementedError for any other label (other encodings are not
    implemented by this tool).
    """
    import codecs
    utf8_labels = (
        'unicode-1-1-utf-8',
        'unicode11utf8',
        'unicode20utf8',
        'utf-8',
        'utf8',
        'x-unicode20utf8',
    )
    normalized = ascii_lowercase(label.strip('\x09\x0A\x0C\x0D\x20'))
    if normalized not in utf8_labels:
        raise NotImplementedError(
            'unsupported encoding label: {!r}'.format(label))
    return codecs.lookup('utf-8')
import re
# Matches a lone CR (not followed by LF) or a lone LF (not preceded by CR),
# i.e. every line break that is not already a CRLF pair.
newline_normalize = re.compile('\x0D(?!\x0A)|(?<!\x0D)\x0A')


def append_an_entry(l, name, value, no_line_break_normalization=False):
    """Append a ``(name, value)`` entry to the entry list *l*.

    Line breaks in *name* are always normalized to CRLF. Line breaks in
    *value* are normalized to CRLF only when *value* is a string and
    *no_line_break_normalization* is false; any other kind of value is
    appended unchanged.

    *l* is mutated in place; nothing is returned.
    Raises TypeError if *name* is not a string.
    """
    # TODO might not be *strictly* correct
    name = newline_normalize.sub('\r\n', name)
    # Pattern.sub raises TypeError (not ValueError) for non-string input, so
    # check the type explicitly instead of relying on an exception handler.
    if isinstance(value, str) and not no_line_break_normalization:
        value = newline_normalize.sub('\r\n', value)
    l.append((name, value))
class FieldState(Enum):
    """The kinds of form control this module distinguishes.

    Members are grouped as in the comments below: regular ``<input>`` types,
    an htformtool-specific ``<input>`` type, non-``<input>`` controls, and
    ``<button>`` types (whose values carry a ``b`` prefix).
    """

    # normal <input> types
    HIDDEN = 'hidden'
    TEXT = 'text'
    SEARCH = 'search'
    TELEPHONE = 'tel'
    URL = 'url'
    EMAIL = 'email'
    PASSWORD = 'password'
    DATE = 'date'
    MONTH = 'month'
    WEEK = 'week'
    TIME = 'time'
    LOCAL_DATE_AND_TIME = 'datetime-local'
    NUMBER = 'number'
    RANGE = 'range'
    COLOR = 'color'
    CHECKBOX = 'checkbox'
    RADIO = 'radio'
    FILE = 'file'
    SUBMIT = 'submit'
    IMAGE = 'image'
    RESET = 'reset'
    BUTTON = 'button'
    # custom, htformtool-specific <input> types
    CREDENTIALS = 'credentials'
    # non-<input> types
    TEXTAREA = 'textarea'
    SELECT = 'select'
    # <button> types
    BSUBMIT = 'bsubmit'
    BRESET = 'breset'
    BBUTTON = 'bbutton'

    def is_button(self, submitter=None):
        """Tell whether this state is a button, optionally of a given kind.

        With ``submitter=None`` any button state matches. With a value equal
        to ``True`` only the submitting buttons (SUBMIT, IMAGE, BSUBMIT)
        match; with a value equal to ``False`` only the non-submitting ones
        (RESET, BUTTON, BRESET, BBUTTON) match. Non-button states never match.
        """
        submitting = (FieldState.SUBMIT, FieldState.IMAGE, FieldState.BSUBMIT)
        non_submitting = (
            FieldState.RESET,
            FieldState.BUTTON,
            FieldState.BRESET,
            FieldState.BBUTTON,
        )
        if self in submitting:
            submits_form = True
        elif self in non_submitting:
            submits_form = False
        else:
            return False
        return submitter is None or submitter == submits_form

    def blocks_implicit_submission(self):
        """Return True for the text-like states that block implicit submission.

        Those are TEXT, SEARCH, URL, TELEPHONE, EMAIL, PASSWORD, DATE, MONTH,
        WEEK, TIME, LOCAL_DATE_AND_TIME and NUMBER.
        """
        blocking_states = (
            FieldState.TEXT,
            FieldState.SEARCH,
            FieldState.URL,
            FieldState.TELEPHONE,
            FieldState.EMAIL,
            FieldState.PASSWORD,
            FieldState.DATE,
            FieldState.MONTH,
            FieldState.WEEK,
            FieldState.TIME,
            FieldState.LOCAL_DATE_AND_TIME,
            FieldState.NUMBER,
        )
        return any(self is state for state in blocking_states)
class ConstraintError(ValueError):
    """Raised when a form field fails validation on submission.

    It subclasses ValueError, so ``except ValueError`` also catches it.
    """
class FormData:
    """
    Represents the data to be submitted by the form.

    Attributes:
        encoding: the codec object that should be used to encode the form
            for sending.
        entry_list: the entry list.
        action: the form's raw action (URL), not parsed.
        enctype: the form's enctype.
        method: the form's method (not sanitized).
        target: the form's target (not sanitized).
    """

    def __init__(self, encoding, entry_list, action, enctype, method, target):
        # How to encode, and what to encode.
        self.encoding = encoding
        self.entry_list = entry_list
        # Where and how to send it; values are stored exactly as given.
        self.action = action
        self.enctype = enctype
        self.method = method
        self.target = target
class Form:
def __init__(self, form):
self.form = form
# these have the same length
self.elements = []
self.fields = []
def submit(self, document_encoding, submitter=None):
"""
Submits the form implicitly, or with the given submitter.
Raises ValueError if the given submitter isn't a valid submitter.
Raises ConstraintError if this field's no-validate state is false and one or more of the form's fields is invalid. (note: ConstraintError is a subtype of ValueError)
Returns a FormData object, or None if implicit submission is not allowed.
"""
if submitter is not None:
if not submitter in self.fields:
raise ValueError
if not submitter.is_button(submitter=True):
raise ValueError
if not submitter.no_validate():
for field in self.fields:
field.check_value()
elif not self.form.get('novalidate'):
blocks_implicit_submission = 0
for field in self.fields:
field.check_value()
if submitter is None:
if field.is_button(submitter=True):
blocks_implicit_submission = 0
submitter = field
elif field._blocks_implicit_submission():
blocks_implicit_submission += 1
if blocks_implicit_submission > 1:
return None
encoding = document_encoding
if self.form.get('accept-charset') is not None:
candidate_enc_labels = split_on_ascii_whitespace(self.form['accept-charset'])
candidate_enc = []
for token in candidate_enc_labels:
enc = get_encoding(token)
if enc is not None:
candidate_enc.append(enc)
if not candidate_enc:
encoding = get_encoding('utf-8')
else:
encoding = candidate_enc[0]
controls = self.fields
entry_list = []
for field in controls:
if field.is_button() and field is not submitter:
continue
if field.is_checkable() and not field.is_checked():
continue
if field.is_image_button():
name = field.field['name'] + '.' if field.field.get('name') else ''
namex = name + 'x'
namey = name + 'y'
append_an_entry(entry_list, namex, 0)
append_an_entry(entry_list, namey, 0)
continue
name = field.field['name']
if field.is_select():
for option in field.get_options():
raise NotImplementedError
elif field.is_checkable():
append_an_entry(entry_list, name, field.get_value())
elif field.is_file():
raise NotImplementedError
elif field.is_hidden() and name == '_charset_':
raise NotImplementedError
elif field.is_textarea():
raise NotImplementedError
else:
append_an_entry(name, field.get_value())
if field.has_valid_dirname():
dirname = field.field['dirname']
raise NotImplementedError
action = None
if submitter is not None and submitter.field.get('formaction') is not None:
action = submitter.field['formaction']
if action is None and self.form.get('action'):
action = self.form['action']
if action is None:
action = ''
enctype = None
if submitter is not None and submitter.field.get('formenctype') is not None:
enctype = submitter.field['formenctype']
if enctype is None and self.form.get('enctype'):
enctype = self.form['enctype']
enctype = ascii_lowercase(enctype)
if enctype not in ('application/x-www-form-urlencoded', 'multipart/form-data', 'text/plain'):
enctype = 'application/x-www-form-urlencoded'
method = None
if submitter is not None and submitter.field.get('formmethod') is not None:
method = submitter.field['formmethod']
if method is None and self.form.get('method'):
method = self.form['method']
method = ascii_lowercase(method)
if method not in ('get', 'post', 'dialog'):
method = 'get'
# WARNING: NOT SANITIZED
target = None
if submitter is not None and submitter.field.get('formtarget') is not None:
target = submitter.field['formtarget']
else:
if self.form.get('target') is not None:
target = self.form['target']
elif self.form.find_parent('[document]').base is not None and self