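Doctests for lxml.html form handling: reading and updating form controls through ``form.inputs``, ``form.fields`` and ``form.form_values()``.
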
>>> from lxml.html import usedoctest
>>> from lxml.html import fromstring, tostring
>>> h = fromstring('''<html><body>
... <form action="test">
...   <input type="hidden" name="hidden_field" value="hidden_value">
...   <input type="text" name="text_field" value="text_value">
...   <input type="checkbox" name="single_checkbox">
...   <input type="checkbox" name="single_checkbox2" value="good">
...   <input type="checkbox" name="check_group" value="1">
...   <input type="checkbox" name="check_group" value="2" checked>
...   <input type="checkbox" name="check_group" value="3" checked>
...   <input type="checkbox" name="check_group" value="4">
...   <textarea name="textarea_field">some text</textarea>
...   <label for="value1">value 1</label>
...     <input type="radio" name="radios" value="value1" id="value1">
...   <label for="value2">value 2</label>
...     <input type="radio" name="radios" value="value2" id="value2">
...   <label for="value3">value 3</label>
...     <input type="radio" name="radios" value="value3" id="value3" checked>
...   <select name="select1">
...     <option> No value </option>
...     <option value="">Empty</option>
...     <option value="1">number 1</option>
...   </select>
...   <select name="select2" multiple>
...     <option value="1">number 1</option>
...     <option value="2">number 2</option>
...     <option value="3">number 3</option>
...     <option>number 4</option>
...   </select>
...   <select name="select3">
...     <option value="01 " selected>text 1</option>
...     <option value=" 02">text 2</option>
...   </select>
...   <select name="select4" multiple>
...     <option value="01 " selected>text 1</option>
...     <option value=" 02">text 2</option>
...   </select>
...   <input type="file" name="file_field" value="nonsense_value">
...   <input type="submit" name="submit1" value="submit">
...   <input type="submit" name="submit2" value="submit">
...   <input type="reset" name="reset1">linksys
... </form>
... </body></html>''', base_url='http://example.org/form.html')
>>> h.base_url
u'http://example.org/form.html'
>>> f = h.forms[0]
>>> f.action
u'http://example.org/test'
>>> f.method
'GET'
>>> f.inputs # doctest:+NOPARSE_MARKUP
<InputGetter for form 0>
>>> hidden = f.inputs['hidden_field']
>>> hidden.checkable
False
>>> hidden.value
'hidden_value'
>>> hidden.value = 'new value'
>>> tostring(hidden, with_tail=False)
b'<input type="hidden" name="hidden_field" value="new value">'
>>> checkbox = f.inputs['single_checkbox']
>>> checkbox.checkable
True
>>> checkbox.type
'checkbox'
>>> checkbox.checked
False
>>> print(checkbox.value)
None
>>> checkbox.checked = True
>>> checkbox.value
'on'
>>> tostring(checkbox, with_tail=False)
b'<input type="checkbox" name="single_checkbox" checked>'
>>> checkbox2 = f.inputs['single_checkbox2']
>>> checkbox2.checked = True
>>> checkbox2.value
'good'
>>> group = f.inputs['check_group']
>>> group.value # doctest:+NOPARSE_MARKUP
<CheckboxValues {'2', '3'} for checkboxes name='check_group'>
>>> group.value.add('1')
>>> group.value # doctest:+NOPARSE_MARKUP
<CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
>>> tostring(group[0], with_tail=False)
b'<input type="checkbox" name="check_group" value="1" checked>'
>>> group.value_options
['1', '2', '3', '4']
>>> group.value.add('doesnotexist')
Traceback (most recent call last):
    ...
KeyError: "No checkbox with value 'doesnotexist'"
>>> textarea = f.inputs['textarea_field']
>>> textarea.value
'some text'
>>> radios = f.inputs['radios']
>>> radios[0].label.text
'value 1'
>>> radios.value
'value3'
>>> radios.value = 'value1'
>>> radios.value
'value1'
>>> tostring(radios[0], with_tail=False)
b'<input type="radio" name="radios" value="value1" id="value1" checked>'
>>> radios.value = None
>>> tostring(radios[0], with_tail=False)
b'<input type="radio" name="radios" value="value1" id="value1">'
>>> radios.value_options
['value1', 'value2', 'value3']
>>> select = f.inputs['select1']
>>> print(select.value)
No value
>>> select.value = ""
>>> select.value
''
>>> select.value = 'asdf'
Traceback (most recent call last):
    ...
ValueError: There is no option with the value of 'asdf'
>>> select.value_options
['No value', '', '1']
>>> select.value = 'No value'
>>> select.value
'No value'
>>> select = f.inputs['select2']
>>> select.value # doctest:+NOPARSE_MARKUP
<MultipleSelectOptions {} for select name='select2'>
>>> select.value.update(['2', '3'])
>>> select.value # doctest:+NOPARSE_MARKUP
<MultipleSelectOptions {'2', '3'} for select name='select2'>
>>> select.value.remove('3')
>>> select.value.add('asdf')
Traceback (most recent call last):
    ...
ValueError: There is no option with the value 'asdf'
>>> select.value.add('number 4')
>>> select.value # doctest:+NOPARSE_MARKUP
<MultipleSelectOptions {'2', 'number 4'} for select name='select2'>
>>> select.value.remove('number 4')
>>> select.value_options
['1', '2', '3', 'number 4']
>>> select = f.inputs['select3']
>>> select.value
'01 '
>>> select.value_options
['01 ', ' 02']
>>> select.value = " 02"
>>> select.value
' 02'
>>> select = f.inputs['select4']
>>> select.value # doctest:+NOPARSE_MARKUP
<MultipleSelectOptions {'01 '} for select name='select4'>
>>> select.value.add(' 02')
>>> select.value # doctest:+NOPARSE_MARKUP
<MultipleSelectOptions {'01 ', ' 02'} for select name='select4'>
>>> try: from urllib import urlencode
... except ImportError: from urllib.parse import urlencode
>>> print(urlencode(f.form_values()))
hidden_field=new+value&text_field=text_value&single_checkbox=on&single_checkbox2=good&check_group=1&check_group=2&check_group=3&textarea_field=some+text&select1=No+value&select2=2&select3=+02&select4=01+&select4=+02
>>> fields = f.fields
>>> fields # doctest:+NOPARSE_MARKUP
<FieldsDict for form 0>
>>> for name, value in sorted(fields.items()):
...     print('%s: %r' % (name, value))
check_group: <CheckboxValues {'1', '2', '3'} for checkboxes name='check_group'>
file_field: 'nonsense_value'
hidden_field: 'new value'
radios: None
reset1: None
select1: 'No value'
select2: <MultipleSelectOptions {'2'} for select name='select2'>
select3: ' 02'
select4: <MultipleSelectOptions {'01 ', ' 02'} for select name='select4'>
single_checkbox: 'on'
single_checkbox2: 'good'
submit1: 'submit'
submit2: 'submit'
text_field: 'text_value'
textarea_field: 'some text'

>>> import lxml.html
>>> tree = lxml.html.fromstring('''
... <html><body>
...  <form>
...   <input name="foo" value="bar" disabled/>
...   <input type="submit" />
...  </form>
... </body></html>
... ''')
>>> tree # doctest: +ELLIPSIS
<Element html at ...>
>>> tree.forms[0] # doctest: +ELLIPSIS
<Element form at ...>
>>> tree.forms[0].fields # doctest: +NOPARSE_MARKUP
<FieldsDict for form 0>
>>> list(tree.forms[0].fields.keys())
['foo']
>>> list(tree.forms[0].fields.items())
[('foo', 'bar')]
>>> list(tree.forms[0].fields.values())
['bar']

>>> ('foo', 'bar') not in tree.forms[0].form_values()
True
>>> tree = lxml.html.fromstring('''
... <html><body>
...  <form>
...   <textarea name="foo">some <b>text<br>content</b> with tags</textarea>
...  </form>
... </body></html>
... ''')
>>> list(tree.forms[0].fields.keys())
['foo']
>>> ta = tree.forms[0].inputs['foo']
>>> print(ta.value)
some <b>text<br>content</b> with tags
>>> ta.value = 'abc<br>def'
>>> print(ta.value)
abc<br>def
>>> len(ta)
0