Blame doc/s5/lxml-ep2008.html

Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
Packit Service b74dd5
<head>
Packit Service b74dd5
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
Packit Service b74dd5
<meta name="generator" content="Docutils 0.12: http://docutils.sourceforge.net/" />
Packit Service b74dd5
<meta name="version" content="S5 1.1" />
Packit Service b74dd5
<title>Implementing XML languages with lxml</title>
Packit Service b74dd5
<style type="text/css">
Packit Service b74dd5
Packit Service b74dd5
/*
Packit Service b74dd5
:Author: David Goodger (goodger@python.org)
Packit Service b74dd5
:Id: $Id: html4css1.css 7614 2013-02-21 15:55:51Z milde $
Packit Service b74dd5
:Copyright: This stylesheet has been placed in the public domain.
Packit Service b74dd5
Packit Service b74dd5
Default cascading style sheet for the HTML output of Docutils.
Packit Service b74dd5
Packit Service b74dd5
See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
Packit Service b74dd5
customize this style sheet.
Packit Service b74dd5
*/
Packit Service b74dd5
Packit Service b74dd5
/* used to remove borders from tables and images */
Packit Service b74dd5
.borderless, table.borderless td, table.borderless th {
Packit Service b74dd5
  border: 0 }
Packit Service b74dd5
Packit Service b74dd5
table.borderless td, table.borderless th {
Packit Service b74dd5
  /* Override padding for "table.docutils td" with "! important".
Packit Service b74dd5
     The right padding separates the table cells. */
Packit Service b74dd5
  padding: 0 0.5em 0 0 ! important }
Packit Service b74dd5
Packit Service b74dd5
.first {
Packit Service b74dd5
  /* Override more specific margin styles with "! important". */
Packit Service b74dd5
  margin-top: 0 ! important }
Packit Service b74dd5
Packit Service b74dd5
.last, .with-subtitle {
Packit Service b74dd5
  margin-bottom: 0 ! important }
Packit Service b74dd5
Packit Service b74dd5
.hidden {
Packit Service b74dd5
  display: none }
Packit Service b74dd5
Packit Service b74dd5
a.toc-backref {
Packit Service b74dd5
  text-decoration: none ;
Packit Service b74dd5
  color: black }
Packit Service b74dd5
Packit Service b74dd5
blockquote.epigraph {
Packit Service b74dd5
  margin: 2em 5em ; }
Packit Service b74dd5
Packit Service b74dd5
dl.docutils dd {
Packit Service b74dd5
  margin-bottom: 0.5em }
Packit Service b74dd5
Packit Service b74dd5
object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] {
Packit Service b74dd5
  overflow: hidden;
Packit Service b74dd5
}
Packit Service b74dd5
Packit Service b74dd5
/* Uncomment (and remove this text!) to get bold-faced definition list terms
Packit Service b74dd5
dl.docutils dt {
Packit Service b74dd5
  font-weight: bold }
Packit Service b74dd5
*/
Packit Service b74dd5
Packit Service b74dd5
div.abstract {
Packit Service b74dd5
  margin: 2em 5em }
Packit Service b74dd5
Packit Service b74dd5
div.abstract p.topic-title {
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  text-align: center }
Packit Service b74dd5
Packit Service b74dd5
div.admonition, div.attention, div.caution, div.danger, div.error,
Packit Service b74dd5
div.hint, div.important, div.note, div.tip, div.warning {
Packit Service b74dd5
  margin: 2em ;
Packit Service b74dd5
  border: medium outset ;
Packit Service b74dd5
  padding: 1em }
Packit Service b74dd5
Packit Service b74dd5
div.admonition p.admonition-title, div.hint p.admonition-title,
Packit Service b74dd5
div.important p.admonition-title, div.note p.admonition-title,
Packit Service b74dd5
div.tip p.admonition-title {
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  font-family: sans-serif }
Packit Service b74dd5
Packit Service b74dd5
div.attention p.admonition-title, div.caution p.admonition-title,
Packit Service b74dd5
div.danger p.admonition-title, div.error p.admonition-title,
Packit Service b74dd5
div.warning p.admonition-title, .code .error {
Packit Service b74dd5
  color: red ;
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  font-family: sans-serif }
Packit Service b74dd5
Packit Service b74dd5
/* Uncomment (and remove this text!) to get reduced vertical space in
Packit Service b74dd5
   compound paragraphs.
Packit Service b74dd5
div.compound .compound-first, div.compound .compound-middle {
Packit Service b74dd5
  margin-bottom: 0.5em }
Packit Service b74dd5
Packit Service b74dd5
div.compound .compound-last, div.compound .compound-middle {
Packit Service b74dd5
  margin-top: 0.5em }
Packit Service b74dd5
*/
Packit Service b74dd5
Packit Service b74dd5
div.dedication {
Packit Service b74dd5
  margin: 2em 5em ;
Packit Service b74dd5
  text-align: center ;
Packit Service b74dd5
  font-style: italic }
Packit Service b74dd5
Packit Service b74dd5
div.dedication p.topic-title {
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  font-style: normal }
Packit Service b74dd5
Packit Service b74dd5
div.figure {
Packit Service b74dd5
  margin-left: 2em ;
Packit Service b74dd5
  margin-right: 2em }
Packit Service b74dd5
Packit Service b74dd5
div.footer, div.header {
Packit Service b74dd5
  clear: both;
Packit Service b74dd5
  font-size: smaller }
Packit Service b74dd5
Packit Service b74dd5
div.line-block {
Packit Service b74dd5
  display: block ;
Packit Service b74dd5
  margin-top: 1em ;
Packit Service b74dd5
  margin-bottom: 1em }
Packit Service b74dd5
Packit Service b74dd5
div.line-block div.line-block {
Packit Service b74dd5
  margin-top: 0 ;
Packit Service b74dd5
  margin-bottom: 0 ;
Packit Service b74dd5
  margin-left: 1.5em }
Packit Service b74dd5
Packit Service b74dd5
div.sidebar {
Packit Service b74dd5
  margin: 0 0 0.5em 1em ;
Packit Service b74dd5
  border: medium outset ;
Packit Service b74dd5
  padding: 1em ;
Packit Service b74dd5
  background-color: #ffffee ;
Packit Service b74dd5
  width: 40% ;
Packit Service b74dd5
  float: right ;
Packit Service b74dd5
  clear: right }
Packit Service b74dd5
Packit Service b74dd5
div.sidebar p.rubric {
Packit Service b74dd5
  font-family: sans-serif ;
Packit Service b74dd5
  font-size: medium }
Packit Service b74dd5
Packit Service b74dd5
div.system-messages {
Packit Service b74dd5
  margin: 5em }
Packit Service b74dd5
Packit Service b74dd5
div.system-messages h1 {
Packit Service b74dd5
  color: red }
Packit Service b74dd5
Packit Service b74dd5
div.system-message {
Packit Service b74dd5
  border: medium outset ;
Packit Service b74dd5
  padding: 1em }
Packit Service b74dd5
Packit Service b74dd5
div.system-message p.system-message-title {
Packit Service b74dd5
  color: red ;
Packit Service b74dd5
  font-weight: bold }
Packit Service b74dd5
Packit Service b74dd5
div.topic {
Packit Service b74dd5
  margin: 2em }
Packit Service b74dd5
Packit Service b74dd5
h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
Packit Service b74dd5
h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
Packit Service b74dd5
  margin-top: 0.4em }
Packit Service b74dd5
Packit Service b74dd5
h1.title {
Packit Service b74dd5
  text-align: center }
Packit Service b74dd5
Packit Service b74dd5
h2.subtitle {
Packit Service b74dd5
  text-align: center }
Packit Service b74dd5
Packit Service b74dd5
hr.docutils {
Packit Service b74dd5
  width: 75% }
Packit Service b74dd5
Packit Service b74dd5
img.align-left, .figure.align-left, object.align-left {
Packit Service b74dd5
  clear: left ;
Packit Service b74dd5
  float: left ;
Packit Service b74dd5
  margin-right: 1em }
Packit Service b74dd5
Packit Service b74dd5
img.align-right, .figure.align-right, object.align-right {
Packit Service b74dd5
  clear: right ;
Packit Service b74dd5
  float: right ;
Packit Service b74dd5
  margin-left: 1em }
Packit Service b74dd5
Packit Service b74dd5
img.align-center, .figure.align-center, object.align-center {
Packit Service b74dd5
  display: block;
Packit Service b74dd5
  margin-left: auto;
Packit Service b74dd5
  margin-right: auto;
Packit Service b74dd5
}
Packit Service b74dd5
Packit Service b74dd5
.align-left {
Packit Service b74dd5
  text-align: left }
Packit Service b74dd5
Packit Service b74dd5
.align-center {
Packit Service b74dd5
  clear: both ;
Packit Service b74dd5
  text-align: center }
Packit Service b74dd5
Packit Service b74dd5
.align-right {
Packit Service b74dd5
  text-align: right }
Packit Service b74dd5
Packit Service b74dd5
/* reset inner alignment in figures */
Packit Service b74dd5
div.align-right {
Packit Service b74dd5
  text-align: inherit }
Packit Service b74dd5
Packit Service b74dd5
/* div.align-center * { */
Packit Service b74dd5
/*   text-align: left } */
Packit Service b74dd5
Packit Service b74dd5
ol.simple, ul.simple {
Packit Service b74dd5
  margin-bottom: 1em }
Packit Service b74dd5
Packit Service b74dd5
ol.arabic {
Packit Service b74dd5
  list-style: decimal }
Packit Service b74dd5
Packit Service b74dd5
ol.loweralpha {
Packit Service b74dd5
  list-style: lower-alpha }
Packit Service b74dd5
Packit Service b74dd5
ol.upperalpha {
Packit Service b74dd5
  list-style: upper-alpha }
Packit Service b74dd5
Packit Service b74dd5
ol.lowerroman {
Packit Service b74dd5
  list-style: lower-roman }
Packit Service b74dd5
Packit Service b74dd5
ol.upperroman {
Packit Service b74dd5
  list-style: upper-roman }
Packit Service b74dd5
Packit Service b74dd5
p.attribution {
Packit Service b74dd5
  text-align: right ;
Packit Service b74dd5
  margin-left: 50% }
Packit Service b74dd5
Packit Service b74dd5
p.caption {
Packit Service b74dd5
  font-style: italic }
Packit Service b74dd5
Packit Service b74dd5
p.credits {
Packit Service b74dd5
  font-style: italic ;
Packit Service b74dd5
  font-size: smaller }
Packit Service b74dd5
Packit Service b74dd5
p.label {
Packit Service b74dd5
  white-space: nowrap }
Packit Service b74dd5
Packit Service b74dd5
p.rubric {
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  font-size: larger ;
Packit Service b74dd5
  color: maroon ;
Packit Service b74dd5
  text-align: center }
Packit Service b74dd5
Packit Service b74dd5
p.sidebar-title {
Packit Service b74dd5
  font-family: sans-serif ;
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  font-size: larger }
Packit Service b74dd5
Packit Service b74dd5
p.sidebar-subtitle {
Packit Service b74dd5
  font-family: sans-serif ;
Packit Service b74dd5
  font-weight: bold }
Packit Service b74dd5
Packit Service b74dd5
p.topic-title {
Packit Service b74dd5
  font-weight: bold }
Packit Service b74dd5
Packit Service b74dd5
pre.address {
Packit Service b74dd5
  margin-bottom: 0 ;
Packit Service b74dd5
  margin-top: 0 ;
Packit Service b74dd5
  font: inherit }
Packit Service b74dd5
Packit Service b74dd5
pre.literal-block, pre.doctest-block, pre.math, pre.code {
Packit Service b74dd5
  margin-left: 2em ;
Packit Service b74dd5
  margin-right: 2em }
Packit Service b74dd5
Packit Service b74dd5
pre.code .ln { color: grey; } /* line numbers */
Packit Service b74dd5
pre.code, code { background-color: #eeeeee }
Packit Service b74dd5
pre.code .comment, code .comment { color: #5C6576 }
Packit Service b74dd5
pre.code .keyword, code .keyword { color: #3B0D06; font-weight: bold }
Packit Service b74dd5
pre.code .literal.string, code .literal.string { color: #0C5404 }
Packit Service b74dd5
pre.code .name.builtin, code .name.builtin { color: #352B84 }
Packit Service b74dd5
pre.code .deleted, code .deleted { background-color: #DEB0A1}
Packit Service b74dd5
pre.code .inserted, code .inserted { background-color: #A3D289}
Packit Service b74dd5
Packit Service b74dd5
span.classifier {
Packit Service b74dd5
  font-family: sans-serif ;
Packit Service b74dd5
  font-style: oblique }
Packit Service b74dd5
Packit Service b74dd5
span.classifier-delimiter {
Packit Service b74dd5
  font-family: sans-serif ;
Packit Service b74dd5
  font-weight: bold }
Packit Service b74dd5
Packit Service b74dd5
span.interpreted {
Packit Service b74dd5
  font-family: sans-serif }
Packit Service b74dd5
Packit Service b74dd5
span.option {
Packit Service b74dd5
  white-space: nowrap }
Packit Service b74dd5
Packit Service b74dd5
span.pre {
Packit Service b74dd5
  white-space: pre }
Packit Service b74dd5
Packit Service b74dd5
span.problematic {
Packit Service b74dd5
  color: red }
Packit Service b74dd5
Packit Service b74dd5
span.section-subtitle {
Packit Service b74dd5
  /* font-size relative to parent (h1..h6 element) */
Packit Service b74dd5
  font-size: 80% }
Packit Service b74dd5
Packit Service b74dd5
table.citation {
Packit Service b74dd5
  border-left: solid 1px gray;
Packit Service b74dd5
  margin-left: 1px }
Packit Service b74dd5
Packit Service b74dd5
table.docinfo {
Packit Service b74dd5
  margin: 2em 4em }
Packit Service b74dd5
Packit Service b74dd5
table.docutils {
Packit Service b74dd5
  margin-top: 0.5em ;
Packit Service b74dd5
  margin-bottom: 0.5em }
Packit Service b74dd5
Packit Service b74dd5
table.footnote {
Packit Service b74dd5
  border-left: solid 1px black;
Packit Service b74dd5
  margin-left: 1px }
Packit Service b74dd5
Packit Service b74dd5
table.docutils td, table.docutils th,
Packit Service b74dd5
table.docinfo td, table.docinfo th {
Packit Service b74dd5
  padding-left: 0.5em ;
Packit Service b74dd5
  padding-right: 0.5em ;
Packit Service b74dd5
  vertical-align: top }
Packit Service b74dd5
Packit Service b74dd5
table.docutils th.field-name, table.docinfo th.docinfo-name {
Packit Service b74dd5
  font-weight: bold ;
Packit Service b74dd5
  text-align: left ;
Packit Service b74dd5
  white-space: nowrap ;
Packit Service b74dd5
  padding-left: 0 }
Packit Service b74dd5
Packit Service b74dd5
/* "booktabs" style (no vertical lines) */
Packit Service b74dd5
table.docutils.booktabs {
Packit Service b74dd5
  border: 0px;
Packit Service b74dd5
  border-top: 2px solid;
Packit Service b74dd5
  border-bottom: 2px solid;
Packit Service b74dd5
  border-collapse: collapse;
Packit Service b74dd5
}
Packit Service b74dd5
table.docutils.booktabs * {
Packit Service b74dd5
  border: 0px;
Packit Service b74dd5
}
Packit Service b74dd5
table.docutils.booktabs th {
Packit Service b74dd5
  border-bottom: thin solid;
Packit Service b74dd5
  text-align: left;
Packit Service b74dd5
}
Packit Service b74dd5
Packit Service b74dd5
h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
Packit Service b74dd5
h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
Packit Service b74dd5
  font-size: 100% }
Packit Service b74dd5
Packit Service b74dd5
ul.auto-toc {
Packit Service b74dd5
  list-style-type: none }
Packit Service b74dd5
Packit Service b74dd5
</style>
Packit Service b74dd5
Packit Service b74dd5
<meta name="defaultView" content="slideshow" />
Packit Service b74dd5
<meta name="controlVis" content="hidden" />
Packit Service b74dd5
Packit Service b74dd5
<script src="ui/default/slides.js" type="text/javascript"></script>
Packit Service b74dd5
Packit Service b74dd5
<link rel="stylesheet" href="ui/default/slides.css"
      type="text/css" media="projection" id="slideProj" />
Packit Service b74dd5
Packit Service b74dd5
<link rel="stylesheet" href="ui/default/outline.css"
      type="text/css" media="screen" id="outlineStyle" />
Packit Service b74dd5
Packit Service b74dd5
<link rel="stylesheet" href="ui/default/print.css"
      type="text/css" media="print" id="slidePrint" />
Packit Service b74dd5
Packit Service b74dd5
<link rel="stylesheet" href="ui/default/opera.css"
      type="text/css" media="projection" id="operaFix" />
Packit Service b74dd5
</head>
Packit Service b74dd5
<body>
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5

Implementing XML languages with lxml

Packit Service b74dd5

Dr. Stefan Behnel, EuroPython 2008, Vilnius/Lietuva

Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5

Implementing XML languages with lxml

Packit Service b74dd5

Dr. Stefan Behnel

Packit Service b74dd5
Packit Service b74dd5

http://codespeak.net/lxml/

Packit Service b74dd5

lxml-dev@codespeak.net

Packit Service b74dd5
tagpython.png
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
<!-- Colours
======= -->
Packit Service b74dd5
Packit Service b74dd5
<!-- Text Sizes
========== -->
Packit Service b74dd5
Packit Service b74dd5
<!-- Display in Slides (Presentation Mode) Only
========================================== -->
Packit Service b74dd5
Packit Service b74dd5
<!-- Display in Outline Mode Only
============================ -->
Packit Service b74dd5
Packit Service b74dd5
<!-- Display in Print Only
===================== -->
Packit Service b74dd5
Packit Service b74dd5
<!-- Display in Handout Mode Only
============================ -->
Packit Service b74dd5
Packit Service b74dd5
<!-- Incremental Display
=================== -->
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5
Packit Service b74dd5

What is an »XML language«?

Packit Service b74dd5
    Packit Service b74dd5
  • a language in XML notation
  • Packit Service b74dd5
  • aka »XML dialect«
    • Packit Service b74dd5
    • except that it's not a dialect
    • Packit Service b74dd5
      Packit Service b74dd5
      Packit Service b74dd5
    • Examples:
      • Packit Service b74dd5
      • XML Schema
      • Packit Service b74dd5
      • Atom/RSS
      • Packit Service b74dd5
      • (X)HTML
      • Packit Service b74dd5
      • Open Document Format
      • Packit Service b74dd5
      • SOAP
      • Packit Service b74dd5
      • ... add your own one here
      • Packit Service b74dd5
        Packit Service b74dd5
        Packit Service b74dd5
        Packit Service b74dd5
        Packit Service b74dd5
        Packit Service b74dd5

        Popular mistakes to avoid (1)

        Packit Service b74dd5

        "That's easy, I can use regular expressions!"

        Packit Service b74dd5

        No, you can't.

        Packit Service b74dd5
        Packit Service b74dd5
        Packit Service b74dd5

        Popular mistakes to avoid (2)

        Packit Service b74dd5

        "This is tree data, I'll take the DOM!"

        Packit Service b74dd5
        Packit Service b74dd5
        Packit Service b74dd5

        Popular mistakes to avoid (2)

        Packit Service b74dd5

        "This is tree data, I'll take the DOM!"

        Packit Service b74dd5
          Packit Service b74dd5
        • DOM is ubiquitous, but it's as complicated as Java
        • Packit Service b74dd5
        • uglify your application with tons of DOM code to
          • Packit Service b74dd5
          • walk over non-element nodes to find the data you need
          • Packit Service b74dd5
          • convert text content to other data types
          • Packit Service b74dd5
          • modify the XML tree in memory
          • Packit Service b74dd5
            Packit Service b74dd5
            Packit Service b74dd5
            Packit Service b74dd5

            => write verbose, redundant, hard-to-maintain code

            Packit Service b74dd5
            Packit Service b74dd5
            Packit Service b74dd5

            Popular mistakes to avoid (3)

            Packit Service b74dd5

            "SAX is so fast and consumes no memory!"

            Packit Service b74dd5
            Packit Service b74dd5
            Packit Service b74dd5

            Popular mistakes to avoid (3)

            Packit Service b74dd5

            "SAX is so fast and consumes no memory!"

            Packit Service b74dd5
              Packit Service b74dd5
            • but writing SAX code is not fast!
            • Packit Service b74dd5
            • write error-prone, state-keeping SAX code to
              • Packit Service b74dd5
              • figure out where you are
              • Packit Service b74dd5
              • find the sections you need
              • Packit Service b74dd5
              • convert text content to other data types
              • Packit Service b74dd5
              • copy the XML data into custom data classes
              • Packit Service b74dd5
              • ... and don't forget the way back into XML!
              • Packit Service b74dd5
                Packit Service b74dd5
                Packit Service b74dd5
                Packit Service b74dd5

                => write confusing state-machine code

                Packit Service b74dd5

                => debugging into existence

                Packit Service b74dd5
                Packit Service b74dd5
                Packit Service b74dd5

                Working with XML

                Packit Service b74dd5
                Packit Service b74dd5

                Getting XML work done

                Packit Service b74dd5

                (instead of getting time wasted)

                Packit Service b74dd5
                Packit Service b74dd5
                Packit Service b74dd5
                Packit Service b74dd5

                How can you work with XML?

                Packit Service b74dd5
                  Packit Service b74dd5
                • Preparation:
                  • Packit Service b74dd5
                  • Implement usable data classes as an abstraction layer
                  • Packit Service b74dd5
                  • Implement a mapping from XML to the data classes
                  • Packit Service b74dd5
                  • Implement a mapping from the data classes to XML
                  • Packit Service b74dd5
                    Packit Service b74dd5
                    Packit Service b74dd5
                  • Workflow:
                    • Packit Service b74dd5
                    • parse XML data
                    • Packit Service b74dd5
                    • map XML data to data classes
                    • Packit Service b74dd5
                    • work with data classes
                    • Packit Service b74dd5
                    • map data classes to XML
                    • Packit Service b74dd5
                    • serialise XML
                    • Packit Service b74dd5
                      Packit Service b74dd5
                      Packit Service b74dd5
                      Packit Service b74dd5
                        Packit Service b74dd5
                      • Approach:
                        • Packit Service b74dd5
                        • get rid of XML and do everything in your own code
                        • Packit Service b74dd5
                          Packit Service b74dd5
                          Packit Service b74dd5
                          Packit Service b74dd5
                          Packit Service b74dd5
                          Packit Service b74dd5

                          What if you could simplify this?

                          Packit Service b74dd5
                            Packit Service b74dd5
                          • Preparation:
                            • Packit Service b74dd5
                            • Extend usable XML API classes into an abstraction layer
                            • Packit Service b74dd5
                              Packit Service b74dd5
                              Packit Service b74dd5
                            • Workflow:
                              • Packit Service b74dd5
                              • parse XML data into XML API classes
                              • Packit Service b74dd5
                              • work with XML API classes
                              • Packit Service b74dd5
                              • serialise XML
                              • Packit Service b74dd5
                                Packit Service b74dd5
                                Packit Service b74dd5
                                Packit Service b74dd5
                                  Packit Service b74dd5
                                • Approach:
                                  • Packit Service b74dd5
                                  • cover only the quirks of XML and make it work for you
                                  • Packit Service b74dd5
                                    Packit Service b74dd5
                                    Packit Service b74dd5
                                    Packit Service b74dd5
                                    Packit Service b74dd5
                                    Packit Service b74dd5

                                    What if you could simplify this ...

                                    Packit Service b74dd5
                                      Packit Service b74dd5
                                    • ... without sacrificing usability or flexibility?
                                    • Packit Service b74dd5
                                    • ... using a high-speed, full-featured, pythonic XML toolkit?
                                    • Packit Service b74dd5
                                    • ... with the power of XPath, XSLT and XML validation?
                                    • Packit Service b74dd5
                                      Packit Service b74dd5

                                      ... then »lxml« is your friend!

                                      Packit Service b74dd5
                                      Packit Service b74dd5
                                      Packit Service b74dd5

                                      Overview

                                      Packit Service b74dd5
                                        Packit Service b74dd5
                                      • What is lxml?
                                        • Packit Service b74dd5
                                        • what & who
                                        • Packit Service b74dd5
                                          Packit Service b74dd5
                                          Packit Service b74dd5
                                        • How do you use it?
                                          • Packit Service b74dd5
                                          • Lesson 0: quick API overview
                                            • Packit Service b74dd5
                                            • ElementTree concepts and lxml features
                                            • Packit Service b74dd5
                                              Packit Service b74dd5
                                              Packit Service b74dd5
                                            • Lesson 1: parse XML
                                              • Packit Service b74dd5
                                              • how to get XML data into memory
                                              • Packit Service b74dd5
                                                Packit Service b74dd5
                                                Packit Service b74dd5
                                              • Lesson 2: generate XML
                                                • Packit Service b74dd5
                                                • how to write an XML generator for a language
                                                • Packit Service b74dd5
                                                  Packit Service b74dd5
                                                  Packit Service b74dd5
                                                • Lesson 3: working with XML trees made easy
                                                  • Packit Service b74dd5
                                                  • how to write an XML API for a language
                                                  • Packit Service b74dd5
                                                    Packit Service b74dd5
                                                    Packit Service b74dd5
                                                    Packit Service b74dd5
                                                    Packit Service b74dd5
                                                    Packit Service b74dd5
                                                    Packit Service b74dd5
                                                    Packit Service b74dd5

                                                    What is lxml?

                                                    Packit Service b74dd5
                                                      Packit Service b74dd5
                                                    • a fast, full-featured toolkit for XML and HTML handling
                                                      • Packit Service b74dd5
                                                      • http://codespeak.net/lxml/
                                                      • Packit Service b74dd5
                                                      • lxml-dev@codespeak.net
                                                      • Packit Service b74dd5
                                                        Packit Service b74dd5
                                                        Packit Service b74dd5
                                                      • based on and inspired by
                                                        • Packit Service b74dd5
                                                        • the C libraries libxml2 and libxslt (by Daniel Veillard)
                                                        • Packit Service b74dd5
                                                        • the ElementTree API (by Fredrik Lundh)
                                                        • Packit Service b74dd5
                                                        • the Cython compiler (by Robert Bradshaw, Greg Ewing & me)
                                                        • Packit Service b74dd5
                                                        • the Python language (by Guido & [paste Misc/ACKS here])
                                                        • Packit Service b74dd5
                                                        • user feedback, ideas and patches (by you!)
                                                          • Packit Service b74dd5
                                                          • keep doing that, we love you all!
                                                          • Packit Service b74dd5
                                                            Packit Service b74dd5
                                                            Packit Service b74dd5
                                                            Packit Service b74dd5
                                                            Packit Service b74dd5
                                                          • maintained (and major parts) written by myself
                                                            • Packit Service b74dd5
                                                            • initial design and implementation by Martijn Faassen
                                                            • Packit Service b74dd5
                                                            • extensive HTML API and tools by Ian Bicking
                                                            • Packit Service b74dd5
                                                              Packit Service b74dd5
                                                              Packit Service b74dd5
                                                              Packit Service b74dd5
                                                              Packit Service b74dd5
                                                              Packit Service b74dd5

                                                              What do you get for your money?

                                                              Packit Service b74dd5
                                                                Packit Service b74dd5
                                                              • many tools in one:
                                                                • Packit Service b74dd5
                                                                • Generic, ElementTree compatible XML API: lxml.etree
                                                                  • Packit Service b74dd5
                                                                  • but faster for many tasks and much more feature-rich
                                                                  • Packit Service b74dd5
                                                                    Packit Service b74dd5
                                                                    Packit Service b74dd5
                                                                  • Special tool set for HTML handling: lxml.html
                                                                  • Packit Service b74dd5
                                                                  • Special API for pythonic data binding: lxml.objectify
                                                                  • Packit Service b74dd5
                                                                  • General purpose path languages: XPath and CSS selectors
                                                                  • Packit Service b74dd5
                                                                  • Validation: DTD, XML Schema, RelaxNG, Schematron
                                                                  • Packit Service b74dd5
                                                                  • XSLT, XInclude, C14N, ...
                                                                  • Packit Service b74dd5
                                                                  • Fast tree iteration, event-driven parsing, ...
                                                                  • Packit Service b74dd5
                                                                    Packit Service b74dd5
                                                                    Packit Service b74dd5
                                                                  • it's free, but it's worth every €-Cent!
                                                                    • Packit Service b74dd5
                                                                    • what users say:
                                                                      • Packit Service b74dd5
                                                                      • »no qualification, I would recommend lxml for just about any
                                                                      • Packit Service b74dd5
                                                                        HTML task«
                                                                        Packit Service b74dd5
                                                                      • »THE tool [...] for newbies and experienced developers«
                                                                      • Packit Service b74dd5
                                                                      • »you can do pretty much anything with an intuitive API«
                                                                      • Packit Service b74dd5
                                                                      • »lxml takes all the pain out of XML«
                                                                      • Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5

                                                                        Lesson 0: a quick overview

                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5

                                                                        why »lxml takes all the pain out of XML«

                                                                        Packit Service b74dd5

                                                                        (a quick overview of lxml features and ElementTree concepts)

                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        >>> some_xml_data  = "<root><speech class='dialog'>

                                                                        So be it!

                                                                        </speech>

                                                                        stuff

                                                                        </root>"
                                                                        Packit Service b74dd5
                                                                        >>> some_html_data = "<p>Just a quick note<br>next line</p>"
                                                                        Packit Service b74dd5
                                                                        >>> xml_tree = etree.XML(some_xml_data)
                                                                        Packit Service b74dd5
                                                                        >>> html_tree = html.fragment_fromstring(some_html_data) -->
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5
                                                                        Packit Service b74dd5

                                                                        Namespaces in ElementTree

                                                                        Packit Service b74dd5
                                                                          Packit Service b74dd5
                                                                        • uses Clark notation:

                                                                        • Packit Service b74dd5
                                                                            Packit Service b74dd5
                                                                          • wrap namespace URI in <tt class="docutils literal">{...}</tt>
                                                                          • Packit Service b74dd5
                                                                          • append the tag name
                                                                          • Packit Service b74dd5
                                                                            Packit Service b74dd5
                                                                            >>> tag = "{http://www.w3.org/the/namespace}tagname"
                                                                            Packit Service b74dd5
                                                                            >>> element = etree.Element(tag)