Package lxml :: Package html :: Module defs
[hide private]
[frames] | [no frames]

Source Code for Module lxml.html.defs

  1  # FIXME: this should all be confirmed against what a DTD says 
  2  # (probably in a test; this may not match the DTD exactly, but we 
  3  # should document just how it differs). 
  4   
  5  # Data taken from http://www.w3.org/TR/html401/index/elements.html 
  6  # and http://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements 
  7  # for html5_tags. 
  8   
  9  try: 
 10      frozenset 
 11  except NameError: 
 12      from sets import Set as frozenset 
 13   
 14   
 15  empty_tags = frozenset([ 
 16      'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 
 17      'img', 'input', 'isindex', 'link', 'meta', 'param']) 
 18   
 19  deprecated_tags = frozenset([ 
 20      'applet', 'basefont', 'center', 'dir', 'font', 'isindex', 
 21      'menu', 's', 'strike', 'u']) 
 22   
 23  # archive actually takes a space-separated list of URIs 
 24  link_attrs = frozenset([ 
 25      'action', 'archive', 'background', 'cite', 'classid', 
 26      'codebase', 'data', 'href', 'longdesc', 'profile', 'src', 
 27      'usemap', 
 28      # Not standard: 
 29      'dynsrc', 'lowsrc', 
 30      ]) 
 31   
 32  # Not in the HTML 4 spec: 
 33  # onerror, onresize 
 34  event_attrs = frozenset([ 
 35      'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror', 
 36      'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 
 37      'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 
 38      'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit', 
 39      'onunload', 
 40      ]) 
 41   
 42  safe_attrs = frozenset([ 
 43      'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 
 44      'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 
 45      'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', 
 46      'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 
 47      'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', 
 48      'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', 
 49      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 
 50      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 
 51      'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 
 52      'type', 'usemap', 'valign', 'value', 'vspace', 'width']) 
 53   
 54  # From http://htmlhelp.com/reference/html40/olist.html 
 55  top_level_tags = frozenset([ 
 56      'html', 'head', 'body', 'frameset', 
 57      ]) 
 58   
 59  head_tags = frozenset([ 
 60      'base', 'isindex', 'link', 'meta', 'script', 'style', 'title', 
 61      ]) 
 62   
 63  general_block_tags = frozenset([ 
 64      'address', 
 65      'blockquote', 
 66      'center', 
 67      'del', 
 68      'div', 
 69      'h1', 
 70      'h2', 
 71      'h3', 
 72      'h4', 
 73      'h5', 
 74      'h6', 
 75      'hr', 
 76      'ins', 
 77      'isindex', 
 78      'noscript', 
 79      'p', 
 80      'pre', 
 81      ]) 
 82   
 83  list_tags = frozenset([ 
 84      'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul', 
 85      ]) 
 86   
 87  table_tags = frozenset([ 
 88      'table', 'caption', 'colgroup', 'col', 
 89      'thead', 'tfoot', 'tbody', 'tr', 'td', 'th', 
 90      ]) 
 91   
 92  # just this one from 
 93  # http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm 
 94  block_tags = general_block_tags | list_tags | table_tags | frozenset([ 
 95      # Partial form tags 
 96      'fieldset', 'form', 'legend', 'optgroup', 'option', 
 97      ]) 
 98   
 99  form_tags = frozenset([ 
100      'form', 'button', 'fieldset', 'legend', 'input', 'label', 
101      'select', 'optgroup', 'option', 'textarea', 
102      ]) 
103   
# Inline elements that are neither phrase nor font-style elements.
special_inline_tags = frozenset((
    'a',
    'applet',
    'area',
    'basefont',
    'bdo',
    'br',
    'embed',
    'font',
    'iframe',
    'img',
    'map',
    'object',
    'param',
    'q',
    'script',
    'span',
    'sub',
    'sup',
))
109   
# Phrase-level elements.
phrase_tags = frozenset((
    'abbr',
    'acronym',
    'cite',
    'code',
    'del',
    'dfn',
    'em',
    'ins',
    'kbd',
    'samp',
    'strong',
    'var',
))
114   
# Font-style elements.
font_style_tags = frozenset((
    'b',
    'big',
    'i',
    's',
    'small',
    'strike',
    'tt',
    'u',
))
118   
# Elements used for frame-based documents.
frame_tags = frozenset((
    'frameset',
    'frame',
    'noframes',
))
122       
# Elements introduced with HTML5.
html5_tags = frozenset((
    'article', 'aside', 'audio',
    'canvas', 'command',
    'datalist', 'details',
    'embed',
    'figcaption', 'figure', 'footer',
    'header', 'hgroup',
    'keygen',
    'mark', 'math', 'meter',
    'nav',
    'output',
    'progress',
    'rp', 'rt', 'ruby',
    'section', 'source', 'summary', 'svg',
    'time', 'track',
    'video',
    'wbr',
))
130   
# Elements that are not part of any standard.
nonstandard_tags = frozenset((
    'blink',
    'marquee',
))
133   
134   
# Every tag name known to this module: the union of all the tag groups
# defined above.
# frame_tags is part of the union so that 'frame' and 'noframes' count as
# known tags too; without it, 'frame' would be listed in empty_tags while
# being absent from the set of known tags.
tags = (top_level_tags | head_tags | general_block_tags | list_tags
        | table_tags | form_tags | special_inline_tags | phrase_tags
        | font_style_tags | frame_tags | nonstandard_tags | html5_tags)
138