Package lxml :: Package html :: Module defs
[frames | no frames]

Source Code for Module lxml.html.defs

  1  # FIXME: this should all be confirmed against what a DTD says 
  2  # (probably in a test; this may not match the DTD exactly, but we 
  3  # should document just how it differs). 
  4   
  5  # Data taken from http://www.w3.org/TR/html401/index/elements.html 
  6   
  7  empty_tags = [ 
  8      'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 
  9      'img', 'input', 'isindex', 'link', 'meta', 'param'] 
 10   
 11  deprecated_tags = [ 
 12      'applet', 'basefont', 'center', 'dir', 'font', 'isindex', 
 13      'menu', 's', 'strike', 'u'] 
 14   
 15  # archive actually takes a space-separated list of URIs 
 16  link_attrs = [ 
 17      'action', 'archive', 'background', 'cite', 'classid', 
 18      'codebase', 'data', 'href', 'longdesc', 'profile', 'src', 
 19      'usemap', 
 20      # Not standard: 
 21      'dynsrc', 'lowsrc', 
 22      ] 
 23   
 24  # Not in the HTML 4 spec: 
 25  # onerror, onresize 
 26  event_attrs = [ 
 27      'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror', 
 28      'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 
 29      'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 
 30      'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit', 
 31      'onunload', 
 32      ] 
 33   
 34  safe_attrs = [ 
 35      'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 
 36      'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 
 37      'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', 
 38      'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 
 39      'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', 
 40      'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', 
 41      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 
 42      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 
 43      'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 
 44      'type', 'usemap', 'valign', 'value', 'vspace', 'width'] 
 45   
 46  # From http://htmlhelp.com/reference/html40/olist.html 
 47  top_level_tags = [ 
 48      'html', 'head', 'body', 'frameset', 
 49      ] 
 50   
 51  head_tags = [ 
 52      'base', 'isindex', 'link', 'meta', 'script', 'style', 'title', 
 53      ] 
 54   
 55  general_block_tags = [ 
 56      'address', 
 57      'blockquote', 
 58      'center', 
 59      'del', 
 60      'div', 
 61      'h1', 
 62      'h2', 
 63      'h3', 
 64      'h4', 
 65      'h5', 
 66      'h6', 
 67      'hr', 
 68      'ins', 
 69      'isindex', 
 70      'noscript', 
 71      'p', 
 72      'pre', 
 73      ] 
 74   
 75  list_tags = [ 
 76      'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul', 
 77      ] 
 78   
 79  table_tags = [ 
 80      'table', 'caption', 'colgroup', 'col', 
 81      'thead', 'tfoot', 'tbody', 'tr', 'td', 'th', 
 82      ] 
 83   
 84  # just this one from 
 85  # http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm 
 86  block_tags = general_block_tags + list_tags + table_tags + [ 
 87      # Partial form tags 
 88      'fieldset', 'form', 'legend', 'optgroup', 'option', 
 89      ] 
 90   
 91  form_tags = [ 
 92      'form', 'button', 'fieldset', 'legend', 'input', 'label', 
 93      'select', 'optgroup', 'option', 'textarea', 
 94      ] 
 95   
 96  special_inline_tags = [ 
 97      'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe', 
 98      'img', 'map', 'area', 'object', 'param', 'q', 'script', 
 99      'span', 'sub', 'sup', 
100      ] 
101   
102  phrase_tags = [ 
103      'abbr', 'acronym', 'cite', 'code', 'del', 'dfn', 'em', 
104      'ins', 'kbd', 'samp', 'strong', 'var', 
105      ] 
106   
107  font_style_tags = [ 
108      'b', 'big', 'i', 's', 'small', 'strike', 'tt', 'u', 
109      ] 
110   
111  frame_tags = [ 
112      'frameset', 'frame', 'noframes', 
113      ] 
114   
115  # These tags aren't standard 
116  nonstandard_tags = ['blink', 'marque'] 
117   
118  tags = (top_level_tags + head_tags + general_block_tags + list_tags 
119          + table_tags + form_tags + special_inline_tags + phrase_tags 
120          + font_style_tags + nonstandard_tags) 
121