mistpark 2.0 infrastructure lands

This commit is contained in:
Mike Macgirvin 2010-09-08 20:14:17 -07:00
parent b49858b038
commit ffb1997902
360 changed files with 25001 additions and 457 deletions

View file

@ -0,0 +1,31 @@
<?php
/**
* XHTML 1.1 Bi-directional Text Module, defines elements that
* declare directionality of content. Text Extension Module.
*/
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Bdo';

    /**
     * Contributes a 'dir' attribute to the I18N collection. The value is a
     * false placeholder here; setup() replaces it with the real definition.
     */
    public $attr_collections = array(
        'I18N' => array('dir' => false),
    );

    /**
     * Registers the <bdo> element and fills in the I18N 'dir' definition.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $direction = 'Enum#ltr,rtl';

        // The Abstract Module specification has the attribute
        // inclusions wrong for bdo: bdo allows Lang
        $included = array('Core', 'Lang');

        $element = $this->addElement(
            'bdo',
            'Inline',
            'Inline',
            $included,
            array('dir' => $direction) // required
        );

        // Post-validation transform registered under the 'required-dir' key.
        $element->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir();

        // Replace the placeholder declared in $attr_collections.
        $this->attr_collections['I18N']['dir'] = $direction;
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,26 @@
<?php
/**
 * Declares the shared attribute collections (Core, Lang, I18N, Common).
 * This module registers no elements of its own.
 */
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'CommonAttributes';

    /**
     * Attribute collections. The entry at index 0 of a collection lists
     * other collections that it includes.
     */
    public $attr_collections = array(
        'Core' => array(
            0 => array('Style'),
            // 'xml:space' => false,
            'class' => 'Class',
            'id'    => 'ID',
            'title' => 'CDATA',
        ),
        // Empty here; NOTE(review): presumably populated by a separate
        // module depending on the doctype -- confirm.
        'Lang' => array(),
        'I18N' => array(
            0 => array('Lang'), // proprietary, for xml:lang/lang
        ),
        'Common' => array(
            0 => array('Core', 'I18N')
        )
    );
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,38 @@
<?php
/**
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
* Module.
*/
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Edit';

    /** Flags that this module provides its own getChildDef(). */
    public $defines_child_def = true;

    /**
     * Registers <del> and <ins> with a chameleon content model.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        // HTML 4.01 specifies that ins/del must not contain block
        // elements when used in an inline context, chameleon is
        // a complicated workaround to achieve this effect.
        // Format: Inline context ! Block context (the exclamation mark
        // is the separator, see getChildDef for parsing)
        $model = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';

        $attributes = array(
            'cite' => 'URI',
            // 'datetime' => 'Datetime', // not implemented
        );

        foreach (array('del', 'ins') as $tag) {
            $this->addElement($tag, 'Inline', $model, 'Common', $attributes);
        }
    }

    /**
     * Builds the child definition for chameleon content models.
     * @param $def Element definition carrying content_model and
     *             content_model_type
     * @return HTMLPurifier_ChildDef_Chameleon built from the two halves of
     *         the content model, or false if the type is not 'chameleon'
     */
    public function getChildDef($def)
    {
        if ($def->content_model_type == 'chameleon') {
            // Left of '!' is the inline model, right of it the block model.
            $halves = explode('!', $def->content_model);
            return new HTMLPurifier_ChildDef_Chameleon($halves[0], $halves[1]);
        }
        return false;
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,118 @@
<?php
/**
* XHTML 1.1 Forms module, defines all form-related elements found in HTML 4.
*/
class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Forms';

    /** This module is flagged as not safe. */
    public $safe = false;

    /** Adds Form to the Block content set and Formctrl to the Inline one. */
    public $content_sets = array(
        'Block'  => 'Form',
        'Inline' => 'Formctrl',
    );

    /**
     * Registers form, input, select, option, textarea, button, fieldset,
     * label, legend and optgroup together with their attributes,
     * exclusions and attribute transforms.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $form = $this->addElement('form', 'Form',
            'Required: Heading | List | Block | fieldset', 'Common', array(
                'accept'         => 'ContentTypes',
                'accept-charset' => 'Charsets',
                'action*'        => 'URI',
                'method'         => 'Enum#get,post',
                // really ContentType, but these two are the only ones used today
                'enctype'        => 'Enum#application/x-www-form-urlencoded,multipart/form-data',
            ));
        // forms may not be nested
        $form->excludes = array('form' => true);

        $input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array(
            'accept'    => 'ContentTypes',
            'accesskey' => 'Character',
            'alt'       => 'Text',
            'checked'   => 'Bool#checked',
            'disabled'  => 'Bool#disabled',
            'maxlength' => 'Number',
            'name'      => 'CDATA',
            'readonly'  => 'Bool#readonly',
            'size'      => 'Number',
            // Spelled 'embedded' to match the embedded-URI declarations in
            // the SafeEmbed and SafeObject modules; this was previously
            // 'URI#embeds'.
            // NOTE(review): whether the old suffix was treated as embedded
            // depends on the URI attribute definition's parser -- confirm.
            'src'       => 'URI#embedded',
            'tabindex'  => 'Number',
            'type'      => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
            'value'     => 'CDATA',
        ));
        $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input();

        $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array(
            'disabled' => 'Bool#disabled',
            'multiple' => 'Bool#multiple',
            'name'     => 'CDATA',
            'size'     => 'Number',
            'tabindex' => 'Number',
        ));

        $this->addElement('option', false, 'Optional: #PCDATA', 'Common', array(
            'disabled' => 'Bool#disabled',
            'label'    => 'Text',
            'selected' => 'Bool#selected',
            'value'    => 'CDATA',
        ));
        // It's illegal for there to be more than one selected, but not
        // be multiple. Also, no selected means undefined behavior. This might
        // be difficult to implement; perhaps an injector, or a context variable.

        $textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array(
            'accesskey' => 'Character',
            'cols*'     => 'Number',
            'disabled'  => 'Bool#disabled',
            'name'      => 'CDATA',
            'readonly'  => 'Bool#readonly',
            'rows*'     => 'Number',
            'tabindex'  => 'Number',
        ));
        $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea();

        $button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array(
            'accesskey' => 'Character',
            'disabled'  => 'Bool#disabled',
            'name'      => 'CDATA',
            'tabindex'  => 'Number',
            'type'      => 'Enum#button,submit,reset',
            'value'     => 'CDATA',
        ));

        // For exclusions, ideally we'd specify content sets, not literal elements
        $button->excludes = $this->makeLookup(
            'form', 'fieldset', // Form
            'input', 'select', 'textarea', 'label', 'button', // Formctrl
            'a' // as per HTML 4.01 spec, this is omitted by modularization
        );

        // Extra exclusion: img usemap="" is not permitted within this element.
        // We'll omit this for now, since we don't have any good way of
        // indicating it yet.

        // This is HIGHLY user-unfriendly; we need a custom child-def for this
        $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common');

        $label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array(
            'accesskey' => 'Character',
            // 'for' => 'IDREF', // IDREF not implemented, cannot allow
        ));
        // labels may not be nested
        $label->excludes = array('label' => true);

        $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array(
            'accesskey' => 'Character',
        ));

        $this->addElement('optgroup', false, 'Required: option', 'Common', array(
            'disabled' => 'Bool#disabled',
            'label*'   => 'Text',
        ));

        // Don't forget an injector for <isindex>. This one's a little complex
        // because it maps to multiple elements.
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,31 @@
<?php
/**
* XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
*/
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Hypertext';

    /**
     * Registers the <a> element.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $link_attributes = array(
            // 'accesskey' => 'Character',
            // 'charset' => 'Charset',
            'href' => 'URI',
            // 'hreflang' => 'LanguageCode',
            'rel'  => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
            'rev'  => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
            // 'tabindex' => 'Number',
            // 'type' => 'ContentType',
        );

        $anchor = $this->addElement('a', 'Inline', 'Inline', 'Common', $link_attributes);
        $anchor->formatting = true;
        // anchors may not be nested
        $anchor->excludes = array('a' => true);
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,40 @@
<?php
/**
* XHTML 1.1 Image Module provides basic image embedding.
* @note There is specialized code for removing empty images in
* HTMLPurifier_Strategy_RemoveForeignElements
*/
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Image';

    /**
     * Registers the <img> element.
     * @param $config Configuration object; HTML.MaxImgLength and
     *                HTML.Trusted are read from it
     */
    public function setup($config)
    {
        $limit = $config->get('HTML.MaxImgLength');

        // According to the spec, it's Length, but percents can
        // be abused, so we allow only Pixels.
        $dimension = 'Pixels#' . $limit;

        $image = $this->addElement(
            'img',
            'Inline',
            'Empty',
            'Common',
            array(
                'alt*'     => 'Text',
                'height'   => $dimension,
                'width'    => $dimension,
                'longdesc' => 'URI',
                'src*'     => new HTMLPurifier_AttrDef_URI(true), // embedded
            )
        );

        // Without a limit, or for trusted input, allow full Length values.
        if ($limit === null || $config->get('HTML.Trusted')) {
            $image->attr['height'] =
            $image->attr['width'] = 'Length';
        }

        // kind of strange, but splitting things up would be inefficient:
        // one transform instance is shared by the pre and post lists
        $required = new HTMLPurifier_AttrTransform_ImgRequired();
        $image->attr_transform_post[] = $required;
        $image->attr_transform_pre[] = $required;
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,143 @@
<?php
/**
* XHTML 1.1 Legacy module defines elements that were previously
* deprecated.
*
* @note Not all legacy elements have been implemented yet, which
* is a bit of a reverse problem as compared to browsers! In
* addition, this legacy module may implement a bit more than
* mandated by XHTML 1.1.
*
* This module can be used in combination with TransformToStrict in order
* to transform as many deprecated elements as possible, but retain
* questionably deprecated elements that do not have good alternatives
* as well as transform elements that don't have an implementation.
* See docs/ref-strictness.txt for more details.
*/
class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Legacy';

    /**
     * Registers the legacy elements and then adds legacy attributes and
     * content models to elements that are registered as blank elements here.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        // -- elements registered by this module ---------------------------

        $this->addElement('basefont', 'Inline', 'Empty', false, array(
            'color' => 'Color',
            'face'  => 'Text', // extremely broad, we should
            'size'  => 'Text', // tighten it
            'id'    => 'ID'
        ));
        $this->addElement('center', 'Block', 'Flow', 'Common');
        $this->addElement('dir', 'Block', 'Required: li', 'Common', array(
            'compact' => 'Bool#compact'
        ));
        $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), array(
            'color' => 'Color',
            'face'  => 'Text', // extremely broad, we should
            'size'  => 'Text', // tighten it
        ));
        $this->addElement('menu', 'Block', 'Required: li', 'Common', array(
            'compact' => 'Bool#compact'
        ));

        foreach (array('s', 'strike', 'u') as $tag) {
            $inline = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $inline->formatting = true;
        }

        // -- setup modifications to old elements --------------------------

        $content_overrides = array(
            'address'    => 'Inline | #PCDATA | p',
            'blockquote' => 'Flow | #PCDATA',
        );
        foreach ($content_overrides as $tag => $model) {
            $element = $this->addBlankElement($tag);
            $element->content_model = $model;
            $element->content_model_type = 'optional';
            $element->child = false;
        }

        $align = 'Enum#left,right,center,justify';
        $heading = array('align' => $align);
        $cell = array(
            'bgcolor' => 'Color',
            'height'  => 'Length',
            'nowrap'  => 'Bool#nowrap',
            'width'   => 'Length',
        );

        // element => attributes to add, in registration order
        $attr_additions = array(
            'br'      => array('clear' => 'Enum#left,all,right,none'),
            'caption' => array('align' => 'Enum#top,bottom,left,right'),
            'div'     => array('align' => $align),
            'dl'      => array('compact' => 'Bool#compact'),
            'h1'      => $heading,
            'h2'      => $heading,
            'h3'      => $heading,
            'h4'      => $heading,
            'h5'      => $heading,
            'h6'      => $heading,
            'hr'      => array(
                'align'   => $align,
                'noshade' => 'Bool#noshade',
                'size'    => 'Pixels',
                'width'   => 'Length',
            ),
            'img'     => array(
                'align'  => 'Enum#top,middle,bottom,left,right',
                'border' => 'Pixels',
                'hspace' => 'Pixels',
                'vspace' => 'Pixels',
            ),
            // figure out this integer business
            'li'      => array(
                'value' => new HTMLPurifier_AttrDef_Integer(),
                'type'  => 'Enum#s:1,i,I,a,A,disc,square,circle',
            ),
            'ol'      => array(
                'compact' => 'Bool#compact',
                'start'   => new HTMLPurifier_AttrDef_Integer(),
                'type'    => 'Enum#s:1,i,I,a,A',
            ),
            'p'       => array('align' => $align),
            'pre'     => array('width' => 'Number'),
            // script omitted
            'table'   => array(
                'align'   => 'Enum#left,center,right',
                'bgcolor' => 'Color',
            ),
            'tr'      => array('bgcolor' => 'Color'),
            'th'      => $cell,
            'td'      => $cell,
            'ul'      => array(
                'compact' => 'Bool#compact',
                'type'    => 'Enum#square,disc,circle',
            ),
        );

        foreach ($attr_additions as $tag => $attributes) {
            $element = $this->addBlankElement($tag);
            foreach ($attributes as $attr_name => $attr_def) {
                $element->attr[$attr_name] = $attr_def;
            }
        }
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,37 @@
<?php
/**
* XHTML 1.1 List Module, defines list-oriented elements. Core Module.
*/
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'List';

    // According to the abstract schema, the List content set is a fully formed
    // one or more expr, but it invariably occurs in an optional declaration
    // so we're not going to do that subtlety. It might cause trouble
    // if a user defines "List" and expects that multiple lists are
    // allowed to be specified, but then again, that's not very intuitive.
    // Furthermore, the actual XML Schema may disagree. Regardless,
    // we don't have support for such nested expressions without using
    // the incredibly inefficient and draconic Custom ChildDef.

    /** Adds the List content set to Flow. */
    public $content_sets = array('Flow' => 'List');

    /**
     * Registers ol, ul, dl and their item elements li, dd and dt.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        // ol and ul share a definition: li children required, wrap set to li
        foreach (array('ol', 'ul') as $tag) {
            $list = $this->addElement($tag, 'List', 'Required: li', 'Common');
            $list->wrap = 'li';
        }

        $this->addElement('dl', 'List', 'Required: dt | dd', 'Common');

        // item elements belong to no content set
        $this->addElement('li', false, 'Flow', 'Common');
        $this->addElement('dd', false, 'Flow', 'Common');
        $this->addElement('dt', false, 'Inline', 'Common');
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
/**
 * Adds the 'name' attribute to a fixed list of elements.
 */
class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Name';

    /**
     * Adds a CDATA 'name' attribute to each listed element. A NameSync
     * post-transform is attached as well unless HTML.Attr.Name.UseCDATA
     * is set.
     * @param $config Configuration object; HTML.Attr.Name.UseCDATA is read
     */
    public function setup($config)
    {
        $tags = array('a', 'applet', 'form', 'frame', 'iframe', 'img', 'map');
        foreach ($tags as $tag) {
            $blank = $this->addBlankElement($tag);
            $blank->attr['name'] = 'CDATA';
            if ($config->get('HTML.Attr.Name.UseCDATA')) {
                continue;
            }
            $blank->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync();
        }
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
<?php
/**
 * Adds the plain 'lang' attribute to the Lang attribute collection.
 */
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'NonXMLCommonAttributes';

    /** 'lang' is validated as a LanguageCode. */
    public $attr_collections = array(
        'Lang' => array(
            'lang' => 'LanguageCode',
        ),
    );
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,47 @@
<?php
/**
* XHTML 1.1 Object Module, defines elements for generic object inclusion
* @warning Users will commonly use <embed> to cater to legacy browsers: this
* module does not allow this sort of behavior
*/
class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Object';

    /** This module is flagged as not safe. */
    public $safe = false;

    /**
     * Registers the <object> and <param> elements.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $object_attributes = array(
            'archive'  => 'URI',
            'classid'  => 'URI',
            'codebase' => 'URI',
            'codetype' => 'Text',
            'data'     => 'URI',
            'declare'  => 'Bool#declare',
            'height'   => 'Length',
            'name'     => 'CDATA',
            'standby'  => 'Text',
            'tabindex' => 'Number',
            'type'     => 'ContentType',
            'width'    => 'Length'
        );
        $this->addElement(
            'object',
            'Inline',
            'Optional: #PCDATA | Flow | param',
            'Common',
            $object_attributes
        );

        // param belongs to no content set and includes no attribute collection
        $param_attributes = array(
            'id'        => 'ID',
            'name*'     => 'Text',
            'type'      => 'Text',
            'value'     => 'Text',
            'valuetype' => 'Enum#data,ref,object'
        );
        $this->addElement('param', false, 'Empty', false, $param_attributes);
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,36 @@
<?php
/**
* XHTML 1.1 Presentation Module, defines simple presentation-related
* markup. Text Extension Module.
* @note The official XML Schema and DTD specs further divide this into
* two modules:
* - Block Presentation (hr)
* - Inline Presentation (b, big, i, small, sub, sup, tt)
* We have chosen not to heed this distinction, as content_sets
* provides satisfactory disambiguation.
*/
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Presentation';

    /**
     * Registers hr, sub, sup and the formatting elements b, big, i, small
     * and tt.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $this->addElement('hr', 'Block', 'Empty', 'Common');
        $this->addElement('sub', 'Inline', 'Inline', 'Common');
        $this->addElement('sup', 'Inline', 'Inline', 'Common');

        // inline elements that are additionally flagged as formatting
        foreach (array('b', 'big', 'i', 'small', 'tt') as $tag) {
            $element = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $element->formatting = true;
        }
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,33 @@
<?php
/**
* Module defines proprietary tags and attributes in HTML.
* @warning If this module is enabled, standards-compliance is off!
*/
class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Proprietary';

    /**
     * Registers the <marquee> element.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $marquee_attributes = array(
            'direction'    => 'Enum#left,right,up,down',
            'behavior'     => 'Enum#alternate',
            'width'        => 'Length',
            'height'       => 'Length',
            'scrolldelay'  => 'Number',
            'scrollamount' => 'Number',
            'loop'         => 'Number',
            'bgcolor'      => 'Color',
            'hspace'       => 'Pixels',
            'vspace'       => 'Pixels',
        );
        $this->addElement('marquee', 'Inline', 'Flow', 'Common', $marquee_attributes);
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,27 @@
<?php
/**
* XHTML 1.1 Ruby Annotation Module, defines elements that indicate
* short runs of text alongside base text for annotation or pronunciation.
*/
class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Ruby';

    /**
     * Registers ruby and its component elements rbc, rtc, rb, rt and rp.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $ruby_model = 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))';
        $this->addElement('ruby', 'Inline', $ruby_model, 'Common');

        // containers
        $this->addElement('rbc', false, 'Required: rb', 'Common');
        $this->addElement('rtc', false, 'Required: rt', 'Common');

        // base and annotation text: neither may contain another ruby
        $no_ruby = array('ruby' => true);

        $base = $this->addElement('rb', false, 'Inline', 'Common');
        $base->excludes = $no_ruby;

        $text = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
        $text->excludes = $no_ruby;

        $this->addElement('rp', false, 'Optional: #PCDATA', 'Common');
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,34 @@
<?php
/**
* A "safe" embed module. See SafeObject. This is a proprietary element.
*/
class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'SafeEmbed';

    /**
     * Registers <embed> with a restricted attribute set and attaches the
     * SafeEmbed post-transform.
     * @param $config Configuration object; HTML.MaxImgLength is read from it
     */
    public function setup($config)
    {
        // width and height are Pixels values parameterised by the
        // configured maximum image length
        $dimension = 'Pixels#' . $config->get('HTML.MaxImgLength');

        $attributes = array(
            'src*'              => 'URI#embedded',
            'type'              => 'Enum#application/x-shockwave-flash',
            'width'             => $dimension,
            'height'            => $dimension,
            'allowscriptaccess' => 'Enum#never',
            'allownetworking'   => 'Enum#internal',
            'flashvars'         => 'Text',
            'wmode'             => 'Enum#window',
            'name'              => 'ID',
        );

        $element = $this->addElement('embed', 'Inline', 'Empty', 'Common', $attributes);
        $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed();
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,53 @@
<?php
/**
* A "safe" object module. In theory, objects permitted by this module will
* be safe, and untrusted users can be allowed to embed arbitrary flash objects
* (maybe other types too, but only Flash is supported as of right now).
* Highly experimental.
*/
class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'SafeObject';

    /**
     * Registers restricted <object> and <param> elements, their
     * post-transforms, and the SafeObject injector.
     * @param $config Configuration object; HTML.MaxImgLength is read from it
     */
    public function setup($config)
    {
        // These definitions are not intrinsically safe: the attribute transforms
        // are a vital part of ensuring safety.

        $dimension = 'Pixels#' . $config->get('HTML.MaxImgLength');

        $object_attributes = array(
            // While technically not required by the spec, we're forcing
            // it to this value.
            'type'     => 'Enum#application/x-shockwave-flash',
            'width'    => $dimension,
            'height'   => $dimension,
            'data'     => 'URI#embedded',
            'classid'  => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
            'codebase' => new HTMLPurifier_AttrDef_Enum(array(
                'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0'
            )),
        );
        $object_element = $this->addElement(
            'object',
            'Inline',
            'Optional: param | Flow | #PCDATA',
            'Common',
            $object_attributes
        );
        $object_element->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

        $param_attributes = array(
            'id'    => 'ID',
            'name*' => 'Text',
            'value' => 'Text'
        );
        $param_element = $this->addElement('param', false, 'Empty', false, $param_attributes);
        $param_element->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam();

        $this->info_injector[] = 'SafeObject';
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,54 @@
<?php
/*
WARNING: THIS MODULE IS EXTREMELY DANGEROUS AS IT ENABLES INLINE SCRIPTING
INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
*/
/**
* XHTML 1.1 Scripting module, defines elements that are used to contain
* information pertaining to executable scripts or the lack of support
* for executable scripts.
* @note This module does not contain inline scripting elements
*/
class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Scripting';

    /** Elements defined by this module. */
    public $elements = array('script', 'noscript');

    /** Both elements are added to the Block and the Inline content sets. */
    public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');

    /** This module is flagged as not safe. */
    public $safe = false;

    /**
     * Builds the noscript and script element definitions by hand and
     * stores them in $this->info.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        // TODO: create custom child-definition for noscript that
        // auto-wraps stray #PCDATA in a similar manner to
        // blockquote's custom definition (we would use it but
        // blockquote's contents are optional while noscript's contents
        // are required)

        // TODO: convert this to new syntax, main problem is getting
        // both content sets working

        // In theory, this could be safe, but I don't see any reason to
        // allow it.
        $noscript = new HTMLPurifier_ElementDef();
        $noscript->attr = array(0 => array('Common'));
        $noscript->content_model = 'Heading | List | Block';
        $noscript->content_model_type = 'required';
        $this->info['noscript'] = $noscript;

        $script = new HTMLPurifier_ElementDef();
        $script->attr = array(
            'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')),
            'src'   => new HTMLPurifier_AttrDef_URI(true),
            'type'  => new HTMLPurifier_AttrDef_Enum(array('text/javascript'))
        );
        $script->content_model = '#PCDATA';
        $script->content_model_type = 'optional';

        // one transform instance is shared by the pre and post lists
        $type_required = new HTMLPurifier_AttrTransform_ScriptRequired();
        $script->attr_transform_post['type'] = $type_required;
        $script->attr_transform_pre['type'] = $type_required;
        $this->info['script'] = $script;
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,24 @@
<?php
/**
* XHTML 1.1 Style Attribute Module, defines the style attribute through the
* Style attribute collection.
*/
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'StyleAttribute';

    /**
     * Declares the Style collection and includes it in Core.
     */
    public $attr_collections = array(
        // The inclusion routine differs from the Abstract Modules but
        // is in line with the DTD and XML Schemas.
        'Style' => array('style' => false), // placeholder, replaced in setup()
        'Core'  => array(0 => array('Style'))
    );

    /**
     * Replaces the 'style' placeholder with a CSS attribute definition.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $css = new HTMLPurifier_AttrDef_CSS();
        $this->attr_collections['Style']['style'] = $css;
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,66 @@
<?php
/**
* XHTML 1.1 Tables Module, fully defines accessible table elements.
*/
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Tables';

    /**
     * Registers caption, table, the cell and row elements, the column
     * elements and the row-group elements.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        // alignment attributes shared by cells, rows, columns and row groups
        $alignment = array(
            'align'   => 'Enum#left,center,right,justify,char',
            'charoff' => 'Length',
            'valign'  => 'Enum#top,middle,bottom,baseline',
        );
        $cell_attributes = array_merge(
            array(
                'abbr'    => 'Text',
                'colspan' => 'Number',
                'rowspan' => 'Number',
            ),
            $alignment
        );
        $column_attributes = array_merge(
            array(
                'span'  => 'Number',
                'width' => 'MultiLength',
            ),
            $alignment
        );

        $this->addElement('caption', false, 'Inline', 'Common');

        // table content is validated by a dedicated child definition object
        $this->addElement(
            'table',
            'Block',
            new HTMLPurifier_ChildDef_Table(),
            'Common',
            array(
                'border'      => 'Pixels',
                'cellpadding' => 'Length',
                'cellspacing' => 'Length',
                'frame'       => 'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border',
                'rules'       => 'Enum#none,groups,rows,cols,all',
                'summary'     => 'Text',
                'width'       => 'Length'
            )
        );

        foreach (array('td', 'th') as $cell) {
            $this->addElement($cell, false, 'Flow', 'Common', $cell_attributes);
        }
        $this->addElement('tr', false, 'Required: td | th', 'Common', $alignment);

        $this->addElement('col', false, 'Empty', 'Common', $column_attributes);
        $this->addElement('colgroup', false, 'Optional: col', 'Common', $column_attributes);

        foreach (array('tbody', 'thead', 'tfoot') as $row_group) {
            $this->addElement($row_group, false, 'Required: tr', 'Common', $alignment);
        }
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,23 @@
<?php
/**
* XHTML 1.1 Target Module, defines target attribute in link elements.
*/
class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Target';

    /**
     * Sets a 'target' attribute definition on each listed element
     * (currently only <a>).
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        $tags = array('a');
        foreach ($tags as $tag) {
            $blank = $this->addBlankElement($tag);
            // the whole attr array is replaced, not merged
            $blank->attr = array(
                'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget(),
            );
        }
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,71 @@
<?php
/**
* XHTML 1.1 Text Module, defines basic text containers. Core Module.
* @note In the normative XML Schema specification, this module
* is further abstracted into the following modules:
* - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
* - Block Structural (div, p)
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
* - Inline Structural (br, span)
* This module, functionally, does not distinguish between these
* sub-modules, but the code is internally structured to reflect
* these distinctions.
*/
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
{
    /** Identifier of this module. */
    public $name = 'Text';

    /** Flow is composed of the Heading, Block and Inline content sets. */
    public $content_sets = array(
        'Flow' => 'Heading | Block | Inline'
    );

    /**
     * Registers the basic text containers, grouped by sub-module.
     * @param $config Configuration object (not read by this module)
     */
    public function setup($config)
    {
        // Inline Phrasal -------------------------------------------------
        $this->addElement('abbr', 'Inline', 'Inline', 'Common');
        $this->addElement('acronym', 'Inline', 'Inline', 'Common');
        $this->addElement('cite', 'Inline', 'Inline', 'Common');
        $this->addElement('dfn', 'Inline', 'Inline', 'Common');
        $this->addElement('kbd', 'Inline', 'Inline', 'Common');
        $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI'));
        $this->addElement('samp', 'Inline', 'Inline', 'Common');
        $this->addElement('var', 'Inline', 'Inline', 'Common');

        // phrasal elements that are additionally flagged as formatting
        foreach (array('em', 'strong', 'code') as $tag) {
            $phrase = $this->addElement($tag, 'Inline', 'Inline', 'Common');
            $phrase->formatting = true;
        }

        // Inline Structural ----------------------------------------------
        $this->addElement('span', 'Inline', 'Inline', 'Common');
        $this->addElement('br', 'Inline', 'Empty', 'Core');

        // Block Phrasal --------------------------------------------------
        $this->addElement('address', 'Block', 'Inline', 'Common');
        $this->addElement(
            'blockquote',
            'Block',
            'Optional: Heading | Block | List',
            'Common',
            array('cite' => 'URI')
        );

        $preformatted = $this->addElement('pre', 'Block', 'Inline', 'Common');
        $preformatted->excludes = $this->makeLookup(
            'img', 'big', 'small', 'object', 'applet', 'font', 'basefont'
        );

        foreach (range(1, 6) as $rank) {
            $this->addElement('h' . $rank, 'Heading', 'Inline', 'Common');
        }

        // Block Structural -----------------------------------------------
        $paragraph = $this->addElement('p', 'Block', 'Inline', 'Common');
        $closers = array(
            'address', 'blockquote', 'center', 'dir', 'div',
            'dl', 'fieldset', 'ol', 'p', 'ul',
        );
        // lookup keyed by element name
        $paragraph->autoclose = array_flip($closers);

        $this->addElement('div', 'Block', 'Flow', 'Common');
    }
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,207 @@
<?php
/**
* Abstract class for a set of proprietary modules that clean up (tidy)
* poorly written HTML.
* @todo Figure out how to protect some of these methods/properties
*/
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{
/**
* List of supported levels. Index zero is a special case "no fixes"
* level.
*/
public $levels = array(0 => 'none', 'light', 'medium', 'heavy');
/**
* Default level to place all fixes in. Disabled by default
*/
public $defaultLevel = null;
/**
* Lists of fixes used by getFixesForLevel(). Format is:
* HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
*/
public $fixesForLevel = array(
'light' => array(),
'medium' => array(),
'heavy' => array()
);
/**
* Lazy load constructs the module by determining the necessary
* fixes to create and then delegating to the populate() function.
* @todo Wildcard matching and error reporting when an added or
* subtracted fix has no effect.
*/
public function setup($config) {
// create fixes, initialize fixesForLevel
$fixes = $this->makeFixes();
$this->makeFixesForLevel($fixes);
// figure out which fixes to use
$level = $config->get('HTML.TidyLevel');
$fixes_lookup = $this->getFixesForLevel($level);
// get custom fix declarations: these need namespace processing
$add_fixes = $config->get('HTML.TidyAdd');
$remove_fixes = $config->get('HTML.TidyRemove');
foreach ($fixes as $name => $fix) {
// needs to be refactored a little to implement globbing
if (
isset($remove_fixes[$name]) ||
(!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))
) {
unset($fixes[$name]);
}
}
// populate this module with necessary fixes
$this->populate($fixes);
}
/**
* Retrieves all fixes per a level, returning fixes for that specific
* level as well as all levels below it.
* @param $level String level identifier, see $levels for valid values
* @return Lookup up table of fixes
*/
public function getFixesForLevel($level) {
if ($level == $this->levels[0]) {
return array();
}
$activated_levels = array();
for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
$activated_levels[] = $this->levels[$i];
if ($this->levels[$i] == $level) break;
}
if ($i == $c) {
trigger_error(
'Tidy level ' . htmlspecialchars($level) . ' not recognized',
E_USER_WARNING
);
return array();
}
$ret = array();
foreach ($activated_levels as $level) {
foreach ($this->fixesForLevel[$level] as $fix) {
$ret[$fix] = true;
}
}
return $ret;
}
/**
* Dynamically populates the $fixesForLevel member variable using
* the fixes array. It may be custom overloaded, used in conjunction
* with $defaultLevel, or not used at all.
*/
public function makeFixesForLevel($fixes) {
if (!isset($this->defaultLevel)) return;
if (!isset($this->fixesForLevel[$this->defaultLevel])) {
trigger_error(
'Default level ' . $this->defaultLevel . ' does not exist',
E_USER_ERROR
);
return;
}
$this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
}
/**
* Populates the module with transforms and other special-case code
* based on a list of fixes passed to it
* @param $lookup Lookup table of fixes to activate
*/
public function populate($fixes) {
foreach ($fixes as $name => $fix) {
// determine what the fix is for
list($type, $params) = $this->getFixType($name);
switch ($type) {
case 'attr_transform_pre':
case 'attr_transform_post':
$attr = $params['attr'];
if (isset($params['element'])) {
$element = $params['element'];
if (empty($this->info[$element])) {
$e = $this->addBlankElement($element);
} else {
$e = $this->info[$element];
}
} else {
$type = "info_$type";
$e = $this;
}
// PHP does some weird parsing when I do
// $e->$type[$attr], so I have to assign a ref.
$f =& $e->$type;
$f[$attr] = $fix;
break;
case 'tag_transform':
$this->info_tag_transform[$params['element']] = $fix;
break;
case 'child':
case 'content_model_type':
$element = $params['element'];
if (empty($this->info[$element])) {
$e = $this->addBlankElement($element);
} else {
$e = $this->info[$element];
}
$e->$type = $fix;
break;
default:
trigger_error("Fix type $type not supported", E_USER_ERROR);
break;
}
}
}
/**
* Parses a fix name and determines what kind of fix it is, as well
* as other information defined by the fix
* @param $name String name of fix
* @return array(string $fix_type, array $fix_parameters)
* @note $fix_parameters is type dependant, see populate() for usage
* of these parameters
*/
public function getFixType($name) {
    // A fix name has the shape "element@attr#property", where every part
    // is optional. Peel off "#property" first, then "@attr".
    $property = null;
    $attr = null;
    if (strpos($name, '#') !== false) {
        $pieces = explode('#', $name);
        $name = $pieces[0];
        $property = $pieces[1];
    }
    if (strpos($name, '@') !== false) {
        $pieces = explode('@', $name);
        $name = $pieces[0];
        $attr = $pieces[1];
    }

    // Whatever remains of the name is the element; it may be empty.
    $params = array();
    if ($name !== '') {
        $params['element'] = $name;
    }

    if ($attr === null) {
        // No attribute part: a bare name is a tag transform, otherwise the
        // property itself names the fix type.
        $type = ($property === null) ? 'tag_transform' : $property;
        return array($type, $params);
    }

    // Attribute transform: the property selects the stage, defaulting to 'pre'.
    $params['attr'] = $attr;
    $stage = ($property === null) ? 'pre' : $property;
    return array('attr_transform_' . $stage, $params);
}
/**
* Defines all fixes the module will perform in a compact
* associative array of fix name to fix implementation.
*/
public function makeFixes() {
    // The base module defines no fixes. Return an empty array instead of
    // falling off the end (which yields null) so the result is always an
    // array, like every subclass override, and is safe to pass to
    // array_keys() or iterate with foreach.
    return array();
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,24 @@
<?php
/**
* Name is deprecated, but allowed in strict doctypes, so it is only fixed at the 'heavy' tidy level.
*/
class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
{

    public $name = 'Tidy_Name';
    public $defaultLevel = 'heavy';

    /**
     * Registers the @name attribute transform for the a and img elements.
     * @return Associative array of fix name => transform object
     */
    public function makeFixes() {
        // @name is technically still allowed on strict doctypes, so authors
        // may keep using it; it is deprecated in future versions of XHTML.
        // Both elements share a single transform instance.
        $transform = new HTMLPurifier_AttrTransform_Name();
        return array(
            'a@name'   => $transform,
            'img@name' => $transform,
        );
    }

}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,24 @@
<?php
class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_Tidy
{

    public $name = 'Tidy_Proprietary';
    public $defaultLevel = 'light';

    /**
     * Registers transforms for the @background attribute on table-related
     * elements and for @height on table.
     * @return Associative array of fix name => transform object
     */
    public function makeFixes() {
        $fixes = array();

        // Every table-related element gets its own Background transform.
        $tableParts = array('table', 'td', 'th', 'tr', 'thead', 'tfoot', 'tbody');
        foreach ($tableParts as $tag) {
            $fixes[$tag . '@background'] = new HTMLPurifier_AttrTransform_Background();
        }

        $fixes['table@height'] = new HTMLPurifier_AttrTransform_Length('height');
        return $fixes;
    }

}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
class HTMLPurifier_HTMLModule_Tidy_Strict extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{

    public $name = 'Tidy_Strict';
    public $defaultLevel = 'light';
    public $defines_child_def = true;

    /**
     * Extends the parent's fixes with a content-model override that marks
     * blockquote as 'strictblockquote'.
     * @return Associative array of fix name => fix implementation
     */
    public function makeFixes() {
        $fixes = parent::makeFixes();
        $fixes['blockquote#content_model_type'] = 'strictblockquote';
        return $fixes;
    }

    /**
     * Supplies the child definition for the 'strictblockquote' content
     * model type; every other type is delegated to the parent.
     * @param $def Element definition carrying content_model_type and
     *        content_model
     */
    public function getChildDef($def) {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return parent::getChildDef($def);
    }

}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
<?php
/**
 * Tidy module named 'Tidy_Transitional'. It declares no fixes of its own:
 * makeFixes() is inherited unchanged from
 * HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4, and only the module name and
 * default level are set here.
 */
class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4
{
// Module name.
public $name = 'Tidy_Transitional';
// Default level for this module's fixes.
public $defaultLevel = 'heavy';
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,17 @@
<?php
class HTMLPurifier_HTMLModule_Tidy_XHTML extends HTMLPurifier_HTMLModule_Tidy
{

    public $name = 'Tidy_XHTML';
    public $defaultLevel = 'medium';

    /**
     * Registers the @lang attribute transform. The fix name has no element
     * part, so it is not tied to a single element.
     * @return Associative array of fix name => transform object
     */
    public function makeFixes() {
        return array(
            '@lang' => new HTMLPurifier_AttrTransform_Lang(),
        );
    }

}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,161 @@
<?php
/**
 * Tidy fixes shared by the XHTML and HTML 4 modules: maps deprecated
 * presentational tags and attributes to replacement tags and/or CSS
 * declarations.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends HTMLPurifier_HTMLModule_Tidy
{

    /**
     * Builds the table of fixes for deprecated elements and attributes.
     * @return Associative array mapping fix names ("element" for tag
     *         transforms, "element@attr" for attribute transforms) to the
     *         transform object implementing the fix. Keys assigned in one
     *         chained assignment share a single transform instance.
     */
    public function makeFixes() {

        $r = array();

        // == deprecated tag transforms ===================================

        $r['font']   = new HTMLPurifier_TagTransform_Font();
        $r['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
        $r['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
        $r['center'] = new HTMLPurifier_TagTransform_Simple('div',  'text-align:center;');
        $r['u']      = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:underline;');
        $r['s']      = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');
        $r['strike'] = new HTMLPurifier_TagTransform_Simple('span', 'text-decoration:line-through;');

        // == deprecated attribute transforms =============================

        $r['caption@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                // we're following IE's behavior, not Firefox's, due
                // to the fact that no one supports caption-side:right,
                // W3C included (with CSS 2.1). This is a slightly
                // unreasonable attribute!
                'left'   => 'text-align:left;',
                'right'  => 'text-align:right;',
                'top'    => 'caption-side:top;',
                'bottom' => 'caption-side:bottom;' // not supported by IE
            ));

        // @align for img -------------------------------------------------
        $r['img@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                'left'   => 'float:left;',
                'right'  => 'float:right;',
                'top'    => 'vertical-align:top;',
                'middle' => 'vertical-align:middle;',
                'bottom' => 'vertical-align:baseline;',
            ));

        // @align for table -----------------------------------------------
        $r['table@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                'left'   => 'float:left;',
                'center' => 'margin-left:auto;margin-right:auto;',
                'right'  => 'float:right;'
            ));

        // @align for hr -----------------------------------------------
        $r['hr@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', array(
                // we use both text-align and margin because these work
                // for different browsers (IE and Firefox, respectively)
                // and the melange makes for a pretty cross-compatible
                // solution
                'left'   => 'margin-left:0;margin-right:auto;text-align:left;',
                'center' => 'margin-left:auto;margin-right:auto;text-align:center;',
                'right'  => 'margin-left:auto;margin-right:0;text-align:right;'
            ));

        // @align for h1, h2, h3, h4, h5, h6, p, div ----------------------
        // {{{
            $align_lookup = array();
            $align_values = array('left', 'right', 'center', 'justify');
            foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;";
        // }}}
        $r['h1@align'] =
        $r['h2@align'] =
        $r['h3@align'] =
        $r['h4@align'] =
        $r['h5@align'] =
        $r['h6@align'] =
        $r['p@align']  =
        $r['div@align'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup);

        // @bgcolor for table, tr, td, th ---------------------------------
        // tr is named in this section's heading and is registered here
        // alongside the other table elements so that bgcolor on a row is
        // transformed the same way as on a table or cell.
        $r['table@bgcolor'] =
        $r['tr@bgcolor'] =
        $r['td@bgcolor'] =
        $r['th@bgcolor'] =
            new HTMLPurifier_AttrTransform_BgColor();

        // @border for img ------------------------------------------------
        $r['img@border'] = new HTMLPurifier_AttrTransform_Border();

        // @clear for br --------------------------------------------------
        $r['br@clear'] =
            new HTMLPurifier_AttrTransform_EnumToCSS('clear', array(
                'left'  => 'clear:left;',
                'right' => 'clear:right;',
                'all'   => 'clear:both;',
                'none'  => 'clear:none;',
            ));

        // @height for td, th ---------------------------------------------
        $r['td@height'] =
        $r['th@height'] =
            new HTMLPurifier_AttrTransform_Length('height');

        // @hspace for img ------------------------------------------------
        $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace');

        // @noshade for hr ------------------------------------------------
        // this transformation is not precise but often good enough.
        // different browsers use different styles to designate noshade
        $r['hr@noshade'] =
            new HTMLPurifier_AttrTransform_BoolToCSS(
                'noshade',
                'color:#808080;background-color:#808080;border:0;'
            );

        // @nowrap for td, th ---------------------------------------------
        $r['td@nowrap'] =
        $r['th@nowrap'] =
            new HTMLPurifier_AttrTransform_BoolToCSS(
                'nowrap',
                'white-space:nowrap;'
            );

        // @size for hr  --------------------------------------------------
        $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height');

        // @type for li, ol, ul -------------------------------------------
        // {{{
            $ul_types = array(
                'disc'   => 'list-style-type:disc;',
                'square' => 'list-style-type:square;',
                'circle' => 'list-style-type:circle;'
            );
            $ol_types = array(
                '1'   => 'list-style-type:decimal;',
                'i'   => 'list-style-type:lower-roman;',
                'I'   => 'list-style-type:upper-roman;',
                'a'   => 'list-style-type:lower-alpha;',
                'A'   => 'list-style-type:upper-alpha;'
            );
            // li accepts the union of the ul and ol type values
            $li_types = $ul_types + $ol_types;
        // }}}

        $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types);
        $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true);
        $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true);

        // @vspace for img ------------------------------------------------
        $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace');

        // @width for hr, td, th ------------------------------------------
        $r['td@width'] =
        $r['th@width'] =
        $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width');

        return $r;

    }

}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
<?php
/**
 * Module named 'XMLCommonAttributes'. It only declares data: a 'Lang'
 * attribute collection holding the xml:lang attribute.
 */
class HTMLPurifier_HTMLModule_XMLCommonAttributes extends HTMLPurifier_HTMLModule
{
// Module name.
public $name = 'XMLCommonAttributes';
// Attribute collections contributed by this module, keyed by collection
// name; each maps attribute name => attribute definition string.
public $attr_collections = array(
'Lang' => array(
'xml:lang' => 'LanguageCode',
)
);
}
// vim: et sw=4 sts=4