﻿//-----------------------------------------------------------------------------
// XHTML
//
// Copyright 2005-2010 - Xcential Group LLC.
//
//-----------------------------------------------------------------------------

//=============================================================================
// Constructor

// Constructor for XHTML objects. The body is intentionally empty: it sets up
// no private, privileged, or initialization state, so an instance carries no
// own properties. The helpers in this file are attached to the function itself.
function XHTML()
{
   // Nothing to initialize.
}

// Class-name tag attached to the constructor (same text that valueOf returns).
XHTML.objectClass = "XHTML";

//=============================================================================
// Static Interface

// Namespace URI of XHTML elements. isA compares node namespaces against it and
// convertToXHTML writes it into the xmlns:xhtml declarations it generates.
XHTML.NAMESPACE_URI        = "http://www.w3.org/1999/xhtml";
// NOTE(review): not referenced in the visible source; the conversion regexes
// hard-code the same "xhtml:" prefix - confirm they are meant to stay in sync.
XHTML.PREFERRED_PREFIX     = "xhtml";

// NOTE(review): format identifier; its consumers are not visible in this file.
XHTML.FORMAT               = "xhtml";

// Readable names for the positional boolean flags of XHTML.convertToXHTML
// (validate, stripAttributes, synthesizeTags); the converter passes the first
// three to itself when it retries a failed conversion.
XHTML.VALIDATE             = true;
XHTML.STRIP_ATTRIBUTES     = true;
XHTML.SYNTHESIZE_TAGS      = true;
// Presumably the value for the addNamespaceDecl argument - not used in the
// visible source; verify against callers.
XHTML.ADD_NAMESPACE_DECL   = true;

//-----------------------------------------------------------------------------

// Primitive value of the XHTML constructor itself: always the literal class
// name, so coercing the function to a primitive yields "XHTML".
XHTML.valueOf = function()
{
   var className = "XHTML";
   return className;
};

//-----------------------------------------------------------------------------

// Tests whether a node represents the XHTML element called xhtmlName.
//
//    xNode      node to test; null/undefined or a "nothing" node never matches
//    xhtmlName  element name to look for, or "*" for any element in the XHTML
//               namespace; converted with toString()
//    className  optional; when given, the node's "class" attribute must equal it
//
// Returns true when the node is an element (and passes the class filter) and
// any one of these holds, checked in this order:
//    - xhtmlName is "*" and the node is in the XHTML namespace
//    - its "role" attribute equals the lower-cased name
//    - its "base" attribute equals "slim:" + name
//    - its local name equals the name and it is in the XHTML namespace
// Comparisons are deliberately loose (==) because the node accessors may not
// return primitive strings.
XHTML.isA = function(
   xNode,
   xhtmlName,
   className
)
{
   var wantedName = (xhtmlName == null) ? null : xhtmlName.toString();
   var wantedClass = (className == null) ? null : className.toString();

   // Without both a node and a name there is nothing to compare.
   if (xNode == null || wantedName == null)
      return false;

   // Only real element nodes can qualify.
   if (xNode.isNothing() || xNode.getNodeType() != XNode.NODE_ELEMENT)
      return false;

   // Optional class filter.
   if (wantedClass != null && xNode.getAttribute("class") != wantedClass)
      return false;

   var matched =
      (wantedName == "*" && xNode.getNamespaceURI() == XHTML.NAMESPACE_URI) ||
      (xNode.getAttribute("role") == wantedName.toLowerCase()) ||
      (xNode.getAttribute("base") == "slim:" + wantedName) ||
      (xNode.getLocalName() == wantedName && xNode.getNamespaceURI() == XHTML.NAMESPACE_URI);

   return matched;
};

// Reduces an XML/XHTML string to plain text.
//
//    xml   markup to flatten; any falsy value (null, undefined, "") yields ""
//
// Returns the input with CDATA delimiters and all tags removed and the five
// predefined XML entities decoded.
//
// "&amp;" is decoded LAST on purpose: decoding it first turns an escaped
// entity such as "&amp;lt;" into "&lt;" and then into "<", i.e. decodes twice.
// Tags are matched with [^>]* rather than .*? so that a tag whose attributes
// wrap onto another line is removed as well.
XHTML.convertToText = function(
   xml
)
{
   if (!xml)
      return "";

   var text = xml;

   // Drop CDATA delimiters but keep the section content.
   text = text.replace(/<\!\[CDATA\[/g,"");
   text = text.replace(/\]\]>/g,"");

   // Drop every tag, including ones that span line breaks.
   text = text.replace(/<[^>]*>/g, "");

   // Decode the predefined entities; the ampersand must come last (see above).
   text = text.replace(/&lt;/g, "<");
   text = text.replace(/&gt;/g, ">");
   text = text.replace(/&quot;/g, "\"");
   text = text.replace(/&apos;/g, "'");
   text = text.replace(/&amp;/g, "&");

   return text;
};

//-----------------------------------------------------------------------------

// Converts prefixed XHTML markup into prefix-free HTML.
//
//    xml   markup to convert; any falsy value (null, undefined, "") yields ""
//
// Returns the input with CDATA delimiters, every "xhtml:" prefix and every
// xmlns:<prefix> declaration removed, and each whitespace character replaced
// by a single space.
//
// A quoted declaration is removed as exactly name="value". The looser legacy
// pattern that follows runs to the next whitespace or ">", which used to
// swallow the "/" of a self-closing tag (<br xmlns:xhtml="..."/> became
// <br >). It is kept only as a fallback for unquoted or malformed declarations.
XHTML.convertToHTML = function(
   xml
)
{
   if (!xml)
      return "";

   var html = xml;

   // Drop CDATA delimiters but keep the section content.
   html = html.replace(/<\!\[CDATA\[/g,"");
   html = html.replace(/\]\]>/g,"");

   // Drop the element prefix.
   html = html.replace(/xhtml\:/g, "");

   // Well-formed (quoted) namespace declarations: remove name and value only.
   html = html.replace(/xmlns\:[^\s\>\=]+\=(\"[^\"]*\"|\'[^\']*\')/g, "");

   // Anything else that still starts with "xmlns:" (legacy behavior).
   html = html.replace(/xmlns\:[^\s\>]+/g, "");

   html = html.replace(/\s/g, " ");

   return html;
};

//-----------------------------------------------------------------------------

// Rewrites a run of <xhtml:div> blocks as <xhtml:p> paragraphs.
//
//    xhtml   markup to convert; any falsy value (null, undefined, "") yields ""
//            so this function agrees with the other converters instead of
//            throwing on a missing argument
//
// Processing steps:
//    - line breaks are flattened to spaces so the patterns can span them
//    - when the whole fragment is one div wrapping further divs, the wrapper
//      is peeled off and put back at the end as a bare <xhtml:div> (any
//      attributes on the wrapper are not carried over)
//    - <xhtml:br/> elements directly next to a div boundary are dropped
//    - every remaining div becomes an indented <xhtml:p> followed by a newline
//      (attributes on the div are dropped)
//
// Returns the converted markup.
XHTML.convertToParas = function(
   xhtml
)
{
   if (!xhtml)
      return "";

   xhtml = xhtml.replace(/\r?\n\r?/g, " ");

   // Peel off an outer wrapper div; it is restored after the conversion.
   var restoreDiv = false;
   if ((/^\s*<xhtml:div[^>]*>\s*<xhtml:div>/).test(xhtml) && (/<\/xhtml:div[^>]*>\s*<\/xhtml:div>\s*$/).test(xhtml))
   {
      xhtml = xhtml.replace(/^\s*<xhtml:div[^>]*>\s*<xhtml:div>/, "<xhtml:div>");
      xhtml = xhtml.replace(/<\/xhtml:div[^>]*>\s*<\/xhtml:div>\s*$/, "</xhtml:div>");
      restoreDiv = true;
   }

   // Line breaks touching a div boundary add nothing once divs are paragraphs.
   xhtml = xhtml.replace(/<xhtml:div[^>]*>\s*<xhtml:br\/>/g, "<xhtml:div>");
   xhtml = xhtml.replace(/<\/xhtml:div>\s*<xhtml:br\/>/g, "</xhtml:div>");
   xhtml = xhtml.replace(/\s*<xhtml:br\/>\s*<xhtml:div[^>]*>/g, "<xhtml:div>");
   xhtml = xhtml.replace(/\s*<xhtml:br\/>\s*<\/xhtml:div>/g, "</xhtml:div>");

   // Each div becomes one paragraph on its own line.
   xhtml = xhtml.replace(/<xhtml:div[^>]*>(.*?)<\/xhtml:div>/g, "   <xhtml:p>$1</xhtml:p>\n");

   return (xhtml.length > 0 && restoreDiv) ? "<xhtml:div>\n" + xhtml + "</xhtml:div>" : xhtml;
};

//-----------------------------------------------------------------------------

// Repairs erroneous XHTML that may have made its way into stored data.
//
//    xhtml   markup to repair; any falsy value yields ""
//
// Every <xhtml:meta ...> or <xhtml:link ...> tag that carries attributes is
// rewritten in self-closing form; tags that are already self-closed come out
// unchanged. Returns the repaired markup.
XHTML.fix = function(
   xhtml
)
{
   var metaOrLinkTag = /<xhtml:(meta|link)\s+(.*?)\/?>/g;

   return xhtml ? xhtml.replace(metaOrLinkTag, "<xhtml:$1 $2/>") : "";
};

//-----------------------------------------------------------------------------

// Converts loosely formed HTML into "xhtml:"-prefixed XHTML markup.
//
//    html              markup to convert; any falsy value yields ""
//    validate          when true, the result is wrapped in a namespace-declaring
//                      <xhtml:div> and parsed with XDoc; an empty toXML() is
//                      treated as "not well-formed" (default false)
//    stripAttributes   when true, all attributes are dropped (default false)
//    synthesizeTags    when true, only p/table/tr/td structure is kept and
//                      every other tag is replaced by a space (default false)
//    addNamespaceDecl  when true, an xmlns:xhtml declaration is added to the
//                      first opening tag of the result (default false)
//
// When validation fails the conversion is retried from the original input,
// first with attributes stripped, then with tag synthesis as well; if that
// still fails (or XDoc throws) the result degrades to a single <xhtml:div>
// holding the normalized text with all tags removed.
//
// Returns the converted markup.
//
// NOTE(review): script/embed/object/input/textarea elements are removed with
// regular expressions and, unless stripAttributes is set, every attribute is
// copied through (event handlers included). Do not treat the output as
// sanitized.
XHTML.convertToXHTML = function(
   html,
   validate,
   stripAttributes,
   synthesizeTags,
   addNamespaceDecl
)
{
   html = (html == null) ? null : html;
   validate = (validate == null) ? false : validate;
   stripAttributes = (stripAttributes == null) ? false : stripAttributes;
   synthesizeTags = (synthesizeTags == null) ? false : synthesizeTags;
   addNamespaceDecl = (addNamespaceDecl == null) ? false : addNamespaceDecl;

   if (!html)
      return "";

   var xml = html;

   xml = xml.replace(/\r?\n\r?/g, " ");

   // Escape every bare "&" while leaving existing references alone: park the
   // predefined entities and numeric references behind placeholders, escape
   // what is left, then restore them. Numeric references are recognized by
   // their digits (decimal or x-prefixed hex) of any length; the previous
   // "up to four arbitrary characters" rule double-escaped references such as
   // &#x201C; or &#20013;, including ones this function itself produces below.
   xml = xml.replace(/\&nbsp;/g, "&#160;");
   xml = xml.replace(/&(amp|lt|gt|quot|apos);/g, "CHAR-ENTITY($1)");
   xml = xml.replace(/&#([0-9]+|x[0-9a-fA-F]+);/g, "CHAR-REF($1)");
   xml = xml.replace(/&/g, "&amp;");
   xml = xml.replace(/CHAR-ENTITY\(([^\)]+)\)/g, "&$1;");
   xml = xml.replace(/CHAR-REF\(([^\)]+)\)/g, "&#$1;");

   // Get rid of MS office tags
   xml = xml.replace(/<\/?(ve|o|o12|r|m|v|wp|w10|w|st1):[^>]+>/g," ");
   xml = xml.replace(/<\?xml:namespace[^>]*>/g, ""); // Bizarre half processing instruction thingy

   // Normalize prefixes: drop any existing "xhtml:", prefix every tag, then
   // undo the prefix where it does not belong (closing-tag slash, "<!"
   // constructs, tags that already carry another prefix).
   xml = xml.replace(/xhtml\:/g, "");
   xml = xml.replace(/</g, "<xhtml:");
   xml = xml.replace(/<xhtml:meta[^>]*>/ig,"");
   xml = xml.replace(/<xhtml:link[^>]*>/ig,"");
   xml = xml.replace(/<xhtml:\//g, "</xhtml:");
   xml = xml.replace(/<xhtml:\!/g, "<!");
   xml = xml.replace(/<xhtml:([^\s\>]*)\:/g, "<$1:");
   xml = xml.replace(/<xhtml:br>/gi, "<xhtml:br/>"); // Must be case insensitive at this point

   // Drop namespace declarations already present in the input. The previous
   // pattern contained a stray "]" and therefore never matched.
   xml = xml.replace(/xmlns:xhtml\=(\"[^\"]*\"|\'[^\']*\')/g, "");

   // Get rid of all input areas.
   xml = xml.replace(/<\/?xhtml:input[^>]*>/gi, "");
   xml = xml.replace(/<\/?xhtml:textarea[^>]*>/gi, "");

   // This makes regexp handling easier (it is undone later)
   xml = xml.replace(/\/>/g, ">>");

   // Remove any embedded script or objects
   xml = xml.replace(/<xhtml:script[^>]*>.*?<\/xhtml:script>/ig,"");
   xml = xml.replace(/<xhtml:embed[^>]*>>/ig,"");
   xml = xml.replace(/<xhtml:embed[^>]*>.*?<\/xhtml:embed>/ig,"");
   xml = xml.replace(/<xhtml:object[^>]*>>/ig,"");
   xml = xml.replace(/<xhtml:object[^>]*>.*?<\/xhtml:object>/ig,"");

   // Create character references for non ANSI characters (code > 255).
   // A surrogate pair is combined into its single code point because a
   // reference to a lone surrogate is not a legal XML character. The tail is
   // always appended, so the escapes are kept even when the text ends with
   // such a character.
   var newXML = "";
   var begin = 0;
   for (var i = 0; i < xml.length; i++)
   {
      var charCode = xml.charCodeAt(i);
      if (charCode > 255)
      {
         var unitCount = 1;
         if (charCode >= 0xD800 && charCode <= 0xDBFF && i + 1 < xml.length)
         {
            var lowSurrogate = xml.charCodeAt(i + 1);
            if (lowSurrogate >= 0xDC00 && lowSurrogate <= 0xDFFF)
            {
               charCode = ((charCode - 0xD800) * 0x400) + (lowSurrogate - 0xDC00) + 0x10000;
               unitCount = 2;
            }
         }
         newXML += xml.substring(begin, i) + "&#" + charCode + ";";
         i += unitCount - 1;
         begin = i + 1;
      }
   }
   xml = newXML + xml.substring(begin);

   // Cleanup all tags and attributes: lower-case each tag name and, unless
   // attributes are being stripped, re-emit its attributes with lower-cased
   // names (single-quoted first, then double-quoted, then unquoted ones, which
   // gain double quotes). Attributes without a value are dropped.
   newXML = "";
   while (XMatch(xml,/(xhtml\:[^\s\>]+)/))
   {
      newXML += XMatch.leftContext;
      newXML += XMatch.matches[1].toLowerCase();
      xml = XMatch.rightContext;
      if (XMatch(xml,/^\s*([^>]+)>/))
      {
         var attributes = XMatch.matches[1];
         xml = XMatch.rightContext;
         if (!stripAttributes)
         {
            while (XMatch(attributes,/([^\s\=]+)\=\'([^\']*)\'/))
            {
               newXML += " " + XMatch.matches[1].toLowerCase() + "='" + XMatch.matches[2] + "'";
               attributes = XMatch.leftContext + " " + XMatch.rightContext;
            }
            while (XMatch(attributes,/([^\s\=]+)\=\"([^\"]*)\"/))
            {
               newXML += " " + XMatch.matches[1].toLowerCase() + "=\"" + XMatch.matches[2] + "\"";
               attributes = XMatch.leftContext + " " + XMatch.rightContext;
            }
            while (XMatch(attributes,/([^\s\=]+)\=([^\s]+)/))
            {
               newXML += " " + XMatch.matches[1].toLowerCase() + "=\"" + XMatch.matches[2] + "\"";
               attributes = XMatch.leftContext + " " + XMatch.rightContext;
            }
         }
         newXML += ">";
      }
   }
   xml = newXML + xml;

   // Undo the temporary ">>" marker for self-closing tags.
   xml = xml.replace(/>>/g, "/>");

   // This is a last ditch effort to save some markup - the next attempt strips away all tags
   if (synthesizeTags)
   {
      // Park the tags worth keeping behind placeholders, blank out every other
      // tag, then rebuild the structure from the placeholders.
      xml = xml.replace(/<xhtml:p>/g, "{p}");
      xml = xml.replace(/<\/xhtml:p>/g, "{/p}");
      xml = xml.replace(/<xhtml:table>/g, "{table}");
      xml = xml.replace(/<xhtml:tr>/g, "{tr}");
      xml = xml.replace(/<xhtml:td>/g, "{td}");
      xml = xml.replace(/<\/xhtml:table>/g, "{/table}");
      xml = xml.replace(/<.*?>/g, " ");
      xml = xml.replace(/{p}/g, "<xhtml:p>");
      xml = xml.replace(/{\/p}/g, "</xhtml:p>");
      xml = xml.replace(/{table}/g, "<xhtml:table>");
      xml = xml.replace(/{tr}/g, "</xhtml:tr><xhtml:tr>");
      // A cell marker closes the previous cell and opens the next one; it used
      // to emit two closing tags and never opened a cell.
      xml = xml.replace(/{td}/g, "</xhtml:td><xhtml:td>");
      xml = xml.replace(/{\/table}/g, "</xhtml:table>");
      // Remove the spurious closers/openers produced at the start of a table or row.
      xml = xml.replace(/<xhtml:table>\s*<\/xhtml:tr>/g, "<xhtml:table>");
      xml = xml.replace(/<xhtml:tr>\s*<\/xhtml:td>/g, "<xhtml:tr>");
      xml = xml.replace(/<xhtml:td>\s*<\/xhtml:tr>/g, "</xhtml:tr>");
      xml = xml.replace(/<xhtml:tr>\s*<\/xhtml:table>/g, "</xhtml:table>");
      // NOTE(review): the last cell of a row and the last row of a table are
      // still left unclosed, so synthesized tables usually fail validation and
      // fall through to the text-only fallback below - confirm intended design.
   }

   if (validate)
   {
      try
      {
         var verifyXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
         verifyXML += "<xhtml:div xmlns:xhtml=\"" + XHTML.NAMESPACE_URI + "\">";
         verifyXML += xml;
         verifyXML += "</xhtml:div>";
         var xDoc = XDoc(verifyXML);
         if (xDoc.toXML() == "")
         {
            // Retry from the original input with progressively lossier settings.
            if (!stripAttributes)
               xml = XHTML.convertToXHTML(html, XHTML.VALIDATE, XHTML.STRIP_ATTRIBUTES);
            else if (!synthesizeTags)
               xml = XHTML.convertToXHTML(html, XHTML.VALIDATE, XHTML.STRIP_ATTRIBUTES, XHTML.SYNTHESIZE_TAGS);
            else
               throw XMsg("XHTML document is not well-formed.");
         }
      }
      catch (error)
      {
         // Nothing could be salvaged: keep only the text.
         xml = "<xhtml:div>" + XString(xml.replace(/<[^>]+>/g, " ")).normalize() + "</xhtml:div>";
      }
   }

   // Declare the namespace on the first opening tag. The tag name must stop at
   // "/" as well, otherwise a self-closing first tag (<xhtml:br/>) would get
   // the declaration inserted after its slash.
   if (addNamespaceDecl && XMatch(xml,/(<xhtml:[^\s\/\>]*)/))
   {
      xml = XMatch.leftContext;
      xml += XMatch.matches[1];
      xml += " xmlns:xhtml=\"" + XHTML.NAMESPACE_URI + "\" ";
      xml += XMatch.rightContext;
   }

   return xml;
};

//=============================================================================
// Public Interface

//=============================================================================


