alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Lift Framework example source code file (PCDataMarkupParser.scala)

This example Lift Framework source code file (PCDataMarkupParser.scala) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Java - Lift Framework tags/keywords

boolean, boolean, box, elem, io, namespacebinding, namespacebinding, node, null, some, string, string, stringbuilder, stringbuilder, text

The Lift Framework PCDataMarkupParser.scala source code

/*
 * Copyright 2006-2011 WorldWide Conferencing, LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.liftweb
package util

import common._

import scala.xml.parsing.{ MarkupParser, MarkupHandler, FatalError, ConstructingHandler, ExternalSources }
import scala.xml.dtd._
import scala.xml._
import java.io.InputStream
import scala.io.{ Codec, Source }

/**
 * Utilities for simplifying use of named HTML symbols.
 */
object HtmlEntities {
  /**
   * A list of tuples which match named HTML symbols to their character codes.
   */
  val entList = List(("quot",34), ("amp",38), ("lt",60), ("gt",62), ("nbsp",160), ("iexcl",161), ("cent",162), ("pound",163), ("curren",164), ("yen",165),
                     ("euro",8364), ("brvbar",166), ("sect",167), ("uml",168), ("copy",169), ("ordf",170), ("laquo",171), ("shy",173), ("reg",174), ("trade",8482),
                     ("macr",175), ("deg",176), ("plusmn",177), ("sup2",178), ("sup3",179), ("acute",180), ("micro",181), ("para",182), ("middot",183), ("cedil",184),
                     ("sup1",185), ("ordm",186), ("raquo",187), ("frac14",188), ("frac12",189), ("frac34",190), ("iquest",191), ("times",215), ("divide",247),
                     ("Agrave",192), ("Aacute",193), ("Acirc",194), ("Atilde",195), ("Auml",196), ("Aring",197), ("AElig",198), ("Ccedil",199), ("Egrave",200),
                     ("Eacute",201), ("Ecirc",202), ("Euml",203), ("Igrave",204), ("Iacute",205), ("Icirc",206), ("Iuml",207), ("ETH",208), ("Ntilde",209),
                     ("Ograve",210), ("Oacute",211), ("Ocirc",212), ("Otilde",213), ("Ouml",214), ("Oslash",216), ("Ugrave",217), ("Uacute",218), ("Ucirc",219),
                     ("Uuml",220), ("Yacute",221), ("THORN",222), ("szlig",223), ("agrave",224), ("aacute",225), ("acirc",226), ("atilde",227), ("auml",228),
                     ("aring",229), ("aelig",230), ("ccedil",231), ("egrave",232), ("eacute",233), ("ecirc",234), ("euml",235), ("igrave",236), ("iacute",237),
                     ("icirc",238), ("iuml",239), ("eth",240), ("ntilde",241), ("ograve",242), ("oacute",243), ("ocirc",244), ("otilde",245), ("ouml",246),
                     ("oslash",248), ("ugrave",249), ("uacute",250), ("ucirc",251), ("uuml",252), ("yacute",253), ("thorn",254), ("yuml",255), ("OElig",338),
                     ("oelig",339), ("Scaron",352), ("scaron",353), ("Yuml",376), ("circ",710), ("ensp",8194), ("emsp",8195), ("zwnj",204), ("zwj",8205), ("lrm",8206),
                     ("rlm",8207), ("ndash",8211), ("mdash",8212), ("lsquo",8216), ("rsquo",8217), ("sbquo",8218), ("ldquo",8220), ("rdquo",8221), ("bdquo",8222),
                     ("dagger",8224), ("Dagger",8225), ("permil",8240), ("lsaquo",8249), ("rsaquo",8250), ("fnof",402), ("bull",8226), ("hellip",8230), ("prime",8242),
                     ("Prime",8243), ("oline",8254), ("frasl",8260), ("weierp",8472), ("image",8465), ("real",8476), ("alefsym",8501), ("larr",8592), ("uarr",8593),
                     ("rarr",8594), ("darr",8495), ("harr",8596), ("crarr",8629), ("lArr",8656), ("uArr",8657), ("rArr",8658), ("dArr",8659), ("hArr",8660),
                     ("forall",8704), ("part",8706), ("exist",8707), ("empty",8709), ("nabla",8711), ("isin",8712), ("notin",8713), ("ni",8715), ("prod",8719),
                     ("sum",8721), ("minus",8722), ("lowast",8727), ("radic",8730), ("prop",8733), ("infin",8734), ("ang",8736), ("and",8743), ("or",8744),
                     ("cap",8745), ("cup",8746), ("int",8747), ("there4",8756), ("sim",8764), ("cong",8773), ("asymp",8776), ("ne",8800), ("equiv",8801), ("le",8804),
                     ("ge",8805), ("sub",8834), ("sup",8835), ("nsub",8836), ("sube",8838), ("supe",8839), ("oplus",8853), ("otimes",8855), ("perp",8869), ("sdot",8901),
                     ("lceil",8968), ("rceil",8969), ("lfloor",8970), ("rfloor",8971), ("lang",9001), ("rang",9002), ("loz",9674), ("spades",9824), ("clubs",9827),
                     ("hearts",9829), ("diams",9830), ("Alpha",913), ("Beta",914), ("Gamma",915), ("Delta",916), ("Epsilon",917), ("Zeta",918), ("Eta",919),
                     ("Theta",920), ("Iota",921), ("Kappa",922), ("Lambda",923), ("Mu",924), ("Nu",925), ("Xi",926), ("Omicron",927), ("Pi",928), ("Rho",929),
                     ("Sigma",931), ("Tau",932), ("Upsilon",933), ("Phi",934), ("Chi",935), ("Psi",936), ("Omega",937), ("alpha",945), ("beta",946), ("gamma",947),
                     ("delta",948), ("epsilon",949), ("zeta",950), ("eta",951), ("theta",952), ("iota",953), ("kappa",954), ("lambda",955), ("mu",956), ("nu",957),
                     ("xi",958), ("omicron",959), ("pi",960), ("rho",961), ("sigmaf",962), ("sigma",963), ("tau",964), ("upsilon",965), ("phi",966), ("chi",967),
                     ("psi",968), ("omega",969), ("thetasym",977), ("upsih",978), ("piv",982))

  val entMap: Map[String, Char] = Map.empty ++ entList.map{ case (name, value) => (name, value.toChar)}

  val revMap: Map[Char, String] = Map(entList.map{ case (name, value) => (value.toChar, name)} :_*)

  val entities = entList.map{case (name, value) => (name, new ParsedEntityDecl(name, new IntDef(value.toChar.toString)))}

  def apply() = entities
}

/**
 * Extends the Markup Parser to do the right thing (tm) with PCData blocks
 */
trait PCDataMarkupParser[PCM <: MarkupParser with MarkupHandler] extends MarkupParser { self: PCM =>
  /** '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'
   *
   * see [15]
   */
  override def xCharData: NodeSeq = {
    xToken("[CDATA[")
    val pos1 = pos
    val sb: StringBuilder = new StringBuilder()
    while (true) {
      if (ch==']'  &&
          { sb.append(ch); nextch; ch == ']' } &&
          { sb.append(ch); nextch; ch == '>' } ) {
        sb.setLength(sb.length - 2);
        nextch;
        return PCData(sb.toString)
      } else sb.append( ch );
      nextch;
    }
    throw FatalError("this cannot happen");
  }
}

class PCDataXmlParser(val input: Source) extends ConstructingHandler with PCDataMarkupParser[PCDataXmlParser] with ExternalSources  {
  val preserveWS = true
  ent ++= HtmlEntities()
  import scala.xml._
  /** parse attribute and create namespace scope, metadata
   *  [41] Attributes    ::= { S Name Eq AttValue }
   */
  override def xAttributes(pscope:NamespaceBinding): (MetaData,NamespaceBinding) = {
    var scope: NamespaceBinding = pscope
    var aMap: MetaData = Null
    while (isNameStart(ch)) {
      val pos = this.pos

      val qname = xName
      val _     = xEQ
      val value = xAttributeValue()

      Utility.prefix(qname) match {
        case Some("xmlns") =>
          val prefix = qname.substring(6 /* xmlns: */ , qname.length);
          scope = new NamespaceBinding(prefix, value, scope);

        case Some(prefix)       =>
          val key = qname.substring(prefix.length+1, qname.length);
          aMap = new PrefixedAttribute(prefix, key, Text(value), aMap);

        case _             =>
          if( qname == "xmlns" )
          scope = new NamespaceBinding(null, value, scope);
          else
          aMap = new UnprefixedAttribute(qname, Text(value), aMap);
      }

      if ((ch != '/') && (ch != '>') && ('?' != ch))
      xSpace;
    }

    def findIt(base: MetaData, what: MetaData): MetaData = (base, what) match {
      case (_, Null) => Null
      case (upb: UnprefixedAttribute, upn: UnprefixedAttribute) if upb.key == upn.key => upn
      case (pb: PrefixedAttribute, pn: PrefixedAttribute) if pb.key == pn.key && pb.pre == pn.key => pn
      case _ => findIt(base, what.next)
    }

    if(!aMap.wellformed(scope)) {
      if (findIt(aMap, aMap.next) != Null) {
        reportSyntaxError( "double attribute")
      }
    }

    (aMap,scope)
  }

  /**
   * report a syntax error
   */
  override def reportSyntaxError(pos: Int, msg: String) {

    //error("MarkupParser::synerr") // DEBUG
    import scala.io._


    val line = Position.line(pos)
    val col = Position.column(pos)
    val report = curInput.descr + ":" + line + ":" + col + ": " + msg
    System.err.println(report)
    try {
      System.err.println(curInput.getLine(line))
    } catch {
      case e: Exception => // ignore
    }
    var i = 1
    while (i < col) {
      System.err.print(' ')
      i += 1
    }
    System.err.println('^')
    throw new ValidationException(report)
  }

}

object PCDataXmlParser {
  import Helpers._

  def apply(in: InputStream): Box[NodeSeq] = {
    for {
      ba <- tryo(Helpers.readWholeStream(in))
      ret <- apply(new String(ba, "UTF-8"))
    } yield ret
  }

  private def apply(source: Source): Box[NodeSeq] = {
    for {
      p <- tryo{new PCDataXmlParser(source)}
      val _ = while (p.ch != '<' && p.curInput.hasNext) p.nextch
      bd <- tryo(p.document)
      doc <- Box !! bd
    } yield (doc.children: NodeSeq)

  }

  def apply(in: String): Box[NodeSeq] = {
    var pos = 0
    val len = in.length
    def moveToLT() {
      while (pos < len && in.charAt(pos) != '<') {
        pos += 1
      }
    }
    
    moveToLT()

    // scan past <? ... ?>
    if (pos + 1 < len && in.charAt(pos + 1) == '?') {
      pos += 1
      moveToLT()
    }    

    // scan past <!DOCTYPE ....>
    if (pos + 1 < len && in.charAt(pos + 1) == '!') {
      pos += 1
      moveToLT()
    }
    
    apply(Source.fromString(in.substring(pos)))
  }
}

case class PCData(_data: String) extends Atom[String](_data) {
  /* The following code is a derivative work of scala.xml.Text */
  if (null == data)
  throw new java.lang.NullPointerException("tried to construct Text with null")

  final override def equals(x: Any) = x match {
    case s:String  => s.equals(data.toString())
    case s:Atom[_] => data == s.data
    case _ => false
  }

  /** Returns text, with some characters escaped according to the XML
   *  specification.
   *
   *  @param  sb ...
   *  @return ...
   */
  override def buildString(sb: StringBuilder) = {
    sb.append("<![CDATA[")
    sb.append(data)
    sb.append("]]>")
  }
}

object AltXML {
  val ieBadTags: Set[String] = Set("br", "hr")

  val inlineTags: Set[String] = Set("base", "meta", "link", "hr", "br",
                                    "param", "img", "area", "input", "col" )

  def toXML(n: Node, stripComment: Boolean, convertAmp: Boolean,
            ieMode: Boolean): String = {
    val sb = new StringBuilder(50000)
    toXML(n, TopScope, sb, stripComment, convertAmp, ieMode)
    sb.toString()
  }

  def toXML(n: Node, stripComment: Boolean, convertAmp: Boolean): String = {
    val sb = new StringBuilder(50000)
    toXML(n, TopScope, sb, stripComment, convertAmp)
    sb.toString()
  }

  /**
   * Appends a tree to the given stringbuffer within given namespace scope.
   *
   * @param n            the node
   * @param pscope       the parent scope
   * @param sb           stringbuffer to append to
   * @param stripComment if true, strip comments
   */
  def toXML(x: Node, pscope: NamespaceBinding, sb: StringBuilder,
            stripComment: Boolean, convertAmp: Boolean): Unit =
  x match {
    case Text(str) => escape(str, sb, !convertAmp)

    case c: PCData => c.buildString(sb)

    case c: scala.xml.PCData => c.buildString(sb)

    case up: Unparsed => up.buildString(sb)

    case a: Atom[_] if a.getClass eq classOf[Atom[_]] =>
      escape(a.data.toString, sb, !convertAmp)

    case c: Comment if !stripComment =>
      c.buildString(sb)

    case er: EntityRef if convertAmp =>
      HtmlEntities.entMap.get(er.entityName) match {
        case Some(chr) if chr.toInt >= 128 => sb.append(chr)
        case _ => er.buildString(sb)
      }

    case x: SpecialNode =>
      x.buildString(sb)

    case g: Group =>
      for (c <- g.nodes)
      toXML(c, x.scope, sb, stripComment, convertAmp)

    case e: Elem if ((e.child eq null) || e.child.isEmpty) =>
      sb.append('<')
      e.nameToString(sb)
      if (e.attributes ne null) e.attributes.buildString(sb)
      e.scope.buildString(sb, pscope)
      sb.append(" />")

    case e: Elem =>
      // print tag with namespace declarations
      sb.append('<')
      e.nameToString(sb)
      if (e.attributes ne null) e.attributes.buildString(sb)
      e.scope.buildString(sb, pscope)
      sb.append('>')
      sequenceToXML(e.child, e.scope, sb, stripComment, convertAmp)
      sb.append("</")
      e.nameToString(sb)
      sb.append('>')

    case _ => // dunno what it is, but ignore it
  }

  private def escape(str: String, sb: StringBuilder, reverse: Boolean) {
    val len = str.length
    var pos = 0
    while (pos < len) {
      str.charAt(pos) match {
        case '<' => sb.append("<")
        case '>' => sb.append(">")
        case '&' => sb.append("&")
        case '"' => sb.append(""")
        case '\n' => sb.append('\n')
        case '\r' => sb.append('\r')
        case '\t' => sb.append('\t')
        case c   =>
          if (reverse) {
            HtmlEntities.revMap.get(c) match {
              case Some(str) =>
                sb.append('&')
                sb.append(str)
                sb.append(';')
              case _ =>
                if (c >= ' ' && c != '\u0085' && !(c >= '\u007f' && c <= '\u0095')) sb.append(c)
            }
          } else
          if (c >= ' ' && c != '\u0085' && !(c >= '\u007f' && c <= '\u0095')) sb.append(c)
      }

      pos += 1
    }
  }

  /**
   * Appends a tree to the given stringbuffer within given namespace scope.
   *
   * @param n            the node
   * @param pscope       the parent scope
   * @param sb           stringbuffer to append to
   * @param stripComment if true, strip comments
   */
  def toXML(x: Node, pscope: NamespaceBinding, sb: StringBuilder,
            stripComment: Boolean, convertAmp: Boolean,
            ieMode: Boolean): Unit =
  x match {
    case Text(str) => escape(str, sb, !convertAmp)

    case c: PCData => c.buildString(sb)

    case c: scala.xml.PCData => c.buildString(sb)

    case up: Unparsed => up.buildString(sb)

    case a: Atom[_] if a.getClass eq classOf[Atom[_]] =>
      escape(a.data.toString, sb, !convertAmp)

    case c: Comment if !stripComment =>
      c.buildString(sb)

    case er: EntityRef if convertAmp =>
      HtmlEntities.entMap.get(er.entityName) match {
        case Some(chr) if chr.toInt >= 128 => sb.append(chr)
        case _ => er.buildString(sb)
      }

    case x: SpecialNode =>
      x.buildString(sb)

    case g: Group =>
      for (c <- g.nodes)
      toXML(c, x.scope, sb, stripComment, convertAmp, ieMode)

    case e: Elem if !ieMode && ((e.child eq null) || e.child.isEmpty)
      && inlineTags.contains(e.label) =>
      sb.append('<')
      e.nameToString(sb)
      if (e.attributes ne null) e.attributes.buildString(sb)
      e.scope.buildString(sb, pscope)
      sb.append(" />")

    case e: Elem if ieMode && ((e.child eq null) || e.child.isEmpty) &&
      ieBadTags.contains(e.label) =>
      sb.append('<')
      e.nameToString(sb)
      if (e.attributes ne null) e.attributes.buildString(sb)
      e.scope.buildString(sb, pscope)
      sb.append("/>")

    case e: Elem =>
      // print tag with namespace declarations
      sb.append('<')
      e.nameToString(sb)
      if (e.attributes ne null) e.attributes.buildString(sb)
      e.scope.buildString(sb, pscope)
      sb.append('>')
      sequenceToXML(e.child, e.scope, sb, stripComment, convertAmp, ieMode)
      sb.append("</")
      e.nameToString(sb)
      sb.append('>')

    case _ => // dunno what it is, but ignore it
  }


  /**
   * @param children     ...
   * @param pscope       ...
   * @param sb           ...
   * @param stripComment ...
   */
  def sequenceToXML(children: Seq[Node], pscope: NamespaceBinding,
                    sb: StringBuilder, stripComment: Boolean,
                    convertAmp: Boolean, ieMode: Boolean): Unit = {
    val it = children.iterator
    while (it.hasNext) {
      toXML(it.next, pscope, sb, stripComment, convertAmp, ieMode)
    }
  }

  /**
   * @param children     ...
   * @param pscope       ...
   * @param sb           ...
   * @param stripComment ...
   */
  def sequenceToXML(children: Seq[Node], pscope: NamespaceBinding,
                    sb: StringBuilder, stripComment: Boolean,
                    convertAmp: Boolean): Unit = {
    val it = children.iterator
    while (it.hasNext) {
      toXML(it.next, pscope, sb, stripComment, convertAmp)
    }
  }

}

Other Lift Framework examples (source code examples)

Here is a short list of links related to this Lift Framework PCDataMarkupParser.scala source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.