alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Android example source code file (AbstractMessageParser.java)

This example Android source code file (AbstractMessageParser.java) is included in the DevDaily.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Android by Example"™.

Java - Android tags/keywords

abstractmessageparser, arraylist, assertionerror, format, html, link, list, part, pattern, photo, regex, string, stringbuilder, token, trienode, util

The AbstractMessageParser.java Android example source code

/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.android.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Set;
import java.util.List;

/**
 *
 * Logic for parsing a text message typed by the user looking for smileys,
 * urls, acronyms, formatting (e.g., '*'s for bold), me commands
 * (e.g., "/me is asleep"), and punctuation.
 *
 * It constructs an array, which breaks the text up into its
 * constituent pieces, which we return to the client.
 *
 */
public abstract class AbstractMessageParser {
/**
 * The lookup tables a message parser consults while tokenizing: known URL
 * schemes, domain suffixes, smileys and acronyms.
 *
 * @author jessan (Jessan Hutchison-Quillian)
 */
  public interface Resources {

    /**
     * @return the set of URL schemes the parser recognizes
     */
    Set<String> getSchemes();

    /**
     * @return a trie holding the possible values for the last part of a
     *         domain name; values are expected to be stored reversed
     */
    TrieNode getDomainSuffixes();

    /**
     * @return a trie holding the smileys accepted by the parser
     */
    TrieNode getSmileys();

    /**
     * @return a trie holding the acronyms accepted by the parser
     */
    TrieNode getAcronyms();
  }

  /**
   * Supplies the lookup tables used while parsing.  Subclasses must define
   * the schemes, domains, smileys and acronyms that are necessary for
   * parsing.
   *
   * @return the resources consulted by the URL, smiley and acronym parsers
   */
  protected abstract Resources getResources();

  /** Music note that indicates user is listening to a music track. */
  public static final String musicNote = "\u266B ";

  // The text being parsed.  parse() replaces it with the remainder of the
  // message when a leading "/me" command is split off.
  private String text;
  // Index into text of the next character that has not been tokenized yet.
  private int nextChar;
  // Counter that getCharClass() increments to hand out a fresh class value
  // for every punctuation character; the constructor starts it at 10.
  private int nextClass;
  // The parts built from the tokens by buildParts().
  private ArrayList<Part> parts;
  // The tokens produced so far, in the order they were parsed.
  private ArrayList<Token> tokens;
  // Maps a formatting character to its start token that has not been
  // matched by a closing character yet (see parseFormatting()).
  private HashMap<Character,Format> formatStart;
  // Switches that enable the individual kinds of parsing.
  private boolean parseSmilies;
  private boolean parseAcronyms;
  private boolean parseFormatting;
  private boolean parseUrls;
  private boolean parseMeText;
  private boolean parseMusic;

  /**
   * Create a message parser to parse urls, formatting, acronyms, smileys,
   * /me text and music.  Delegates to the seven-argument constructor with
   * every parsing option enabled.
   *
   * @param text the text to parse
   */
  public AbstractMessageParser(String text) {
    this(text, true, true, true, true, true, true);
  }

  /**
   * Creates a message parser in which every kind of parsing can be switched
   * on or off individually.
   *
   * @param text the text to parse
   * @param parseSmilies whether smileys are recognized
   * @param parseAcronyms whether acronyms are recognized
   * @param parseFormatting whether formatting characters are recognized
   * @param parseUrls whether URLs are recognized
   * @param parseMusic whether a leading music note marks a music track
   * @param parseMeText whether a leading "/me" command is recognized
   */
  public AbstractMessageParser(String text, boolean parseSmilies,
      boolean parseAcronyms, boolean parseFormatting, boolean parseUrls,
      boolean parseMusic, boolean parseMeText) {
    // Parsing options.
    this.parseMeText = parseMeText;
    this.parseMusic = parseMusic;
    this.parseUrls = parseUrls;
    this.parseFormatting = parseFormatting;
    this.parseAcronyms = parseAcronyms;
    this.parseSmilies = parseSmilies;

    // Output containers.
    this.formatStart = new HashMap<Character,Format>();
    this.tokens = new ArrayList<Token>();
    this.parts = new ArrayList<Part>();

    // Input and scanner state.  Punctuation classes are numbered upwards
    // from 10 so that they never collide with the fixed classes 0 to 4.
    this.nextClass = 10;
    this.nextChar = 0;
    this.text = text;
  }

  /**
   * @return the raw text being parsed
   */
  public final String getRawText() {
    return this.text;
  }

  /**
   * @return the number of parts built so far
   */
  public final int getPartCount() {
    return this.parts.size();
  }

  /**
   * @param index position of the requested part in the parts list
   * @return the part at the given index
   */
  public final Part getPart(int index) {
    return this.parts.get(index);
  }

  /**
   * @return the list of parts from the parsed text; this is the parser's own
   *         list, not a copy
   */
  public final List<Part> getParts() {
    return this.parts;
  }

  /**
   * Tokenizes the message text and groups the resulting tokens into parts.
   *
   * <p>A message that is recognized as a music track becomes a single token.
   * Otherwise a leading "/me" command (when followed by a whitespace
   * character) is split off, the remaining text is broken into smiley,
   * acronym, URL, formatting and plain text tokens, whitespace around media
   * tokens is trimmed, empty html tokens are dropped, and finally the parts
   * list is built.
   *
   * @throws AssertionError if a token ended somewhere that is neither a word
   *         break nor, directly after a smiley, a smiley break
   */
  public void parse() {
    // A music track is the only token of its message, so nothing else needs
    // to be looked at.
    if (parseMusicTrack()) {
      buildParts(null);
      return;
    }

    // Split a leading me command off the text.
    String meText = null;
    boolean isMeCommand = parseMeText
        && text.startsWith("/me")
        && (text.length() > 3)
        && Character.isWhitespace(text.charAt(3));
    if (isMeCommand) {
      meText = text.substring(0, 4);
      text = text.substring(4);
    }

    // Tokenize.  Every token has to start at a word break; the one exception
    // is a token that directly follows a smiley at a smiley break.
    boolean afterSmiley = false;
    while (nextChar < text.length()) {
      if (!isWordBreak(nextChar)
          && !(afterSmiley && isSmileyBreak(nextChar))) {
        throw new AssertionError("last chunk did not end at word break");
      }

      afterSmiley = parseSmiley();
      if (!afterSmiley
          && !parseAcronym() && !parseURL() && !parseFormatting()) {
        parseText();
      }
    }

    // Trim the whitespace of the html tokens next to media components.
    final int tokenCount = tokens.size();
    for (int i = 0; i < tokenCount; ++i) {
      if (!tokens.get(i).isMedia()) {
        continue;
      }
      if (i > 0) {
        Token before = tokens.get(i - 1);
        if (before instanceof Html) {
          ((Html) before).trimLeadingWhitespace();
        }
      }
      if (i + 1 < tokenCount) {
        Token after = tokens.get(i + 1);
        if (after instanceof Html) {
          ((Html) after).trimTrailingWhitespace();
        }
      }
    }

    // Drop html tokens that render to nothing.  The index only advances when
    // the current token is kept, so the token that slides into a freed slot
    // is examined as well.
    int cursor = 0;
    while (cursor < tokens.size()) {
      Token candidate = tokens.get(cursor);
      if (candidate.isHtml() && (candidate.toHtml(true).length() == 0)) {
        tokens.remove(cursor);
      } else {
        ++cursor;
      }
    }

    buildParts(meText);
  }

  /**
   * Gets the appropriate Token for a given URL.  The URL is offered, in this
   * order, to the Google Video, YouTube, Picasa and Flickr matchers; the
   * first one that recognizes it supplies the token.  A URL that none of
   * them recognizes becomes a plain {@link Link}.
   *
   * @param url the url
   * @param text the anchor text
   * @return the token for the URL, or null if url is null
   */
  public static Token tokenForUrl(String url, String text) {
    if (url == null) {
      return null;
    }

    // Video links.
    Token media = Video.matchURL(url, text);
    if (media == null) {
      media = YouTubeVideo.matchURL(url, text);
    }

    // Photo links.
    if (media == null) {
      media = Photo.matchURL(url, text);
    }
    if (media == null) {
      media = FlickrPhoto.matchURL(url, text);
    }

    // Not media, so it must be a regular URL.
    return (media != null) ? media : new Link(url, text);
  }

  /**
   * Distributes the tokens over the parts list.  A new part is started for
   * every media token, for the very first token, and for the token that
   * follows a media part; every other token joins the current last part.
   *
   * @param meText any meText parsed from the message; it is stored on the
   *        first part
   */
  private void buildParts(String meText) {
    for (Token token : tokens) {
      boolean startNewPart =
          token.isMedia() || (parts.size() == 0) || lastPart().isMedia();
      if (startNewPart) {
        parts.add(new Part());
      }
      lastPart().add(token);
    }

    // The first part inherits the meText of the line.
    if (parts.size() != 0) {
      Part first = parts.get(0);
      first.setMeText(meText);
    }
  }

  /**
   * @return the part most recently added to the parts list
   */
  private Part lastPart() {
    int lastIndex = parts.size() - 1;
    return parts.get(lastIndex);
  }

  /**
   * Looks for a music track: the text starts with {@link #musicNote} and
   * everything after it is the track info.  When one is found, the whole
   * text is consumed as a single MusicTrack token.
   *
   * @return true if the text was consumed as a music track
   */
  private boolean parseMusicTrack() {
    if (!parseMusic || !text.startsWith(musicNote)) {
      return false;
    }

    String trackInfo = text.substring(musicNote.length());
    addToken(new MusicTrack(trackInfo));
    nextChar = text.length();
    return true;
  }

  /** Consumes all of the text in the next word . */
  private void parseText() {
    StringBuilder buf = new StringBuilder();
    int start = nextChar;
    do {
      char ch = text.charAt(nextChar++);
      switch (ch) {
        case '<':  buf.append("<"); break;
        case '>':  buf.append(">"); break;
        case '&':  buf.append("&"); break;
        case '"':  buf.append("""); break;
        case '\'':  buf.append("'"); break;
        case '\n':  buf.append("<br>"); break;
        default:  buf.append(ch); break;
      }
    } while (!isWordBreak(nextChar));

    addToken(new Html(text.substring(start, nextChar), buf.toString()));
  }

  /**
   * Looks for a smiley (e.g., ":)") at the current position, using the trie
   * of known smileys supplied by the resources.  A match that has a letter
   * or digit on both sides is rejected.
   *
   * @return true if a Smiley token was added and the position advanced
   */
  private boolean parseSmiley() {
    if (!parseSmilies) {
      return false;
    }

    TrieNode match =
        longestMatch(getResources().getSmileys(), this, nextChar, true);
    if (match == null) {
      return false;
    }

    String smiley = match.getText();
    int classBefore = getCharClass(nextChar - 1);
    int classAfter = getCharClass(nextChar + smiley.length());

    // Classes 2 and 3 are letters and digits.
    boolean alnumBefore = (classBefore == 2) || (classBefore == 3);
    boolean alnumAfter = (classAfter == 2) || (classAfter == 3);
    if (alnumBefore && alnumAfter) {
      return false;
    }

    addToken(new Smiley(smiley));
    nextChar += smiley.length();
    return true;
  }

  /**
   * Looks for an acronym (e.g., "lol") at the current position, using the
   * trie of known acronyms supplied by the resources.
   *
   * @return true if an Acronym token was added and the position advanced
   */
  private boolean parseAcronym() {
    if (!parseAcronyms) {
      return false;
    }

    TrieNode match = longestMatch(getResources().getAcronyms(), this, nextChar);
    if (match == null) {
      return false;
    }

    String acronym = match.getText();
    addToken(new Acronym(acronym, match.getValue()));
    nextChar += acronym.length();
    return true;
  }

  /**
   * Determines if this is an allowable domain character: a hyphen, a letter
   * or a digit.
   */
  private boolean isDomainChar(char c) {
    if (c == '-') {
      return true;
    }
    return Character.isLetter(c) || Character.isDigit(c);
  }

  /**
   * Determines if the given string is a valid domain, i.e. whether it ends
   * with a known domain suffix.  The domain is reversed before it is matched
   * against the suffix trie.
   */
  private boolean isValidDomain(String domain) {
    String reversedDomain = reverse(domain);
    return matches(getResources().getDomainSuffixes(), reversedDomain);
  }

  /**
   * Looks for a URL in two possible forms:  either a proper URL with a known
   * scheme or a domain name optionally followed by a port, path, query, or
   * fragment.
   *
   * <p>For the domain form, "http://" is prepended to build the URL of the
   * token, while the token text stays exactly what the user typed.
   *
   * @return true if a URL token was added and the position advanced; false
   *         if no URL starts at the current position (nothing is consumed)
   */
  private boolean parseURL() {
    // Make sure this is a valid place to start a URL.
    if (!parseUrls || !isURLBreak(nextChar)) {
      return false;
    }

    int start = nextChar;

    // Search for the first block of letters.
    int index = start;
    while ((index < text.length()) && isDomainChar(text.charAt(index))) {
      index += 1;
    }

    // Prefix that is put in front of the typed text to form the URL; stays
    // empty when the user typed the scheme.
    String url = "";
    // Set once the end of the URL has been found.
    boolean done = false;

    if (index == text.length()) {
      return false;
    } else if (text.charAt(index) == ':') {
      // Make sure this is a known scheme.
      String scheme = text.substring(nextChar, index);
      if (!getResources().getSchemes().contains(scheme)) {
        return false;
      }
    } else if (text.charAt(index) == '.') {
      // Search for the end of the domain name.
      while (index < text.length()) {
        char ch = text.charAt(index);
        if ((ch != '.') && !isDomainChar(ch)) {
          break;
        } else {
          index += 1;
        }
      }

      // Make sure the domain name has a valid suffix.  Since tries look for
      // prefix matches, we reverse all the strings to get suffix comparisons.
      String domain = text.substring(nextChar, index);
      if (!isValidDomain(domain)) {
        return false;
      }

      // Search for a port.  We deal with this specially because a colon can
      // also be a punctuation character.
      if ((index + 1 < text.length()) && (text.charAt(index) == ':')) {
        char ch = text.charAt(index + 1);
        if (Character.isDigit(ch)) {
          index += 1;
          while ((index < text.length()) &&
                 Character.isDigit(text.charAt(index))) {
            index += 1;
          }
        }
      }

      // The domain name should be followed by end of line, whitespace,
      // punctuation, or a colon, slash, question, or hash character.  The
      // tricky part here is that some URL characters are also punctuation, so
      // we need to distinguish them.  Since we looked for ports above, a colon
      // is always punctuation here.  To distinguish '?' cases, we look at the
      // character that follows it.
      if (index == text.length()) {
        done = true;
      } else {
        char ch = text.charAt(index);
        if (ch == '?') {
          // If the next character is whitespace or punctuation (or missing),
          // then this question mark looks like punctuation.
          if (index + 1 == text.length()) {
            done = true;
          } else {
            char ch2 = text.charAt(index + 1);
            if (Character.isWhitespace(ch2) || isPunctuation(ch2)) {
              done = true;
            }
          }
        } else if (isPunctuation(ch)) {
          done = true;
        } else if (Character.isWhitespace(ch)) {
          done = true;
        } else if ((ch == '/') || (ch == '#')) {
          // In this case, the URL is not done.  We will search for the end of
          // it below.
        } else {
          return false;
        }
      }

      // We will assume the user meant HTTP.  (One weird case is where they
      // type a port of 443.  That could mean HTTPS, but they might also want
      // HTTP.  We'll let them specify if they don't want HTTP.)
      url = "http://";
    } else {
      return false;
    }

    // If the URL is not done, search for the end, which is just before the
    // next whitespace character.
    if (!done) {
      while ((index < text.length()) &&
             !Character.isWhitespace(text.charAt(index))) {
        index += 1;
      }
    }

    // urlText is what the user typed; url additionally carries the assumed
    // scheme, if any.
    String urlText = text.substring(start, index);
    url += urlText;

    // Figure out the appropriate token type.
    addURLToken(url, urlText);

    nextChar = index;
    return true;
  }

  /**
   * Adds the token that {@link #tokenForUrl} chooses for the given URL,
   * which is either a simple link or a recognized media type.
   *
   * @param url the url
   * @param text the anchor text
   */
  private void addURLToken(String url, String text) {
    Token urlToken = tokenForUrl(url, text);
    addToken(urlToken);
  }

  /**
   * Deals with one block of formatting characters.
   *
   * <p>A block is a contiguous run of formatting characters that ends at a
   * word break.  Within one block:
   * <ul>
   *   <li>only the first occurrence of a particular character decides
   *       whether that format is switched on or off; every further
   *       occurrence is added as an unmatched token, which prints the
   *       character itself;</li>
   *   <li>a format that is switched on gets its start token at the position
   *       of that first occurrence;</li>
   *   <li>a format that is switched off gets its end token appended after
   *       the whole block.</li>
   * </ul>
   *
   * <p>Example: in "**string**" the first '*' of the leading block opens the
   * format and the second one is printed; the first '*' of the trailing
   * block is printed and the format is closed after that block.
   *
   * @return true if a block was consumed; false if formatting is disabled,
   *         no formatting character starts here, or the run does not end at
   *         a word break
   */
  private boolean parseFormatting() {
    if (!parseFormatting) {
      return false;
    }

    // Find the end of the run of formatting characters.
    int endChar = nextChar;
    for (; endChar < text.length(); ++endChar) {
      if (!isFormatChar(text.charAt(endChar))) {
        break;
      }
    }

    boolean emptyRun = (endChar == nextChar);
    if (emptyRun || !isWordBreak(endChar)) {
      return false;
    }

    // One entry per distinct character of the block, in order of first
    // appearance.  The value tells whether a closing format token has to be
    // appended for that character once the block has been processed.
    LinkedHashMap<Character, Boolean> closesFormat =
        new LinkedHashMap<Character, Boolean>();

    for (int index = nextChar; index < endChar; ++index) {
      char ch = text.charAt(index);
      Character key = Character.valueOf(ch);

      if (closesFormat.containsKey(key)) {
        // Repeated character: an unmatched token prints it as plain text.
        addToken(new Format(ch, false));
        continue;
      }

      Format opener = formatStart.remove(key);
      if (opener == null) {
        // Nothing is open for this character, so this occurrence starts the
        // format.
        opener = new Format(ch, true);
        formatStart.put(key, opener);
        addToken(opener);
        closesFormat.put(key, Boolean.FALSE);
      } else {
        // The format is open: mark its start token as matched and remember
        // to append the end token.
        opener.setMatched(true);
        closesFormat.put(key, Boolean.TRUE);
      }
    }

    // Append any necessary end tokens.
    for (Character key : closesFormat.keySet()) {
      if (closesFormat.get(key).booleanValue()) {
        Format closer = new Format(key.charValue(), false);
        closer.setMatched(true);
        addToken(closer);
      }
    }

    nextChar = endChar;
    return true;
  }

  /**
   * Determines whether the given index could be a possible word break,
   * which is the case when the characters on both sides of it belong to
   * different character classes.
   */
  private boolean isWordBreak(int index) {
    int classBefore = getCharClass(index - 1);
    int classAt = getCharClass(index);
    return classBefore != classAt;
  }

  /**
   * Determines whether the given index could be a possible smiley break.
   * An index at the very start of the text, or at or past its end, never is.
   */
  private boolean isSmileyBreak(int index) {
    boolean insideText = (index > 0) && (index < text.length());
    return insideText
        && isSmileyBreak(text.charAt(index - 1), text.charAt(index));
  }

  /**
   * Verifies that the character before the given index is end of line,
   * whitespace, or punctuation, i.e. that it is not a letter (class 2), a
   * digit (class 3) or some other character (class 4).
   */
  private boolean isURLBreak(int index) {
    int previousClass = getCharClass(index - 1);
    boolean blocksUrl =
        (previousClass == 2) || (previousClass == 3) || (previousClass == 4);
    return !blocksUrl;
  }

  /**
   * Returns the class for the character at the given index:
   * 0 for an index outside the text, 1 for whitespace, 2 for a letter,
   * 3 for a digit, 4 for anything else that is not punctuation, and a value
   * never handed out before for punctuation.
   */
  private int getCharClass(int index) {
    boolean outsideText = (index < 0) || (index >= text.length());
    if (outsideText) {
      return 0;
    }

    char ch = text.charAt(index);
    if (Character.isWhitespace(ch)) {
      return 1;
    }
    if (Character.isLetter(ch)) {
      return 2;
    }
    if (Character.isDigit(ch)) {
      return 3;
    }
    if (isPunctuation(ch)) {
      // Punctuation gets a unique value every time so that it always differs
      // from any other character.  Punctuation should always be considered a
      // possible word break.
      nextClass += 1;
      return nextClass;
    }
    return 4;
  }

  /**
   * Returns true if <code>c1</code> could be the last character of
   * a smiley and <code>c2</code> could be the first character of
   * a different smiley, if {@link #isWordBreak} would not already
   * recognize that this is possible.
   */
  private static boolean isSmileyBreak(char c1, char c2) {
    // Characters that can end smileys, but don't normally end words.
    boolean endsSmiley = "$&*+-/<=>@[\\]^|}~".indexOf(c1) >= 0;
    if (!endsSmiley) {
      return false;
    }

    // Characters that can begin smileys, but don't normally begin words.
    return "#$%*/<=>@[\\^~".indexOf(c2) >= 0;
  }

  /**
   * Determines whether the given character is punctuation, i.e. one of
   * <code>. , " : ; ? ! ( )</code>.
   */
  private static boolean isPunctuation(char ch) {
    return ".,\":;?!()".indexOf(ch) >= 0;
  }

  /**
   * Determines whether the given character is the beginning or end of a
   * section with special formatting: an asterisk, an underscore or a caret.
   */
  private static boolean isFormatChar(char ch) {
    return (ch == '*') || (ch == '_') || (ch == '^');
  }

  /** Represents a unit of parsed output. */
  public static abstract class Token {
    /** The kinds of token, each with the string that identifies it. */
    public enum Type {

      HTML ("html"),
      FORMAT ("format"),  // subtype of HTML
      LINK ("l"),
      SMILEY ("e"),
      ACRONYM ("a"),
      MUSIC ("m"),
      GOOGLE_VIDEO ("v"),
      YOUTUBE_VIDEO ("yt"),
      PHOTO ("p"),
      FLICKR ("f");

      //stringreps for HTML and FORMAT don't really matter
      //because they don't define getInfo(), which is where it is used
      //For the other types, code depends on their stringreps
      private final String stringRep;

      Type(String stringRep) {
        this.stringRep = stringRep;
      }

      /** Returns the string representation given to this type. */
      @Override
      public String toString() {
        return this.stringRep;
      }
    }

    /** The kind of this token. */
    protected Type type;
    /** The raw text of this token. */
    protected String text;

    /**
     * @param type the kind of token being created
     * @param text the raw text of the token
     */
    protected Token(Type type, String text) {
      this.type = type;
      this.text = text;
    }

    /** Returns the type of the token. */
    public Type getType() { return type; }

    /**
     * Get the relevant information about a token
     *
     * @return a new list of strings representing the token, not null.
     *         The first item is always a string representation of the type
     */
    public List<String> getInfo() {
      // Parameterized rather than raw, so the list is checked as a list of
      // strings at compile time.
      List<String> info = new ArrayList<String>();
      info.add(getType().toString());
      return info;
    }

    /** Returns the raw text of the token. */
    public String getRawText() { return text; }

    /** Returns whether this token is a media component; false by default. */
    public boolean isMedia() { return false; }

    /** Returns whether this token is rendered through {@link #toHtml}. */
    public abstract boolean isHtml();

    /** Returns the opposite of {@link #isHtml()}. */
    public boolean isArray() { return !isHtml(); }

    /**
     * Returns the html of this token.  This default implementation is for
     * tokens that are not html.
     *
     * @param caps whether the text is to be capitalized
     * @throws AssertionError always, unless overridden
     */
    public String toHtml(boolean caps) { throw new AssertionError("not html"); }

    // The token can change the caps of the text after that point.
    public boolean controlCaps() { return false; }
    public boolean setCaps() { return false; }
  }

  /**
   * Represents a simple string of html text.  Besides the raw text kept by
   * {@link Token}, it stores the html that stands for that text.
   */
  public static class Html extends Token {
    private String html;

    /**
     * @param text the raw text of the token
     * @param html the html for that text
     */
    public Html(String text, String html) {
      super(Type.HTML, text);
      this.html = html;
    }

    public boolean isHtml() {
      return true;
    }

    /**
     * @param caps whether the html is to be returned in upper case
     * @return the html, converted with {@link String#toUpperCase()} if caps
     *         is set
     */
    public String toHtml(boolean caps) {
      if (caps) {
        return html.toUpperCase();
      }
      return html;
    }

    /**
     * Not supported. Info should not be needed for this type
     *
     * @throws UnsupportedOperationException always
     */
    public List<String> getInfo() {
      throw new UnsupportedOperationException();
    }

    /** Strips the leading whitespace from both the raw text and the html. */
    public void trimLeadingWhitespace() {
      this.text = trimLeadingWhitespace(this.text);
      this.html = trimLeadingWhitespace(this.html);
    }

    /** Strips the trailing whitespace from both the raw text and the html. */
    public void trimTrailingWhitespace() {
      this.text = trimTrailingWhitespace(this.text);
      this.html = trimTrailingWhitespace(this.html);
    }

    /** Returns the given text without its leading whitespace characters. */
    private static String trimLeadingWhitespace(String text) {
      final int length = text.length();
      int first = 0;
      while ((first < length) && Character.isWhitespace(text.charAt(first))) {
        first++;
      }
      return text.substring(first);
    }

    /** Returns the given text without its trailing whitespace characters. */
    public static String trimTrailingWhitespace(String text) {
      int end = text.length();
      for (; end > 0; --end) {
        if (!Character.isWhitespace(text.charAt(end - 1))) {
          break;
        }
      }
      return text.substring(0, end);
    }
  }

  /**
   * Represents a music track token at the beginning.  The track info serves
   * both as the track and as the raw text of the token.
   */
  public static class MusicTrack extends Token {
    private String track;

    /**
     * @param track the track info
     */
    public MusicTrack(String track) {
      super(Type.MUSIC, track);
      this.track = track;
    }

    /** Returns the track info. */
    public String getTrack() {
      return this.track;
    }

    public boolean isHtml() {
      return false;
    }

    /**
     * @return the type string followed by the track info
     */
    public List<String> getInfo() {
      List<String> result = super.getInfo();
      result.add(getTrack());
      return result;
    }
  }

  /** Represents a link that was found in the input. */
  public static class Link extends Token {
    private String url;

    /**
     * @param url the url the link points to
     * @param text the anchor text, kept as the raw text of the token
     */
    public Link(String url, String text) {
      super(Type.LINK, text);
      this.url = url;
    }

    /** Returns the url the link points to. */
    public String getURL() {
      return this.url;
    }

    public boolean isHtml() {
      return false;
    }

    /**
     * @return the type string, the url and the raw text, in that order
     */
    public List<String> getInfo() {
      List<String> result = super.getInfo();
      String target = getURL();
      String anchor = getRawText();
      result.add(target);
      result.add(anchor);
      return result;
    }
  }

  /** Represents a link to a Google Video. */
  public static class Video extends Token {
    /**
     * Pattern for a video URL; the first group captures the value of the
     * docid parameter.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
        "(?i)http://video\\.google\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/videoplay\\?"
        + ".*?\\bdocid=(-?\\d+).*");

    private String docid;

    /**
     * @param docid the id of the video
     * @param text the raw text of the token
     */
    public Video(String docid, String text) {
      super(Type.GOOGLE_VIDEO, text);
      this.docid = docid;
    }

    /** Returns the id of the video. */
    public String getDocID() {
      return this.docid;
    }

    public boolean isHtml() {
      return false;
    }

    public boolean isMedia() {
      return true;
    }

    /**
     * Returns a Video object if the given url is to a video.
     *
     * @param url the url to examine
     * @param text the raw text for the new token
     * @return the video token, or null if the url is not a video url
     */
    public static Video matchURL(String url, String text) {
      Matcher matcher = URL_PATTERN.matcher(url);
      return matcher.matches() ? new Video(matcher.group(1), text) : null;
    }

    /**
     * @return the type string, the RSS url and the video url, in that order
     */
    public List<String> getInfo() {
      List<String> result = super.getInfo();
      result.add(getRssUrl(docid));
      result.add(getURL(docid));
      return result;
    }

    /** Returns the URL for the RSS description of the given video. */
    public static String getRssUrl(String docid) {
      String feedBase = "http://video.google.com/videofeed"
          + "?type=docid&output=rss&sourceid=gtalk&docid=";
      return feedBase + docid;
    }

    /** (For testing purposes:) Returns a video URL for the given docid. */
    public static String getURL(String docid) {
      return getURL(docid, null);
    }

    /**
     * (For testing purposes:) Returns a video URL with the given parts.
     *
     * @param docid the id of the video
     * @param extraParams query parameters to put in front of the docid
     *        parameter; may be null or empty for none
     */
    public static String getURL(String docid, String extraParams) {
      String leadingParams;
      if ((extraParams == null) || (extraParams.length() == 0)) {
        leadingParams = "";
      } else {
        leadingParams = extraParams + "&";
      }
      return "http://video.google.com/videoplay?" + leadingParams
          + "docid=" + docid;
    }
  }

  /** Represents a link to a YouTube video. */
  public static class YouTubeVideo extends Token {
    /**
     * Pattern for a video URL; the first group captures the value of the v
     * parameter.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
        "(?i)http://(?:[a-z0-9]+\\.)?youtube\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/watch\\?"
        + ".*\\bv=([-_a-zA-Z0-9=]+).*");

    private String docid;

    /**
     * @param docid the id of the video
     * @param text the raw text of the token
     */
    public YouTubeVideo(String docid, String text) {
      super(Type.YOUTUBE_VIDEO, text);
      this.docid = docid;
    }

    /** Returns the id of the video. */
    public String getDocID() {
      return this.docid;
    }

    public boolean isHtml() {
      return false;
    }

    public boolean isMedia() {
      return true;
    }

    /**
     * Returns a YouTubeVideo object if the given url is to a video.
     *
     * @param url the url to examine
     * @param text the raw text for the new token
     * @return the video token, or null if the url is not a video url
     */
    public static YouTubeVideo matchURL(String url, String text) {
      Matcher matcher = URL_PATTERN.matcher(url);
      return matcher.matches()
          ? new YouTubeVideo(matcher.group(1), text)
          : null;
    }

    /**
     * @return the type string followed by the results of
     *         {@link #getRssUrl} and {@link #getURL(String)}
     */
    public List<String> getInfo() {
      List<String> result = super.getInfo();
      result.add(getRssUrl(docid));
      result.add(getURL(docid));
      return result;
    }

    /**
     * Returns the URL that is reported as the RSS description of the given
     * video; it is the plain watch URL of the video.
     */
    public static String getRssUrl(String docid) {
      String watchBase = "http://youtube.com/watch?v=";
      return watchBase + docid;
    }

    /** (For testing purposes:) Returns a video URL for the given docid. */
    public static String getURL(String docid) {
      return getURL(docid, null);
    }

    /**
     * (For testing purposes:) Returns a video URL with the given parts.
     *
     * @param docid the id of the video
     * @param extraParams query parameters to put in front of the v
     *        parameter; may be null or empty for none
     */
    public static String getURL(String docid, String extraParams) {
      String leadingParams;
      if ((extraParams == null) || (extraParams.length() == 0)) {
        leadingParams = "";
      } else {
        leadingParams = extraParams + "&";
      }
      return "http://youtube.com/watch?" + leadingParams + "v=" + docid;
    }

    /**
     * (For testing purposes:) Returns a video URL with the given parts.
     *
     * @param http If true, includes http://
     * @param prefix If non-null/non-blank, adds to URL before youtube.com.
     *   (e.g., prefix="br." --> "br.youtube.com")
     * @param docid the id of the video
     * @param extraParams query parameters to put in front of the v
     *        parameter; may be null or empty for none
     */
    public static String getPrefixedURL(boolean http, String prefix,
                                        String docid, String extraParams) {
      String protocol = http ? "http://" : "";
      String hostPrefix = (prefix == null) ? "" : prefix;

      String leadingParams;
      if ((extraParams == null) || (extraParams.length() == 0)) {
        leadingParams = "";
      } else {
        leadingParams = extraParams + "&";
      }

      return protocol + hostPrefix + "youtube.com/watch?" + leadingParams
          + "v=" + docid;
    }
  }

  /** Represents a link to a Picasa photo or album. */
  public static class Photo extends Token {
    /**
     * Pattern for an album or photo URL.  Group 1 is the user, group 2 the
     * album and group 3 the part after '#' (the photo), if present.  The
     * dots of the host name are escaped so that they only match a literal
     * dot.
     */
    // TODO (katyarogers) searchbrowse includes search lists and tags,
    // it follows a different pattern than albums - would be nice to add later
    private static final Pattern URL_PATTERN = Pattern.compile(
        "http://picasaweb\\.google\\.com/([^/?#&]+)/+((?!searchbrowse)[^/?#&]+)(?:/|/photo)?(?:\\?[^#]*)?(?:#(.*))?");

    private String user;
    private String album;
    private String photo;  // null for albums

    /**
     * @param user the user the album belongs to
     * @param album the album
     * @param photo the photo within the album, or null for an album
     * @param text the raw text of the token
     */
    public Photo(String user, String album, String photo, String text) {
      super(Type.PHOTO, text);
      this.user = user;
      this.album = album;
      this.photo = photo;
    }

    public String getUser() { return user; }
    public String getAlbum() { return album; }
    public String getPhoto() { return photo; }

    public boolean isHtml() { return false; }
    public boolean isMedia() { return true; }

    /**
     * Returns a Photo object if the given url is to a photo or album.
     *
     * @param url the url to examine
     * @param text the raw text for the new token
     * @return the photo token, or null if the url is not a Picasa url
     */
    public static Photo matchURL(String url, String text) {
      Matcher m = URL_PATTERN.matcher(url);
      if (m.matches()) {
        return new Photo(m.group(1), m.group(2), m.group(3), text);
      } else {
        return null;
      }
    }

    /**
     * @return the type string, the RSS url of the user's albums, the album
     *         url and the photo url, in that order; the photo url entry is
     *         null for an album
     */
    public List<String> getInfo() {
      List<String> info = super.getInfo();
      info.add(getRssUrl(getUser()));
      info.add(getAlbumURL(getUser(), getAlbum()));
      if (getPhoto() != null) {
        info.add(getPhotoURL(getUser(), getAlbum(), getPhoto()));
      } else {
        // Keep the position of the photo url in the list.
        info.add((String)null);
      }
      return info;
    }

    /** Returns the URL for the RSS description of the user's albums. */
    public static String getRssUrl(String user) {
      return "http://picasaweb.google.com/data/feed/api/user/" + user +
        "?category=album&alt=rss";
    }

    /** Returns the URL for an album. */
    public static String getAlbumURL(String user, String album) {
      return "http://picasaweb.google.com/" + user + "/" + album;
    }

    /** Returns the URL for a particular photo. */
    public static String getPhotoURL(String user, String album, String photo) {
      return "http://picasaweb.google.com/" + user + "/" + album + "/photo#"
             + photo;
    }
  }

  /** Represents a link to a Flickr photo or album. */
  public static class FlickrPhoto extends Token {
    /**
     * Pattern for a user album or photo URL.  Group 1 is the path segment
     * after "photos/" and group 2 the optional segment that follows it.  The
     * dots of the host name are escaped so that they only match a literal
     * dot.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
        "http://(?:www\\.)?flickr\\.com/photos/([^/?#&]+)/?([^/?#&]+)?/?.*");
    /**
     * Pattern for the tags or sets of a user.  Group 1 is the user, group 2
     * is "tags" or "sets", and group 3 is the identifier that follows.
     */
    private static final Pattern GROUPING_PATTERN = Pattern.compile(
        "http://(?:www\\.)?flickr\\.com/photos/([^/?#&]+)/(tags|sets)/" +
        "([^/?#&]+)/?");

    private static final String SETS = "sets";
    private static final String TAGS = "tags";

    private String user;
    private String photo;      // null for user album
    private String grouping;   // either "tags" or "sets"
    private String groupingId; // sets or tags identifier

    /**
     * Creates a Flickr token.
     *
     * <p>When the user argument equals "tags", the URL was a system wide
     * tags URL, which looks like the URL to a Flickr user.  In that case the
     * photo argument is stored as the tag identifier and user and photo are
     * left null.
     *
     * @param user the Flickr user, or "tags" for system wide tags
     * @param photo the photo; "show" (a slide show) is not considered a photo
     * @param grouping either "tags" or "sets", or null
     * @param groupingId the sets or tags identifier, or null
     * @param text the raw text of the token
     */
    public FlickrPhoto(String user, String photo, String grouping,
                       String groupingId, String text) {
      super(Type.FLICKR, text);

      if (TAGS.equals(user)) {
        this.user = null;
        this.photo = null;
        this.grouping = TAGS;
        this.groupingId = photo;
      } else {
        this.user = user;
        // Don't consider slide show URL a photo
        this.photo = "show".equals(photo) ? null : photo;
        this.grouping = grouping;
        this.groupingId = groupingId;
      }
    }

    public String getUser() { return user; }
    public String getPhoto() { return photo; }
    public String getGrouping() { return grouping; }
    public String getGroupingId() { return groupingId; }

    public boolean isHtml() { return false; }
    public boolean isMedia() { return true; }

    /**
     * Returns a FlickrPhoto object if the given url is to a photo or Flickr
     * user.
     */
    public static FlickrPhoto matchURL(String url, String text) {
      Matcher m = GROUPING_PATTERN.matcher(url);
      if (m.matches()) {
        return new FlickrPhoto(m.group(1), null, m.group(2), m.group(3), text);
      }

      m = URL_PATTERN.matcher(url);
      if (m.matches()) {
        return new FlickrPhoto(m.group(1), m.group(2), null, null, text);
      } else {
        return null;
      }
    }

    public List<String> getInfo() {
      List<String> info = super.getInfo();
      info.add(getUrl());
      info.add(getUser() != null ? getUser() : "");
      info.add(getPhoto() != null ? getPhoto() : "");
      info.add(getGrouping() != null ? getGrouping() : "");
      info.add(getGroupingId() != null ? getGroupingId() : "");
      return info;
    }

    public String getUrl() {
      if (SETS.equals(grouping)) {
        return getUserSetsURL(user, groupingId);
      } else if (TAGS.equals(grouping)) {
        if (user != null) {
          return getUserTagsURL(user, groupingId);
        } else {
          return getTagsURL(groupingId);
        }
      } else if (photo != null) {
        return getPhotoURL(user, photo);
      } else {
        return getUserURL(user);
      }
    }

    /** Returns the URL for the RSS description. */
    public static String getRssUrl(String user) {
      return null;
    }

    /** Returns the URL for a particular tag. */
    public static String getTagsURL(String tag) {
      return "http://flickr.com/photos/tags/" + tag;
    }

    /** Returns the URL to the user's Flickr homepage. */
    public static String getUserURL(String user) {
      return "http://flickr.com/photos/" + user;
    }

    /** Returns the URL for a particular photo. */
    public static String getPhotoURL(String user, String photo) {
      return "http://flickr.com/photos/" + user + "/" + photo;
    }

    /** Returns the URL for a user tag photo set. */
    public static String getUserTagsURL(String user, String tagId) {
      return "http://flickr.com/photos/" + user + "/tags/" + tagId;
    }

    /** Returns the URL for user set. */
    public static String getUserSetsURL(String user, String setId) {
      return "http://flickr.com/photos/" + user + "/sets/" + setId;
    }
  }

  /**
   * Token for a smiley found in the input. It is not rendered through
   * {@code toHtml}; its info list carries the raw smiley text.
   */
  public static class Smiley extends Token {
    // TODO: Pass the SWF URL down to the client.

    /**
     * @param text the raw text of the smiley
     */
    public Smiley(String text) {
      super(Type.SMILEY, text);
    }

    public boolean isHtml() {
      return false;
    }

    /** Returns the superclass info list with the raw text appended. */
    public List<String> getInfo() {
      List<String> details = super.getInfo();
      details.add(getRawText());
      return details;
    }
  }

  /**
   * Token for an acronym found in the input, pairing the raw text with the
   * value supplied for it at construction.
   */
  public static class Acronym extends Token {
    // TODO: SWF
    private final String value;

    /**
     * @param text the raw text of the acronym
     * @param value the value associated with the acronym
     */
    public Acronym(String text, String value) {
      super(Type.ACRONYM, text);
      this.value = value;
    }

    public String getValue() {
      return value;
    }

    public boolean isHtml() {
      return false;
    }

    /**
     * Returns the superclass info list with the raw text and then the value
     * appended.
     */
    public List<String> getInfo() {
      List<String> details = super.getInfo();
      details.add(getRawText());
      details.add(getValue());
      return details;
    }
  }

  /** Represents a character that changes formatting. */
  public static class Format extends Token {
    private char ch;
    private boolean start;
    private boolean matched;

    public Format(char ch, boolean start) {
      super(Type.FORMAT, String.valueOf(ch));
      this.ch = ch;
      this.start = start;
    }

    public void setMatched(boolean matched) { this.matched = matched; }

    public boolean isHtml() { return true; }

    public String toHtml(boolean caps) {
      // This character only implies special formatting if it was matched.
      // Otherwise, it was just a plain old character.
      if (matched) {
        return start ? getFormatStart(ch) : getFormatEnd(ch);
      } else {
        // We have to make sure we escape HTML characters as usual.
        return (ch == '"') ? """ : String.valueOf(ch);
      }
    }

    /**
     * Not supported. Info should not be needed for this type
     */
    public List<String> getInfo() {
      throw new UnsupportedOperationException();
    }

    public boolean controlCaps() { return (ch == '^'); }
    public boolean setCaps() { return start; }

    private String getFormatStart(char ch) {
      switch (ch) {
        case '*': return "<b>";
        case '_': return "<i>";
        case '^': return "<b>"; // TODO: all caps
        case '"': return "<font color=\"#999999\">\u201c";
        default: throw new AssertionError("unknown format '" + ch + "'");
      }
    }

    private String getFormatEnd(char ch) {
      switch (ch) {
        case '*': return "</b>";
        case '_': return "</i>";
        case '^': return "</font>"; // TODO: all caps
        case '"': return "\u201d</font>";
        default: throw new AssertionError("unknown format '" + ch + "'");
      }
    }
  }

  /**
   * Appends the given token to this parser's token list.
   *
   * @param token the token to append
   */
  private void addToken(Token token) {
    this.tokens.add(token);
  }

  /**
   * Converts the entire message into a single HTML display string.
   *
   * Each part becomes one {@code <p>} element terminated by a newline. Tokens
   * that report {@code isHtml()} render themselves; all other tokens are
   * rendered here according to their type, link-like tokens as an
   * {@code <a>} element wrapping the token's raw text.
   *
   * NOTE(review): raw text and URLs are appended as-is in this method;
   * confirm that they are already HTML-safe when they get here.
   *
   * @return the HTML for all parts, in order
   * @throws AssertionError if a non-HTML token has an unhandled type
   */
  public String toHtml() {
    StringBuilder html = new StringBuilder();

    for (Part part : parts) {
      // Caps state is tracked per part and updated by caps-controlling tokens.
      boolean caps = false;

      html.append("<p>");
      for (Token token : part.getTokens()) {
        if (token.isHtml()) {
          html.append(token.toHtml(caps));
        } else {
          switch (token.getType()) {
          case LINK: {
            html.append("<a href=\"");
            html.append(((Link)token).getURL());
            html.append("\">");
            html.append(token.getRawText());
            html.append("</a>");
            break;
          }

          case SMILEY:
            // TODO: link to an appropriate image
            html.append(token.getRawText());
            break;

          case ACRONYM:
            html.append(token.getRawText());
            break;

          case MUSIC:
            // TODO: include a music glyph
            html.append(((MusicTrack)token).getTrack());
            break;

          case GOOGLE_VIDEO: {
            // TODO: include a Google Video icon
            html.append("<a href=\"");
            html.append(((Video)token).getURL(((Video)token).getDocID()));
            html.append("\">");
            html.append(token.getRawText());
            html.append("</a>");
            break;
          }

          case YOUTUBE_VIDEO: {
            // TODO: include a YouTube icon
            html.append("<a href=\"");
            html.append(((YouTubeVideo)token).getURL(
                ((YouTubeVideo)token).getDocID()));
            html.append("\">");
            html.append(token.getRawText());
            html.append("</a>");
            break;
          }

          case PHOTO: {
            // TODO: include a Picasa Web icon
            html.append("<a href=\"");
            html.append(Photo.getAlbumURL(
                ((Photo)token).getUser(), ((Photo)token).getAlbum()));
            html.append("\">");
            html.append(token.getRawText());
            html.append("</a>");
            break;
          }

          case FLICKR: {
            // TODO: include a Flickr icon
            // A FLICKR token is a FlickrPhoto, which is not a Photo, so it
            // must only ever be cast to FlickrPhoto here.
            html.append("<a href=\"");
            html.append(((FlickrPhoto)token).getUrl());
            html.append("\">");
            html.append(token.getRawText());
            html.append("</a>");
            break;
          }

          default:
            throw new AssertionError("unknown token type: " + token.getType());
          }
        }

        if (token.controlCaps()) {
          caps = token.setCaps();
        }
      }
      html.append("</p>\n");
    }

    return html.toString();
  }

  /**
   * Returns the given string with its {@code char}s in reverse order.
   *
   * The reversal is per UTF-16 code unit, so surrogate pairs are not kept
   * together.
   *
   * @param str the string to reverse; must not be null
   * @return a new string containing the chars of {@code str} back to front
   */
  protected static String reverse(String str) {
    char[] chars = str.toCharArray();
    // Swap from both ends towards the middle.
    for (int lo = 0, hi = chars.length - 1; lo < hi; ++lo, --hi) {
      char tmp = chars[lo];
      chars[lo] = chars[hi];
      chars[hi] = tmp;
    }
    return new String(chars);
  }

  /**
   * A node of a character trie. Every node records the text spelled out by
   * the path leading to it and, optionally, a value; a node "exists" as an
   * entry only once a non-null value has been set on it.
   */
  public static class TrieNode {
    private final HashMap<Character,TrieNode> children =
        new HashMap<Character,TrieNode>();
    private String text;
    private String value;

    /** Creates a node whose text is the empty string. */
    public TrieNode() {
      this("");
    }

    /**
     * @param text the text spelled out by the path to this node
     */
    public TrieNode(String text) {
      this.text = text;
    }

    /** Returns true if a non-null value has been set on this node. */
    public final boolean exists() {
      return value != null;
    }

    public final String getText() {
      return text;
    }

    public final String getValue() {
      return value;
    }

    public void setValue(String value) {
      this.value = value;
    }

    /** Returns the child for the given character, or null if there is none. */
    public TrieNode getChild(char ch) {
      return children.get(ch);
    }

    /**
     * Returns the child for the given character, first creating it (with this
     * node's text plus the character as its text) if it does not exist yet.
     */
    public TrieNode getOrCreateChild(char ch) {
      TrieNode child = children.get(ch);
      if (child != null) {
        return child;
      }
      child = new TrieNode(text + ch);
      children.put(ch, child);
      return child;
    }

    /**
     * Adds the given string into the trie below {@code root}, creating nodes
     * as needed, and stores {@code value} on the node reached by the last
     * character (on {@code root} itself for an empty string).
     */
    public static  void addToTrie(TrieNode root, String str, String value) {
      TrieNode node = root;
      for (int i = 0; i < str.length(); ++i) {
        node = node.getOrCreateChild(str.charAt(i));
      }
      node.setValue(value);
    }
  }



  /**
   * Determines whether some non-empty prefix of the given string is an entry
   * in the given trie. The walk stops at the first entry found or at the
   * first character that has no child node.
   *
   * @param root the trie to search from
   * @param str the string to look up
   * @return true if a prefix of {@code str} (possibly all of it) is an entry
   */
  private static boolean matches(TrieNode root, String str) {
    TrieNode node = root;
    for (int i = 0; i < str.length(); ++i) {
      node = node.getChild(str.charAt(i));
      if (node == null) {
        return false;
      }
      if (node.exists()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Finds the trie node for the longest trie entry that matches the parser's
   * raw text starting at the given index, without the special smiley
   * tokenizing case.
   *
   * @return the node of the longest accepted match, or null if there is none
   */
  private static TrieNode longestMatch(
      TrieNode root, AbstractMessageParser p, int start) {
    final boolean smiley = false;
    return longestMatch(root, p, start, smiley);
  }

  /**
   * Finds the trie node for the longest trie entry that matches the parser's
   * raw text starting at the given index. An entry is only accepted when the
   * position right after it is a word break or, if {@code smiley} is set, a
   * smiley break.
   *
   * @param root the trie to walk
   * @param p the parser whose raw text is matched
   * @param start index in the raw text at which matching starts
   * @param smiley whether a smiley break also ends a match
   * @return the node of the longest accepted match, or null if there is none
   */
  private static TrieNode longestMatch(
      TrieNode root, AbstractMessageParser p, int start, boolean smiley) {
    TrieNode node = root;
    TrieNode longest = null;
    int pos = start;
    while (pos < p.getRawText().length()) {
      char c = p.getRawText().charAt(pos);
      pos++;  // pos now points just past the consumed character
      node = node.getChild(c);
      if (node == null) {
        break;
      }
      if (!node.exists()) {
        continue;
      }
      if (p.isWordBreak(pos) || (smiley && p.isSmileyBreak(pos))) {
        longest = node;
      }
    }
    return longest;
  }


  /**
   * A group of tokens that is delivered as a single message, optionally
   * prefixed by "me" text.
   */
  public static class Part {
    private String meText;
    private ArrayList<Token> tokens = new ArrayList<Token>();

    public Part() {
    }

    /**
     * Returns the type code of this part: "s" when {@code isSend} is true,
     * "r" otherwise, followed by "d" for a media part, "m" for a part with me
     * text, or nothing.
     */
    public String getType(boolean isSend) {
      String direction = isSend ? "s" : "r";
      return direction + getPartType();
    }

    /** Returns "d" for media, "m" when me text is set, otherwise "". */
    private String getPartType() {
      if (isMedia()) {
        return "d";
      }
      return (meText != null) ? "m" : "";
    }

    /** Returns true if this part consists of exactly one media token. */
    public boolean isMedia() {
      if (tokens.size() != 1) {
        return false;
      }
      return tokens.get(0).isMedia();
    }

    /**
     * Convenience accessor for the single token of a media part.
     *
     * @return the media token if this.isMedia(), otherwise null
     */
    public Token getMediaToken() {
      return isMedia() ? tokens.get(0) : null;
    }

    /**
     * Adds the given token to this part.
     *
     * @throws AssertionError if this part is already a media part
     */
    public void add(Token token) {
      if (isMedia()) {
        throw new AssertionError("media ");
      }
      tokens.add(token);
    }

    public void setMeText(String meText) {
      this.meText = meText;
    }

    /**
     * Returns the original text of this part: the me text, if any, followed
     * by the raw text of every token in order.
     */
    public String getRawText() {
      StringBuilder raw = new StringBuilder();
      if (meText != null) {
        raw.append(meText);
      }
      for (Token token : tokens) {
        raw.append(token.getRawText());
      }
      return raw.toString();
    }

    /** Returns the tokens in this part. */
    public ArrayList<Token> getTokens() {
      return tokens;
    }

    // Disabled: adds the tokens into the given builder as an array.
//    public void toArray(JSArrayBuilder array) {
//      if (isMedia()) {
//        // For media, we send its array (i.e., we don't wrap this in another
//        // array as we do for non-media parts).
//        tokens.get(0).toArray(array);
//      } else {
//        array.beginArray();
//        addToArray(array);
//        array.endArray();
//      }
//    }
  }
}

Other Android examples (source code examples)

Here is a short list of links related to this Android AbstractMessageParser.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.