alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (VocabCache.java)

This example Java source code file (VocabCache.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

collection, deprecated, sequenceelement, serializable, string, util, vocabcache

The VocabCache.java Java example source code

/*
 *
 *  * Copyright 2015 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 *
 */

package org.deeplearning4j.models.word2vec.wordstore;


import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;

import java.io.Serializable;
import java.util.Collection;


/**
 * A VocabCache handles the storage of information needed for the word2vec look up table.
 *
 * <p>The interface exposes word counts, word/index lookups, document counts and a
 * separate set of tokens that are not necessarily part of the vocab. How any of this
 * is stored is left to implementations.
 *
 * @param <T> the type of {@link SequenceElement} held by this cache
 * @author Adam Gibson
 */
public interface VocabCache<T extends SequenceElement> extends Serializable {



	/**
	 * Loads the vocab.
	 * Where the vocab is loaded from is not specified by this interface.
	 */
	void loadVocab();


	/**
	 * Reports whether the vocab exists already.
	 *
	 * @return {@code true} if the vocab exists already, {@code false} otherwise
	 */
	boolean vocabExists();

	/**
	 * Saves the vocab: this allows for reuse of word frequencies.
	 * Where the vocab is saved to is not specified by this interface.
	 */
	void saveVocab();



    /**
     * Returns all of the words in the vocab.
     *
     * @return all the words in the vocab
     */
    Collection<String> words();


    /**
     * Increments the count for the given word.
     * The amount is not stated here; use {@link #incrementWordCount(String, int)}
     * to increment by an explicit amount.
     *
     * @param word the word to increment the count for
     */
    void incrementWordCount(String word);


    /**
     * Increments the count for the given word by the amount {@code increment}.
     *
     * @param word the word to increment the count for
     * @param increment the amount to increment by
     */
    void incrementWordCount(String word,int increment);

    /**
     * Returns the number of times the word has occurred.
     *
     * @param word the word to retrieve the occurrence frequency for
     * @return 0 if the word hasn't occurred, otherwise the number of times
     * the word occurs
     */
    int wordFrequency(String word);

    /**
     * Returns true if the cache contains the given word.
     *
     * @param word the word to check for
     * @return {@code true} if the cache contains the given word, {@code false} otherwise
     */
    boolean containsWord(String word);

    /**
     * Returns the word contained at the given index or null.
     *
     * @param index the index of the word to get
     * @return the word at the given index, or {@code null}
     * (presumably when no word is stored at that index - TODO confirm)
     */
    String wordAtIndex(int index);

    /**
     * Returns the SequenceElement at the given index or null.
     *
     * @param index the index of the element to get
     * @return the element at the given index, or {@code null}
     * (presumably when no element is stored at that index - TODO confirm)
     */
    T elementAtIndex(int index);

    /**
     * Returns the index of a given word.
     *
     * @param word the word to look up the index for
     * @return the index of the given word or -1
     * if not found
     */
    int indexOf(String word);


    /**
     * Returns all of the vocab word nodes.
     *
     * @return all of the vocab word nodes
     */
    Collection<T> vocabWords();


    /**
     * The total number of word occurrences.
     *
     * @return the total number of word occurrences
     */
    long totalWordOccurrences();


    /**
     * Returns the element of type {@code T} for the given word.
     * NOTE(review): the original source carries no description for this method;
     * presumably this is the vocab element for the word (compare
     * {@link #tokenFor(String)} for tokens) - confirm against implementations.
     *
     * @param word the word to get the element for
     * @return the element for the given word; the result for an unknown word
     * is not specified here
     */
     T wordFor(String word);


    /**
     * Takes an index and a word.
     * NOTE(review): the original source carries no description for this method;
     * judging by the name it associates the word with the given index - confirm
     * against implementations.
     *
     * @param index the index (presumably the position to store the word at)
     * @param word the word (presumably the word to store at that index)
     */
    void addWordToIndex(int index,String word);


    /**
     * Inserts the word as a vocab word
     * (it gets the vocab word from the internal token store).
     * Note that the index must be set on the token.
     *
     * @param word the word to add to the vocab
     * @deprecated marked deprecated in the source; no replacement is named there
     */
    @Deprecated
    void putVocabWord(String word);

    /**
     * Returns the number of words in the cache.
     *
     * @return the number of words in the cache
     */
    int numWords();


    /**
     * Count of documents a word appeared in.
     *
     * @param word the word to get the document count for
     * @return the number of documents the word appeared in
     */
    int docAppearedIn(String word);

    /**
     * Increments the document count for the given word.
     *
     * @param word the word whose document count is incremented
     * @param howMuch presumably the amount to increment by (inferred from the
     * parameter name - TODO confirm)
     */
    void incrementDocCount(String word,int howMuch);


    /**
     * Sets the count for the number of documents the word appears in.
     *
     * @param word the word to set the count for
     * @param count the count of the word
     */
    void setCountForDoc(String word,int count);

    /**
     * Returns the total number of documents encountered in the corpus.
     *
     * @return the total number of docs in the corpus
     */
    int totalNumberOfDocs();


    /**
     * Increments the doc count.
     * The amount is not stated here; use {@link #incrementTotalDocCount(int)}
     * to increment by an explicit amount.
     */
    void incrementTotalDocCount();

    /**
     * Increments the doc count by the given amount.
     *
     * @param by the number to increment by
     */
    void incrementTotalDocCount(int by);

    /**
     * All of the tokens in the cache (not necessarily a part of the vocab).
     *
     * @return the tokens for this cache
     */
    Collection<T> tokens();


    /**
     * Adds a token
     * to the cache.
     *
     * @param element the word to add
     */
     void addToken(T element);

    /**
     * Returns the token (again not necessarily in the vocab)
     * for this word.
     *
     * @param word the word to get the token for
     * @return the vocab word for this token
     */
     T tokenFor(String word);

    /**
     * Returns whether the cache
     * contains this token or not.
     *
     * @param token the token to test
     * @return whether the token exists in
     * the cache or not
     */
    boolean hasToken(String token);


    /**
     * Imports vocabulary.
     * NOTE(review): how imported entries are combined with entries already in
     * this cache is not specified here - confirm against implementations.
     *
     * @param vocabCache the vocab cache to import (presumably the source of the
     * imported vocabulary)
     */
    void importVocabulary(VocabCache<T> vocabCache);

    /**
     * Updates counters.
     * Which counters are updated is not specified here; judging by the method
     * name they are presumably word-occurrence counters - TODO confirm.
     */
    void updateWordsOccurencies();

    /**
     * Removes element with specified label from vocabulary.
     * Please note: Huffman index should be updated after element removal.
     *
     * @param label label of the element to be removed
     */
    void removeElement(String label);


    /**
     * Removes specified element from vocabulary.
     * Please note: Huffman index should be updated after element removal.
     *
     * @param element SequenceElement to be removed
     */
    void removeElement(T element);
}

Other Java examples (source code examples)

Here is a short list of links related to this Java VocabCache.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.