alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (VocabularyHolderTest.java)

This example Java source code file (VocabularyHolderTest.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

exception, inmemorylookupcache, test, vocabcache, vocabularyholder, vocabularyholdertest

The VocabularyHolderTest.java Java example source code

package org.deeplearning4j.models.word2vec.wordstore;

import org.deeplearning4j.models.word2vec.wordstore.inmemory.InMemoryLookupCache;
import org.junit.Test;

import static org.junit.Assert.*;

/**
 * Tests for {@link VocabularyHolder}: transferring its contents into a vocab cache, building it
 * from an existing cache, truncating it, and running its scavenger by hand.
 *
 * Created by fartovii on 08.11.15.
 */
public class VocabularyHolderTest {

    /**
     * Builds the holder configuration shared by every scavenger test in this class.
     *
     * @return a holder built with minWordFrequency 5, hugeModelExpected true,
     *         scavengerActivationThreshold 1000000 and scavengerRetentionDelay 3
     * @throws Exception declared broadly, in line with the test methods of this class
     */
    private static VocabularyHolder newScavengerHolder() throws Exception {
        return new VocabularyHolder.Builder()
                .minWordFrequency(5)
                .hugeModelExpected(true)
                // the exact threshold is irrelevant here: the tests trigger the scavenger manually
                .scavengerActivationThreshold(1000000)
                .scavengerRetentionDelay(3)
                .build();
    }

    /**
     * Calls {@code holder.incrementWordCounter(word)} the given number of times.
     *
     * @param holder holder whose counter is incremented
     * @param word   word passed to each call
     * @param times  how many calls to make
     * @throws Exception declared broadly, in line with the test methods of this class
     */
    private static void bumpCounter(VocabularyHolder holder, String word, int times) throws Exception {
        for (int call = 0; call < times; call++) {
            holder.incrementWordCounter(word);
        }
    }

    /**
     * Runs the scavenger once and then asserts the number of words held.
     *
     * @param expectedWords expected value of {@code holder.numWords()} after the activation
     * @param holder        holder under test
     * @throws Exception declared broadly, in line with the test methods of this class
     */
    private static void expectWordsAfterScavenging(int expectedWords, VocabularyHolder holder) throws Exception {
        holder.activateScavenger();
        assertEquals(expectedWords, holder.numWords());
    }

    /**
     * Adds three words with different counts, transfers them into an in-memory cache and checks
     * both the frequencies and the index order reported by the cache.
     *
     * @throws Exception if any call on the holder or the cache fails
     */
    @Test
    public void testTransferBackToVocabCache() throws Exception {
        VocabularyHolder holder = new VocabularyHolder();
        for (String word : new String[] {"test", "tests", "testz"}) {
            holder.addWord(word);
        }
        bumpCounter(holder, "tests", 2);
        bumpCounter(holder, "testz", 1);

        InMemoryLookupCache cache = new InMemoryLookupCache(false);
        holder.updateHuffmanCodes();
        holder.transferBackToVocabCache(cache);

        // word frequencies must survive the transfer
        assertEquals(3, cache.numWords());
        assertEquals(1, cache.wordFrequency("test"));
        assertEquals(2, cache.wordFrequency("testz"));
        assertEquals(3, cache.wordFrequency("tests"));

        // Huffman tree transfer: the expected index order is most frequent word first
        assertEquals("tests", cache.wordAtIndex(0));
        assertEquals("testz", cache.wordAtIndex(1));
        assertEquals("test", cache.wordAtIndex(2));
    }

    /**
     * Checks that a holder constructed from a fresh in-memory cache reports zero words.
     *
     * @throws Exception if construction fails
     */
    @Test
    public void testConstructor() throws Exception {
        VocabularyHolder holder = new VocabularyHolder(new InMemoryLookupCache(true), false);

        // the UNK token is no longer expected to show up in the holder
        assertEquals(0, holder.numWords());
    }

    /**
     * Makes sure SPECIAL words are not affected by truncation when an existing vocabulary is
     * extended.
     *
     * @throws Exception if any call on the holders or the cache fails
     */
    @Test
    public void testSpecial1() throws Exception {
        VocabularyHolder holder = new VocabularyHolder.Builder()
                .minWordFrequency(1)
                .build();

        for (String word : new String[] {"test", "tests"}) {
            holder.addWord(word);
        }

        holder.truncateVocabulary();
        assertEquals(2, holder.numWords());

        VocabCache cache = new InMemoryLookupCache();
        holder.transferBackToVocabCache(cache);

        // NOTE(review): the original carried a commented-out ".markAsSpecial(true)" call after
        // minWordFrequency(10); confirm whether that builder option is still meant to be unused.
        VocabularyHolder holder2 = new VocabularyHolder.Builder()
                .externalCache(cache)
                .minWordFrequency(10)
                .build();

        holder2.addWord("testz");
        assertEquals(3, holder2.numWords());

        // only the two words that came in through the external cache are expected to remain
        holder2.truncateVocabulary();
        assertEquals(2, holder2.numWords());
    }

    /**
     * Word "test" is never incremented after being added; it is expected to disappear on the third
     * scavenger activation while "tests" stays.
     *
     * @throws Exception if any call on the holder fails
     */
    @Test
    public void testScavenger1() throws Exception {
        VocabularyHolder holder = newScavengerHolder();

        holder.addWord("test");
        holder.addWord("tests");
        bumpCounter(holder, "tests", 6);

        expectWordsAfterScavenging(2, holder);
        expectWordsAfterScavenging(2, holder);

        // third activation: word "test" should be gone
        expectWordsAfterScavenging(1, holder);
    }

    /**
     * Same as {@link #testScavenger1()}, except that "test" is incremented once before the first
     * activation; it is still expected to be removed on the third activation.
     *
     * @throws Exception if any call on the holder fails
     */
    @Test
    public void testScavenger2() throws Exception {
        VocabularyHolder holder = newScavengerHolder();

        holder.addWord("test");
        bumpCounter(holder, "test", 1);

        holder.addWord("tests");
        bumpCounter(holder, "tests", 6);

        expectWordsAfterScavenging(2, holder);
        expectWordsAfterScavenging(2, holder);

        // third activation: word "test" should be gone
        expectWordsAfterScavenging(1, holder);
    }

    /**
     * Word "test" is incremented between the first and second activation, which is expected to
     * postpone its removal by one activation.
     *
     * @throws Exception if any call on the holder fails
     */
    @Test
    public void testScavenger3() throws Exception {
        VocabularyHolder holder = newScavengerHolder();

        holder.addWord("test");
        expectWordsAfterScavenging(1, holder);

        bumpCounter(holder, "test", 1);
        holder.addWord("tests");
        bumpCounter(holder, "tests", 6);

        expectWordsAfterScavenging(2, holder);

        // third activation: "test" must NOT be removed yet, since its frequency was 1 at
        // retention point 0 and 2 at the later ones
        expectWordsAfterScavenging(2, holder);

        // by now every retention point has shifted, so "test" should be removed
        expectWordsAfterScavenging(1, holder);
    }

    /**
     * Like {@link #testScavenger3()}, but "test" is incremented once more before the fourth
     * activation and is therefore expected to be kept.
     *
     * @throws Exception if any call on the holder fails
     */
    @Test
    public void testScavenger4() throws Exception {
        VocabularyHolder holder = newScavengerHolder();

        holder.addWord("test");
        expectWordsAfterScavenging(1, holder);

        bumpCounter(holder, "test", 1);
        holder.addWord("tests");
        bumpCounter(holder, "tests", 6);

        expectWordsAfterScavenging(2, holder);

        // third activation: "test" must NOT be removed yet, since its frequency was 1 at
        // retention point 0 and 2 at the later ones
        expectWordsAfterScavenging(2, holder);

        bumpCounter(holder, "test", 1);

        // every retention point has shifted, yet "test" should NOT be removed because it is now
        // above the scavenger threshold
        expectWordsAfterScavenging(2, holder);
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java VocabularyHolderTest.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.