|
Java example source code file (GloveTest.java)
The GloveTest.java Java example source code/* * * * Copyright 2015 Skymind,Inc. * * * * Licensed under the Apache License, Version 2.0 (the "License"); * * you may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, software * * distributed under the License is distributed on an "AS IS" BASIS, * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * * limitations under the License. * */ package org.deeplearning4j.models.glove; import static org.junit.Assert.*; import org.canova.api.util.ClassPathResource; import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer; import org.deeplearning4j.models.embeddings.wordvectors.WordVectors; import org.deeplearning4j.models.word2vec.VocabWord; import org.deeplearning4j.text.sentenceiterator.BasicLineIterator; import org.deeplearning4j.text.sentenceiterator.LineSentenceIterator; import org.deeplearning4j.text.sentenceiterator.SentenceIterator; import org.deeplearning4j.text.sentenceiterator.SentencePreProcessor; import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor; import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory; import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.util.Collection; /** * Created by agibsonccc on 12/3/14. */ public class GloveTest { private static final Logger log = LoggerFactory.getLogger(GloveTest.class); private Glove glove; private SentenceIterator iter; @Before public void before() throws Exception { ClassPathResource resource = new ClassPathResource("/raw_sentences.txt"); File file = resource.getFile(); iter = new LineSentenceIterator(file); iter.setPreProcessor(new SentencePreProcessor() { @Override public String preProcess(String sentence) { return sentence.toLowerCase(); } }); } @Ignore @Test public void testGlove() throws Exception { /* glove = new Glove.Builder().iterate(iter).symmetric(true).shuffle(true) .minWordFrequency(1).iterations(10).learningRate(0.1) .layerSize(300) .build(); glove.fit(); Collection<String> words = glove.wordsNearest("day", 20); log.info("Nearest words to 'day': " + words); assertTrue(words.contains("week")); */ } @Test public void testGloVe1() throws Exception { File inputFile = new ClassPathResource("/big/raw_sentences.txt").getFile(); SentenceIterator iter = new BasicLineIterator(inputFile.getAbsolutePath()); // Split on white spaces in the line to get words TokenizerFactory t = new DefaultTokenizerFactory(); t.setTokenPreProcessor(new CommonPreprocessor()); Glove glove = new Glove.Builder() .iterate(iter) .tokenizerFactory(t) .alpha(0.75) .learningRate(0.1) .epochs(45) .xMax(100) .shuffle(true) .symmetric(true) .build(); glove.fit(); double simD = glove.similarity("day", "night"); double simP = glove.similarity("best", "police"); log.info("Day/night similarity: " + simD); log.info("Best/police similarity: " + simP); Collection<String> words = glove.wordsNearest("day", 10); log.info("Nearest words to 'day': " + words); assertTrue(simD > 0.7); // actually simP should be somewhere at 0 assertTrue(simP < 0.5); assertTrue(words.contains("night")); assertTrue(words.contains("year")); assertTrue(words.contains("week")); File tempFile = File.createTempFile("glove", "temp"); tempFile.deleteOnExit(); INDArray day1 = glove.getWordVectorMatrix("day").dup(); WordVectorSerializer.writeWordVectors(glove, tempFile); WordVectors vectors = WordVectorSerializer.loadTxtVectors(tempFile); INDArray day2 = vectors.getWordVectorMatrix("day").dup(); assertEquals(day1, day2); tempFile.delete(); } } Other Java examples (source code examples)Here is a short list of links related to this Java GloveTest.java source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.