alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (BinaryCoOccurrenceReader.java)

This example Java source code file (BinaryCoOccurrenceReader.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

arrayblockingqueue, asyncdeserializationthread, atomicboolean, atomicinteger, binarycooccurrencereader, cooccurrenceweight, countmap, exception, inputstream, nio, nonnull, override, runnable, runtimeexception, streamreaderthread, threading, threads, util

The BinaryCoOccurrenceReader.java Java example source code

package org.deeplearning4j.models.glove.count;

import lombok.NonNull;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Binary implementation of CoOccurenceReader interface, used to provide off-memory storage for cooccurrence maps generated for GloVe
 *
 * @author raver119@gmail.com
 */
public class BinaryCoOccurrenceReader<T extends SequenceElement> implements CoOccurenceReader {
    private VocabCache<T> vocabCache;
    private InputStream inputStream;
    private File file;
    private ArrayBlockingQueue<CoOccurrenceWeight buffer;
    int workers = Math.max(Runtime.getRuntime().availableProcessors() - 1, 1);
    private StreamReaderThread readerThread;
    private CountMap<T> countMap;


    protected static final Logger logger = LoggerFactory.getLogger(BinaryCoOccurrenceReader.class);

    public BinaryCoOccurrenceReader(@NonNull File file, @NonNull VocabCache<T> vocabCache, CountMap map) {
        this.vocabCache = vocabCache;
        this.file = file;
        this.countMap = map;
        buffer = new ArrayBlockingQueue<CoOccurrenceWeight(200000);

        try {
            inputStream = new BufferedInputStream(new FileInputStream(this.file), 100 * 1024 * 1024);
            //inputStream = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
            readerThread = new StreamReaderThread(inputStream);
            readerThread.start();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public boolean hasMoreObjects() {

        if (buffer.size() > 0) return true;

        try {
            return readerThread.hasMoreObjects() || buffer.size() > 0;
        } catch (Exception e) {
            throw new RuntimeException(e);
            //return false;
        }
    }

    @Override
    public CoOccurrenceWeight<T> nextObject() {
        if (buffer.size() > 0) {
            return buffer.poll();
        } else {
            // buffer can be starved, or we're already at the end of file.
            if (readerThread.hasMoreObjects()) {
                try {
                    return buffer.poll(3, TimeUnit.SECONDS);
                } catch (Exception e) {
                    return null;
                }
            }
        }


        return null;
        /*
        try {
            CoOccurrenceWeight<T> ret = new CoOccurrenceWeight<>();
            ret.setElement1(vocabCache.elementAtIndex(inputStream.readInt()));
            ret.setElement2(vocabCache.elementAtIndex(inputStream.readInt()));
            ret.setWeight(inputStream.readDouble());

            return ret;
        } catch (Exception e) {
            return null;
        }
        */
    }

    @Override
    public void finish() {
        try {
            if (inputStream != null) inputStream.close();
        } catch (Exception e) {
            //
        }
    }

    private class StreamReaderThread extends Thread implements Runnable {
        private InputStream stream;
        private AtomicBoolean isReading = new AtomicBoolean(false);

        public StreamReaderThread(@NonNull InputStream stream) {
            this.stream = stream;
            isReading.set(false);
        }

        @Override
        public void run() {
            try {
                // we read pre-defined number of objects as byte array
                byte[] array = new byte[16 * 500000];
                while (true) {
                    int count = stream.read(array);

                    isReading.set(true);
                    if (count == 0) break;

                    // now we deserialize them in separate threads to gain some speedup, if possible
                    List<AsyncDeserializationThread> threads = new ArrayList<>();
                    AtomicInteger internalPosition = new AtomicInteger(0);

                    for (int t = 0; t < workers; t++ ) {
                        threads.add(t, new AsyncDeserializationThread(t, array, buffer, internalPosition, count));
                        threads.get(t).start();
                    }

                    // we'll block this cycle untill all objects are fit into queue
                    for (int t = 0; t < workers; t++) {
                        try {
                            threads.get(t).join();
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }

                    isReading.set(false);
                    if (count < array.length) break;
                }

            } catch (Exception e) {
                isReading.set(false);
                throw new RuntimeException(e);
            }
        }

        public boolean hasMoreObjects() {
            try {
                return stream.available() > 0 || isReading.get();
            } catch (Exception e) {
                return false;
            } finally {
                ;
            }
        }
    }

    /**
     * Utility class that accepts byte array as input, and deserialize it into set of CoOccurrenceWeight objects
     */
    private class AsyncDeserializationThread extends Thread implements Runnable {
        private int threadId;
        private byte[] arrayReference;
        private ArrayBlockingQueue<CoOccurrenceWeight targetBuffer;
        private AtomicInteger pointer;
        private int limit;

        public AsyncDeserializationThread(int threadId, @NonNull byte[] array, @NonNull ArrayBlockingQueue<CoOccurrenceWeight targetBuffer, @NonNull AtomicInteger sharedPointer, int limit) {
            this.threadId = threadId;
            this.arrayReference = array;
            this.targetBuffer = targetBuffer;
            this.pointer = sharedPointer;
            this.limit = limit;


            setName("AsynDeserialization thread " + this.threadId);
        }

        @Override
        public void run() {
            ByteBuffer bB = ByteBuffer.wrap(arrayReference);
            int position = 0;
            while ((position = pointer.getAndAdd(16)) < this.limit) {
                if (position >= limit) {
                    continue;
                }


                int e1idx = bB.getInt(position);
                int e2idx = bB.getInt(position + 4);
                double eW = bB.getDouble( position + 8);


                CoOccurrenceWeight<T> object = new CoOccurrenceWeight();
                object.setElement1(vocabCache.elementAtIndex(e1idx));
                object.setElement2(vocabCache.elementAtIndex(e2idx));

                if (countMap != null) {
                    double mW = countMap.getCount(object.getElement1(), object.getElement2());

                    if (mW > 0) {
                        eW += mW;
                        countMap.removePair(object.getElement1(), object.getElement2());
                    }
                }
                object.setWeight(eW);

                try {
                    targetBuffer.put(object);
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java BinaryCoOccurrenceReader.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.