alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (LocalUnstructuredDataFormatter.java)

This example Java source code file (LocalUnstructuredDataFormatter.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example"™.

Learn more about this Java project at its project page.

Java - Java tags/keywords

arraylist, file, illegal, illegalstateexception, ioexception, iterator, labelingtype, list, localunstructureddataformatter, name,directory, string, stringbuffer, train/test, util

The LocalUnstructuredDataFormatter.java Java example source code

package org.deeplearning4j.datasets.rearrange;

import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/**
 * Rearranges an unstructured dataset into a train/test split on the
 * file system.
 *
 * <p>Every file found (recursively) under the original root directory is
 * copied to {@code <destinationRootDir>/split/train/<label>/<fileName>} or
 * {@code <destinationRootDir>/split/test/<label>/<fileName>}. The label is
 * derived either from the file name or from the name of the file's parent
 * directory, depending on the {@link LabelingType}.
 *
 * @author Adam Gibson
 */
public class LocalUnstructuredDataFormatter {
    private final File splitRootDir, rootDir;
    private final File train, test;
    private final LabelingType labelingType;
    // The three counters stay at -1 until rearrange() has run.
    private int numExamplesTotal = -1;
    private int numTestExamples = -1;
    private final double percentTrain;
    private int numExamplesToTrainOn = -1;
    private final List<String> allFiles = new ArrayList<>();


    /**
     * How the label of an example is derived from its path.
     */
    public enum LabelingType {
        /** The label is taken from the file name, see {@link #getNameLabel(String)}. */
        NAME,
        /** The label is the name of the parent directory, see {@link #getPathLabel(String)}. */
        DIRECTORY
    }


    /**
     * Creates the formatter and the {@code split/train} and {@code split/test}
     * directories underneath the destination root.
     *
     * @param destinationRootDir the destination root directory
     * @param rootDir the root directory of the original data
     * @param labelingType the labeling type to use (NAME/DIRECTORY)
     * @param percentTrain the share of the examples to copy in to the train
     *                     directory; it is multiplied with the total number of
     *                     files, so 0.8 means 80%. The value is not validated.
     * @throws IllegalStateException if the split directory already exists or
     *                               the train/test directories can not be created
     */
    public LocalUnstructuredDataFormatter(File destinationRootDir,File rootDir,LabelingType labelingType,double percentTrain) {
        this.percentTrain = percentTrain;
        this.rootDir = rootDir;
        splitRootDir = new File(destinationRootDir,"split");
        if(splitRootDir.exists())
            throw new IllegalStateException("Train/test split already exists");
        train = new File(splitRootDir,"train");
        test = new File(splitRootDir,"test");
        createDirectory(train);
        createDirectory(test);
        this.labelingType = labelingType;
    }

    /**
     * Creates the directory (including missing parents) and fails loudly
     * instead of ignoring the result of {@link File#mkdirs()}.
     */
    private static void createDirectory(File dir) {
        // mkdirs() also returns false when the directory already exists,
        // hence the additional isDirectory() check.
        if(!dir.mkdirs() && !dir.isDirectory())
            throw new IllegalStateException("Unable to create directory " + dir.getAbsolutePath());
    }

    /**
     * Collects all files below the root directory, shuffles them and copies
     * the first {@code percentTrain * total} files in to the train directory
     * and the remaining files in to the test directory.
     *
     * <p>A failing copy does not stop the remaining files from being copied;
     * the failures are reported once all files have been attempted.
     *
     * @throws IllegalStateException if at least one file could not be copied
     *                               (the first {@link IOException} is the cause,
     *                               later ones are attached as suppressed), or
     *                               if a label could not be derived for a file
     */
    public void rearrange() {
        //start from a clean list so that a repeated call does not
        //accumulate every file a second time
        allFiles.clear();

        //accumulate all files/data
        Iterator<File> files = FileUtils.iterateFiles(rootDir,null,true);
        while(files.hasNext()) {
            allFiles.add(files.next().getAbsolutePath());
        }

        numExamplesTotal = allFiles.size();
        //randomly partition the data; afterwards split in to test train
        //based on the current position in the files
        int numExampleForTrain = (int) (percentTrain * (double) numExamplesTotal);
        this.numExamplesToTrainOn = numExampleForTrain;
        this.numTestExamples = numExamplesTotal - numExampleForTrain;
        Collections.shuffle(allFiles);

        IllegalStateException failure = null;
        for(int i = 0; i < numExamplesTotal; i++) {
            String sourcePath = allFiles.get(i);
            File origin = new File(sourcePath);
            File destination = new File(getNewDestination(sourcePath, i < numExampleForTrain));
            destination.getParentFile().mkdirs();
            try {
                Files.copy(origin,destination);
            } catch (IOException e) {
                //keep copying the remaining files, but remember the problem
                //so the caller learns that the split is incomplete
                if(failure == null)
                    failure = new IllegalStateException("Unable to copy " + origin + " to " + destination, e);
                else
                    failure.addSuppressed(e);
            }
        }

        if(failure != null)
            throw failure;
    }


    /**
     * Computes where a file is copied to.
     *
     * @param path the path of the original file
     * @param train true to place the file below the train directory,
     *              false to place it below the test directory
     * @return the absolute path {@code <train|test>/<label>/<fileName>}
     * @throws IllegalStateException if the labeling type is not supported
     *                               or no label can be derived from the path
     */
    public String getNewDestination(String path, boolean train) {
        if(labelingType == null)
            throw new IllegalStateException("Illegal labeling type ");

        final String label;
        switch (labelingType) {
            case NAME:
                label = getNameLabel(path);
                break;
            case DIRECTORY:
                label = getPathLabel(path);
                break;
            default:
                throw new IllegalStateException("Illegal labeling type ");
        }

        File splitDir = train ? this.train : this.test;
        File labelDir = new File(splitDir,label);
        return new File(labelDir,new File(path).getName()).getAbsolutePath();
    }


    /**
     * Returns the name of the directory that directly contains the file.
     *
     * @param path the path of the file
     * @return the name of the parent directory
     * @throws IllegalStateException if the path has no parent directory
     */
    public String getPathLabel(String path) {
        File parent = new File(path).getParentFile();
        if(parent == null)
            throw new IllegalStateException("Illegal path; no parent directory found: " + path);
        return parent.getName();
    }


    /**
     * Extracts the label from a path of the form {@code ...-<label>.<format>}.
     *
     * <p>The label is everything between the last dash in front of the last
     * dot and that dot. The dot itself is part of the returned value, e.g.
     * {@code "img-cat.png"} yields {@code "cat."}.
     * NOTE(review): the trailing dot looks unintended, but existing splits
     * may rely on it - confirm before changing.
     *
     * @param path the path of the file
     * @return the label including the trailing dot
     * @throws IllegalStateException if the path contains no dot, or no dash
     *                               in front of the last dot
     */
    public String getNameLabel(String path) {
        int startOfFormat = path.lastIndexOf('.');
        if(startOfFormat < 0)
            throw new IllegalStateException("Illegal path; no format found");
        StringBuilder label = new StringBuilder();
        //walk backwards from the dot; the bounds check keeps a path without
        //a dash from running past the start of the string
        while(startOfFormat >= 0 && path.charAt(startOfFormat) != '-') {
            label.append(path.charAt(startOfFormat));
            startOfFormat--;
        }

        if(startOfFormat < 0)
            throw new IllegalStateException("Illegal path; no - found. A dash is used to inidicate a lbale.");
        //the characters were collected back to front
        return label.reverse().toString();
    }

    /**
     * @return the number of files found by {@link #rearrange()},
     *         or -1 if it has not run yet
     */
    public int getNumExamplesTotal() {
        return numExamplesTotal;
    }

    /**
     * @return the number of files assigned to the train directory,
     *         or -1 if {@link #rearrange()} has not run yet
     */
    public int getNumExamplesToTrainOn() {
        return numExamplesToTrainOn;
    }

    /**
     * @return the number of files assigned to the test directory,
     *         or -1 if {@link #rearrange()} has not run yet
     */
    public int getNumTestExamples() {
        return numTestExamples;
    }

    /**
     * @return the {@code split/test} directory
     */
    public File getTest() {
        return test;
    }

    /**
     * @return the {@code split/train} directory
     */
    public File getTrain() {
        return train;
    }

}

Other Java examples (source code examples)

Here is a short list of links related to this Java LocalUnstructuredDataFormatter.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.