// MultiChunk.java

/*
 * Syncany, www.syncany.org
 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.syncany.chunk;

import java.io.IOException;
import java.io.InputStream;

import org.syncany.database.MultiChunkEntry.MultiChunkId;

/**
 * A multichunk represents the container format that stores one to many {@link Chunk}s.
 * Multichunks are created during the chunking/deduplication process by a {@link MultiChunker}.
 *
 * <p>There are two modes to handle multichunks:
 *
 * <ul>
 *  <li>When a new multichunk is <i>written</i> and filled up with chunks, the {@link Deduper} makes sure that
 *      chunks are only added until a multichunk's minimum size has been reached, and closes the
 *      multichunk afterwards. During that process, the {@link #write(Chunk) write()} method is called
 *      for each chunk, and {@link #isFull()} is checked for the size.
 *
 *  <li>When a multichunk is <i>read</i> from a file or an input stream, it can be processed sequentially using
 *      the {@link #read()} method (not used in current code!), or in a random order using the
 *      {@link #getChunkInputStream(byte[]) getChunkInputStream()} method. Because of the latter method,
 *      <b>it is essential that random read access on a multichunk is possible</b>.
 * </ul>
 *
 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
 */
public abstract class MultiChunk {
	/** Identifier of this multichunk; may be <code>null</code> if it was not supplied at construction time. */
	protected MultiChunkId id;

	/** Running size of this multichunk; starts at zero and is to be increased by {@link #write(Chunk) write()} implementations. */
	protected long size;

	// NOTE(review): the field is annotated as "in KB", yet isFull() compares it directly
	// against 'size' without any conversion -- confirm that both values use the same unit.
	protected int minSize; // in KB

	/**
	 * Creates a new, empty multichunk with a known identifier.
	 *
	 * <p>Use this constructor when the caller already knows the multichunk identifier,
	 * which is typically the case when a new multichunk is about to be written.
	 *
	 * @param id Unique multichunk identifier (can be randomly chosen)
	 * @param minSize Minimum multichunk size, used to determine if chunks can still be added
	 */
	public MultiChunk(MultiChunkId id, int minSize) {
		this.size = 0L;
		this.minSize = minSize;
		this.id = id;
	}

	/**
	 * Creates a new, empty multichunk without an identifier.
	 *
	 * <p>Use this constructor when the caller does <i>not</i> know the multichunk identifier,
	 * which is typically the case when a multichunk is read from a file. The identifier is
	 * initialized to <code>null</code> and can be assigned later via {@link #setId(MultiChunkId)}.
	 *
	 * @param minSize Minimum multichunk size, used to determine if chunks can still be added
	 */
	public MultiChunk(int minSize) {
		this(null, minSize);
	}

	/**
	 * Writes a {@link Chunk} to this multichunk (write mode).
	 *
	 * <p>Implementations must increase the {@link #size} by the amount written to the multichunk
	 * (the input size is sufficient) and, if the format requires one, make sure that a header is
	 * written before the first chunk.
	 *
	 * <p>Implementations do not have to check whether or not the multichunk is full. That check
	 * is expected to happen outside the multichunker/multichunk, as part of the deduplication
	 * algorithm in the {@link Deduper}.
	 *
	 * @param chunk Chunk to be written to the multichunk container
	 * @throws IOException If an exception occurs when writing to the multichunk
	 */
	public abstract void write(Chunk chunk) throws IOException;

	/**
	 * <b>Sequentially</b> reads the next {@link Chunk} from this multichunk (read mode).
	 * A chunk is returned for every call until no more chunks are available, at which
	 * point <code>null</code> is returned.
	 *
	 * <p>If random read access on a multichunk is desired, the
	 * {@link #getChunkInputStream(byte[]) getChunkInputStream()} method should be used instead.
	 *
	 * @return Returns the next chunk in the opened multichunk, or <code>null</code> if no chunk is available (anymore)
	 * @throws IOException If an exception occurs when reading from the multichunk
	 */
	// TODO [low] Method is only used by tests, not necessary anymore? Required for 'cleanup'?
	public abstract Chunk read() throws IOException;

	/**
	 * Reads a single {@link Chunk} in <b>random access mode</b> (read mode), using the chunk
	 * checksum as identifier. If the chunk is found, an input stream over the chunk's data is
	 * returned; otherwise the method returns <code>null</code>.
	 *
	 * <p>If all chunks are read from a multichunk sequentially, the {@link #read()} method should be used instead.
	 *
	 * @param checksum The checksum identifying a chunk instance
	 * @return Returns a chunk input stream (chunk data) if the chunk can be found in the multichunk, or <code>null</code> otherwise
	 * @throws IOException If an exception occurs when reading from the multichunk
	 */
	// TODO [low] Method should be named 'read(checksum)' and return a Chunk object, not an input stream, right?!
	public abstract InputStream getChunkInputStream(byte[] checksum) throws IOException;

	/**
	 * Closes this multichunk after writing/reading.
	 *
	 * <p>Implementations should close the underlying input/output stream (depending on
	 * whether the multichunk was opened in read or write mode).
	 *
	 * @throws IOException If an exception occurs when closing the multichunk
	 */
	public abstract void close() throws IOException;

	/**
	 * Determines the fill state of this multichunk (write mode), i.e. whether or not
	 * another chunk can still be added. It is used by the {@link Deduper}.
	 *
	 * <p>The multichunk counts as full as soon as {@link #size} is no longer smaller
	 * than {@link #minSize}.
	 *
	 * @return Returns <code>true</code> if no more chunks should be added and the multichunk should be closed, <code>false</code> otherwise
	 */
	public boolean isFull() {
		boolean belowMinimum = size < minSize;
		return !belowMinimum;
	}

	/**
	 * Returns the current size of this multichunk, as tracked in {@link #size}.
	 *
	 * @return Current value of the size counter
	 */
	public long getSize() {
		return size;
	}

	/**
	 * Returns the identifier of this multichunk.
	 *
	 * @return The multichunk identifier, or <code>null</code> if none has been set
	 */
	public MultiChunkId getId() {
		return id;
	}

	/**
	 * Assigns the identifier of this multichunk, replacing any previous value.
	 *
	 * @param id The new multichunk identifier
	 */
	public void setId(MultiChunkId id) {
		this.id = id;
	}

	/**
	 * Computes a hash code from the identifier, the minimum size and the current size,
	 * i.e. the same fields that {@link #equals(Object)} compares.
	 *
	 * <p>Note that the result depends on the mutable {@link #size} and {@link #id} fields,
	 * so it changes whenever either of them changes.
	 */
	@Override
	public int hashCode() {
		int idHash = (id != null) ? id.hashCode() : 0;
		int sizeHash = (int) (size ^ (size >>> 32)); // fold the long into 32 bits

		int hash = 31 + idHash;
		hash = 31 * hash + minSize;
		hash = 31 * hash + sizeHash;

		return hash;
	}

	/**
	 * Compares this multichunk to another object. Two multichunks are considered equal if
	 * their identifiers are equal (or both <code>null</code>) and both their minimum size
	 * and their current size match.
	 *
	 * @param obj The object to compare with
	 * @return Returns <code>true</code> if <code>obj</code> is a multichunk with equal identifier, minimum size and size
	 */
	@Override
	public boolean equals(Object obj) {
		if (obj == this) {
			return true;
		}

		// instanceof is false for null, so no separate null check is needed
		if (!(obj instanceof MultiChunk)) {
			return false;
		}

		MultiChunk that = (MultiChunk) obj;
		boolean sameId = (id == null) ? that.id == null : id.equals(that.id);

		return sameId && minSize == that.minSize && size == that.size;
	}
}