tail -n that handles UTF-8 and ASCII

 avatar
user_0606344
java
a month ago
3.8 kB
6
Indexable
Never
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.Deque;

/**
 * Returns the last {@code n} lines of a UTF-8 (or plain ASCII) text file without reading
 * the whole file: the file is scanned backwards in fixed-size byte chunks until enough
 * line boundaries have been found.
 *
 * <p>Line boundaries are located on the raw bytes. The byte {@code 0x0A} ('\n') never
 * occurs inside a multi-byte UTF-8 sequence, so searching bytes is safe, and every line
 * is decoded as a whole. That way a chunk boundary can never split a character.
 */
public class TailNWithByteBufferUTF8 {

    private static final int BUFFER_SIZE = 4096;  // Bytes fetched per backward scan step
    private static final Charset UTF8 = StandardCharsets.UTF_8;

    /**
     * Reads the last {@code n} lines of the given file.
     *
     * <p>Lines are separated by {@code '\n'}. A single {@code '\n'} at the very end of the
     * file terminates the last line and does not produce an additional empty line. Blank
     * lines inside the file are kept. A {@code '\r'} preceding the newline is not stripped.
     *
     * @param fileName path of the file to read
     * @param n        maximum number of lines to return; values {@code <= 0} yield an empty deque
     * @return the last lines in file order (oldest first); fewer than {@code n} entries when
     *         the file has fewer lines, and empty for an empty file
     * @throws CharacterCodingException if a line is not valid UTF-8
     * @throws IOException              if the file cannot be opened or read, or a single line
     *                                  is larger than {@link Integer#MAX_VALUE} bytes
     */
    public static Deque<String> tail(String fileName, int n) throws IOException {
        Deque<String> result = new ArrayDeque<>();
        if (n <= 0) {
            return result;
        }

        try (RandomAccessFile file = new RandomAccessFile(fileName, "r");
             FileChannel channel = file.getChannel()) {

            long fileSize = channel.size();
            if (fileSize == 0) {
                return result;
            }

            CharsetDecoder decoder = UTF8.newDecoder();
            ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE);
            long lineEnd = fileSize;  // Exclusive byte offset where the line being scanned ends
            long pos = fileSize;      // Everything at or after this offset has been scanned

            while (pos > 0 && result.size() < n) {
                long readStart = Math.max(0, pos - BUFFER_SIZE);
                int readSize = (int) (pos - readStart);
                // Read exactly [readStart, pos) so that no byte is ever scanned twice.
                readFully(channel, buffer, readStart, readSize);

                for (int i = readSize - 1; i >= 0 && result.size() < n; i--) {
                    if (buffer.get(i) != '\n') {
                        continue;
                    }
                    long newlineAt = readStart + i;
                    // A newline as the very last byte only terminates the final line;
                    // every other newline marks the start of a line that follows it.
                    if (newlineAt != fileSize - 1) {
                        result.addFirst(decodeRange(channel, decoder, newlineAt + 1, lineEnd));
                    }
                    lineEnd = newlineAt;
                }
                pos = readStart;
            }

            // Reached the start of the file while still short of n: the first line has no
            // preceding newline, so it is emitted here.
            if (result.size() < n) {
                result.addFirst(decodeRange(channel, decoder, 0, lineEnd));
            }
        }

        return result;
    }

    /**
     * Fills {@code buffer} with exactly {@code length} bytes starting at file offset
     * {@code position}, looping over short reads. Uses positional reads, so the channel's
     * own position is left untouched. On return the buffer is flipped for reading.
     */
    private static void readFully(FileChannel channel, ByteBuffer buffer, long position, int length)
            throws IOException {
        buffer.clear();
        buffer.limit(length);
        while (buffer.hasRemaining()) {
            int read = channel.read(buffer, position + buffer.position());
            if (read < 0) {
                throw new IOException("Unexpected end of file at offset "
                        + (position + buffer.position()));
            }
        }
        buffer.flip();
    }

    /** Reads the bytes in {@code [start, end)} and decodes them as one UTF-8 string. */
    private static String decodeRange(FileChannel channel, CharsetDecoder decoder, long start, long end)
            throws IOException {
        long length = end - start;
        if (length == 0) {
            return "";
        }
        if (length > Integer.MAX_VALUE) {
            throw new IOException("Line starting at offset " + start + " is too long: "
                    + length + " bytes");
        }
        ByteBuffer bytes = ByteBuffer.allocate((int) length);
        readFully(channel, bytes, start, (int) length);
        return decodeUTF8Buffer(bytes, decoder).toString();
    }

    /**
     * Decodes the remaining content of {@code buffer} as UTF-8.
     *
     * @throws CharacterCodingException if the bytes are malformed or unmappable
     */
    private static CharBuffer decodeUTF8Buffer(ByteBuffer buffer, CharsetDecoder decoder) throws CharacterCodingException {
        // The one-shot decode resets the decoder, decodes, flushes, and throws on any error.
        return decoder.decode(buffer);
    }

    /**
     * Prints the last lines of a file to standard output.
     *
     * @param args optional: {@code args[0]} is the file path (default
     *             {@code your_static_file.txt}), {@code args[1]} the line count (default 100)
     */
    public static void main(String[] args) {
        String fileName = args.length > 0 ? args[0] : "your_static_file.txt";  // Replace with your file path
        int n = args.length > 1 ? Integer.parseInt(args[1]) : 100;  // Number of lines to retrieve
        try {
            Deque<String> lastLines = tail(fileName, n);

            for (String line : lastLines) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Leave a Comment