NotesWhat is notes.io?

Notes brand slogan

Notes - notes.io

package de.bauerkirch.files;

import java.util.ArrayList;

import de.bauerkirch.strings.StringUtils;

public class CharsetGuesser {

/**
* Detects the encode type of a file.
*
* @param filname
* the name or the link to the file
* @param detrmineEncodelimit
* represents utf8 characters found in the file,and if they are
* found we will handle the file as utf8
* @param linesLimit
* limited the number of the lines that will be searched for
* umlaut characters
* @return returns String with the type of encoding UTF-8 Or ANSI
*/
public static String checkFileEncoding(String filename, int detrmineEncodelimit, int linesLimit) throws Exception {
byte[] bytes = FileUtils.fileToBytes(filename);

//checking if the file is not empty
if (bytes.length < 1)
throw new Exception("The File has no content !");

if (bytes.length < 4)
if (bytes[0] == -17 && bytes[1] == -69 && bytes[2] == -65)
throw new Exception("The File has no content !");

//check if the file has byte order marks; if it does so it has UTF-8 encoding
String charsetFromBom = checkForBom(bytes);

if (charsetFromBom != null) {
return charsetFromBom;
}

//array of umlaut character we are looking for to determine that the file is UTF-8 or ANSI
String[] byteArray = {

"11000011 10000100", // Ä
"11000011 10100100", // ä
"11000011 10011100", // Ü
"11000011 10111100", // ü
"11000011 10010110", // Ö
"11000011 10110110", // ö
"11000011 10011111" // ß
};

//holds the binary values of the characters we are searching for in the file
ArrayList<String> matches = new ArrayList<String>();
int lcount = 0; // count the lines

//looping into the bytes after fetching all of letters in the file and converting them to bytes
for (int i = 0; i < bytes.length;) {

if (linesLimit > lcount) {

//counting the lines
if (bytes[i] == 10) {
lcount++;
}

//when the byte equals 31 or 9 or 13 that means a whitespace, we skip them to enhance the speed
if (bytes[i] == 32 || bytes[i] == 9 || bytes[i] == 13) {
i++;
continue;
}

String binaryString = Integer.toBinaryString(bytes[i]);
binaryString = StringUtils.dupeString("0", 32 - binaryString.length()) + binaryString;

binaryString = binaryString.substring(24);

if (!binaryString.equals("11000011")) {
i++;
continue;
}

for (int j = 0; j < byteArray.length; j++) {
if (Integer.toBinaryString(bytes[i + 1]).substring(24).equals(byteArray[j].substring(9))) {
matches.add(Integer.toBinaryString(bytes[i]).substring(24) + " "
+ Integer.toBinaryString(bytes[i + 1]).substring(24));
}

}
}

i++;
}
System.out.println("- lines count : " + lcount);
String matchesCount = (matches.size()>0) ? String.valueOf(matches.size()) : "no match found!";
System.out.println("- Umlaut match : " + matchesCount);
if (matches.size() >= detrmineEncodelimit)
System.out.println("- Encoding type of the file is "UTF-8"");
else
System.out.println("- Encoding type of the file is "ANSI"");

return "utf-8";
}

private static String checkForBom(byte[] bytes) throws Exception {
if (bytes.length > 0) {
if (bytes[0] != -17 && bytes[1] != -69 && bytes[2] != -65)
return null;

return "utf-8";
} else {
throw new Exception("The File has no content !");
}
}

}

//signed and unsigned value in java
     
 
what is notes.io
 

Notes is a web-based application for taking notes online. You can take notes and share them with other people. If you like taking long notes, notes.io is designed for you. To date, over 8,000,000,000 notes have been created, and the number keeps growing.

With notes.io;

  • * You can take a note from anywhere and any device with internet connection.
  • * You can share the notes in social platforms (YouTube, Facebook, Twitter, instagram etc.).
  • * You can quickly share your contents without website, blog and e-mail.
  • * You don't need to create an account to share a note. You can share quick, easy, shortened note links via SMS, websites, e-mail, or messaging services (WhatsApp, iMessage, Telegram, Signal).
  • * Notes.io has fabulous infrastructure design for a short link and allows you to share the note as an easy and understandable link.

Fast: Notes.io is built for speed and performance. You can take notes quickly and browse your archive.

Easy: Notes.io doesn’t require installation. Just write and share note!

Short: A Notes.io URL is just 8 characters long. You’ll get a shortened link to your note when you want to share it. (Ex: notes.io/q )

Free: Notes.io has been running for 14 years and has been free since the day it started.


You can create your first note right away and start sharing it with anyone you wish. If you want to contact us, you can use the following communication channels:


Email: [email protected]

Twitter: http://twitter.com/notesio

Instagram: http://instagram.com/notes.io

Facebook: http://facebook.com/notesio



Regards;
Notes.io Team

     
 
Shortened Note Link
 
 
Loading Image
 
     
 
Long File
 
 

Notes larger than 18KB cannot be shortened.

To get under 18KB, please trim your notes, or sign in.