001/*
002 * This file is part of McIDAS-V
003 *
004 * Copyright 2007-2016
005 * Space Science and Engineering Center (SSEC)
006 * University of Wisconsin - Madison
007 * 1225 W. Dayton Street, Madison, WI 53706, USA
008 * https://www.ssec.wisc.edu/mcidas
009 * 
010 * All Rights Reserved
011 * 
012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
013 * some McIDAS-V source code is based on IDV and VisAD source code.  
014 * 
015 * McIDAS-V is free software; you can redistribute it and/or modify
016 * it under the terms of the GNU Lesser Public License as published by
017 * the Free Software Foundation; either version 3 of the License, or
018 * (at your option) any later version.
019 * 
020 * McIDAS-V is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023 * GNU Lesser Public License for more details.
024 * 
025 * You should have received a copy of the GNU Lesser Public License
026 * along with this program.  If not, see http://www.gnu.org/licenses.
027 */
028
029package edu.wisc.ssec.mcidasv.util;
030
031import org.mozilla.universalchardet.UniversalDetector;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import java.io.FileInputStream;
036import java.io.IOException;
037import java.io.InputStream;
038
039/**
040 * Based on the juniversalchardet example code.
041 *
042 * This code is primarily used by the {@literal "editFile"} function in {@code interactive.py}.
043 */
044public final class DetectCharset {
045
046    private static final Logger logger = LoggerFactory.getLogger(DetectCharset.class);
047
048    private DetectCharset() { }
049
050    public static String detect(String file) throws IOException {
051        try (InputStream fis = new FileInputStream(file)) {
052            UniversalDetector detector = new UniversalDetector(null);
053            int nread;
054            byte[] buf = new byte[4096];
055            while (((nread = fis.read(buf)) > 0) && !detector.isDone()) {
056                detector.handleData(buf, 0, nread);
057            }
058
059            detector.dataEnd();
060
061            String encoding = detector.getDetectedCharset();
062            if (encoding != null) {
063                logger.trace("detected encoding '{}'", encoding);
064            } else {
065                logger.trace("no encoding detected!");
066            }
067
068            detector.reset();
069            return encoding;
070        }
071    }
072}