/*
 * This file is part of McIDAS-V
 *
 * Copyright 2007-2016
 * Space Science and Engineering Center (SSEC)
 * University of Wisconsin - Madison
 * 1225 W. Dayton Street, Madison, WI 53706, USA
 * https://www.ssec.wisc.edu/mcidas
 *
 * All Rights Reserved
 *
 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
 * some McIDAS-V source code is based on IDV and VisAD source code.
 *
 * McIDAS-V is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * McIDAS-V is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with this program. If not, see http://www.gnu.org/licenses.
 */

package edu.wisc.ssec.mcidasv.util;

import org.mozilla.universalchardet.UniversalDetector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Based on the juniversalchardet example code.
 *
 * This code is primarily used by the {@literal "editFile"} function in {@code interactive.py}.
043 */ 044public final class DetectCharset { 045 046 private static final Logger logger = LoggerFactory.getLogger(DetectCharset.class); 047 048 private DetectCharset() { } 049 050 public static String detect(String file) throws IOException { 051 try (InputStream fis = new FileInputStream(file)) { 052 UniversalDetector detector = new UniversalDetector(null); 053 int nread; 054 byte[] buf = new byte[4096]; 055 while (((nread = fis.read(buf)) > 0) && !detector.isDone()) { 056 detector.handleData(buf, 0, nread); 057 } 058 059 detector.dataEnd(); 060 061 String encoding = detector.getDetectedCharset(); 062 if (encoding != null) { 063 logger.trace("detected encoding '{}'", encoding); 064 } else { 065 logger.trace("no encoding detected!"); 066 } 067 068 detector.reset(); 069 return encoding; 070 } 071 } 072}