/* CSV.java - read write comma separated value format
 * Copyright (c) 2003 Michael B. Allen
 *
 * The MIT License
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

import java.io.*;

/**
 * Static helpers for reading and writing rows of comma separated values.
 * Elements are separated by ',' and a row is terminated by '\n' or by the
 * end of the input. An element may be enclosed in double quotes, in which
 * case it may contain commas and newlines, and a doubled quote stands for
 * one literal quote character.
 */
public class CSV {

    /* Parser states used by readRow. */
    private static final int ST_START = 1;     /* before the first significant character of an element */
    private static final int ST_COLLECT = 2;   /* accumulating the characters of an element */
    private static final int ST_TAILSPACE = 3; /* whitespace seen after the closing quote */
    private static final int ST_END_QUOTE = 4; /* a quote was seen inside a quoted element */

    /**
     * Parse a line of CSV input and create a new String for each element at
     * the corresponding index in row for no more than rn elements. If trim is
     * true elements will be trimmed of excess space at each end. This is a
     * convenience wrapper that feeds input to readRow through a StringReader.
     *
     * @param input the text to parse
     * @param buf scratch buffer used to assemble one element at a time
     * @param bn usable size of buf; must be greater than the length of the
     *     longest element
     * @param row receives the parsed elements
     * @param rn maximum number of elements to store in row
     * @param trim whether to strip whitespace around elements
     * @return The number of characters of input inspected
     * @throws IOException if the input is malformed or buf is too small
     */
    public static int parseRow( String input, char[] buf, int bn, String row[], int rn, boolean trim ) throws IOException
    {
        return readRow( new StringReader( input ), buf, bn, row, rn, trim );
    }

    /**
     * Parse a line of CSV input and create a new String for each element at
     * the corresponding index in row for no more than rn elements. If trim is
     * true elements will be trimmed of excess space at each end.
     *
     * Reading stops after a newline that ends the row, at the end of the
     * input, or once rn elements have been stored. When the row ends (by
     * newline or end of input) before rn elements were found, the remaining
     * entries of row are set to null. An element that is terminated by the
     * end of the input rather than by a newline is still stored. If the
     * input is already exhausted (zero characters read) every entry of row
     * is set to null.
     *
     * @param input the character source
     * @param buf scratch buffer used to assemble one element at a time
     * @param bn usable size of buf; parsing stops with an error as soon as
     *     an element occupies bn characters, so bn must be greater than the
     *     length of the longest element
     * @param row receives the parsed elements
     * @param rn maximum number of elements to store in row
     * @param trim whether to strip whitespace around elements
     * @return The number of characters read from input
     * @throws IOException if a quote appears inside an unquoted element, if
     *     anything other than whitespace follows a closing quote, if the
     *     input ends inside a quoted element, if buf is too small, or if
     *     the underlying reader fails
     */
    public static int readRow( Reader input, char[] buf, int bn, String row[], int rn, boolean trim ) throws IOException
    {
        int state = ST_START;
        int r = 0;      /* index in row of the element being collected */
        int j = 0;      /* next free position in buf */
        int t = 0;      /* length of the element once trailing space is dropped */
        int count = 0;  /* characters consumed from input */
        int i;
        boolean inquotes = false;

        /* Reader.read() signals end of stream with -1; a NUL character (0)
         * is ordinary data and must not stop the parse. */
        while( r < rn && j < bn && (i = input.read()) != -1 ) {
            char ch = (char)i;
            count++;

            switch( state ) {
                case ST_START:
                    if( ch != '\n' && Character.isWhitespace( ch )) {
                        if( !trim ) {
                            /* keep leading whitespace as part of the element */
                            buf[j++] = ch;
                            t = j;
                        }
                        break;
                    } else if( ch == '"' ) {
                        /* opening quote: discard any space collected before it */
                        j = t = 0;
                        state = ST_COLLECT;
                        inquotes = true;
                        break;
                    }
                    state = ST_COLLECT;
                    /* fall through: ch is the first character of the element */
                case ST_COLLECT:
                    if( inquotes ) {
                        if( ch == '"' ) {
                            /* either the closing quote or the first half of "" */
                            state = ST_END_QUOTE;
                            break;
                        }
                    } else if( ch == ',' || ch == '\n' ) {
                        r = storeElement( buf, t, row, r, rn, ch == '\n' );
                        j = t = 0;
                        state = ST_START;
                        inquotes = false;
                        break;
                    } else if( ch == '"' ) {
                        throw new IOException( "Unexpected quote in element " + (r + 1));
                    }
                    buf[j++] = ch;
                    if( !trim || !Character.isWhitespace( ch )) {
                        /* t only advances past non-space, so trailing space is cut */
                        t = j;
                    }
                    break;
                case ST_TAILSPACE:
                case ST_END_QUOTE:
                    if( ch == ',' || ch == '\n' ) {
                        r = storeElement( buf, t, row, r, rn, ch == '\n' );
                        j = t = 0;
                        state = ST_START;
                        inquotes = false;
                        break;
                    } else if( ch == '"' && state != ST_TAILSPACE ) {
                        buf[j++] = '"'; /* nope, just an escaped quote */
                        t = j;
                        state = ST_COLLECT;
                        break;
                    } else if( Character.isWhitespace( ch )) {
                        state = ST_TAILSPACE;
                        break;
                    }
                    throw new IOException( "Bad end quote in element " + (r + 1));
            }
        }

        if( j == bn ) {
            throw new IOException( "Not enough room in buf for element " + (r + 1));
        }
        if( r < rn ) {
            /* Only reachable when the input ran out. A quoted element is
             * unterminated only if we are still collecting its content; a
             * closing quote puts us in ST_END_QUOTE or ST_TAILSPACE. */
            if( inquotes && state == ST_COLLECT ) {
                throw new IOException( "Missing end quote in element " + (r + 1));
            }
            if( count > 0 ) {
                /* the last element of the input has no trailing newline */
                r = storeElement( buf, t, row, r, rn, true );
            } else {
                while( r < rn ) {
                    row[r++] = null;
                }
            }
        }

        return count;
    }

    /* Store buf[0..len) as element r and, when the row is complete, null out the unused tail of row; returns the next free index. */
    private static int storeElement( char[] buf, int len, String[] row, int r, int rn, boolean endOfRow )
    {
        row[r++] = new String( buf, 0, len );
        if( endOfRow ) {
            while( r < rn ) {
                row[r++] = null;
            }
        }
        return r;
    }

    /* True if the element must be quoted so that readRow can read it back intact. */
    private static boolean needsQuotes( String element )
    {
        return element.indexOf( '"' ) >= 0
                || element.indexOf( ',' ) >= 0
                || element.indexOf( '\n' ) >= 0;
    }

    /**
     * Print a row including necessary quotes. Elements are written in order
     * up to rn or the first null entry, separated by commas and followed by
     * a newline, and out is flushed. An element is quoted when it contains
     * a quote, a comma or a newline; quotes inside it are doubled. Note that
     * if the row was parsed by parseRow or readRow the output may not be
     * identical to the source if the source had useless quotes or space
     * around quotes.
     *
     * @param out destination for the formatted row
     * @param buf not used; elements are written directly to out so their
     *     length is not limited by a buffer. Retained so that existing
     *     callers keep working.
     * @param bn not used; see buf
     * @param row the elements to print
     * @param rn maximum number of elements of row to print
     * @throws IOException if writing to out fails
     */
    public static void printRow( Writer out, char[] buf, int bn, String[] row, int rn ) throws IOException
    {
        for( int r = 0; r < rn && row[r] != null; r++ ) {
            if( r > 0 ) {
                out.write( ',' );
            }
            String element = row[r];
            if( needsQuotes( element )) {
                out.write( '"' );
                for( int i = 0; i < element.length(); i++ ) {
                    char ch = element.charAt( i );
                    if( ch == '"' ) {
                        out.write( '"' ); /* escape a quote by doubling it */
                    }
                    out.write( ch );
                }
                out.write( '"' );
            } else {
                out.write( element );
            }
        }
        out.write( '\n' );
        out.flush();
    }

    /**
     * Just a test. Reads the file named by the first argument row by row
     * (at most 6 elements per row, untrimmed) and prints each row to
     * standard output.
     */
    public static void main( String[] args ) throws Exception
    {
        if( args.length < 1 ) {
            System.err.println( "usage: CSV <filename>" );
            return;
        }

        String[] row = new String[6];
        char[] buf = new char[64];

        /* readRow pulls one character at a time, so buffer the file */
        Reader in = new BufferedReader( new FileReader( args[0] ));
        try {
            Writer out = new OutputStreamWriter( System.out );
            while( CSV.readRow( in, buf, buf.length, row, row.length, false ) > 0 ) {
                CSV.printRow( out, buf, buf.length, row, row.length );
            }
        } finally {
            in.close();
        }
    }
}