001    /*
002     * $HeadURL: http://juliusdavies.ca/svn/not-yet-commons-ssl/tags/commons-ssl-0.3.9/src/java/org/apache/commons/ssl/Base64.java $
003     * $Revision: 121 $
004     * $Date: 2007-11-13 21:26:57 -0800 (Tue, 13 Nov 2007) $
005     *
006     * ====================================================================
007     * Licensed to the Apache Software Foundation (ASF) under one
008     * or more contributor license agreements.  See the NOTICE file
009     * distributed with this work for additional information
010     * regarding copyright ownership.  The ASF licenses this file
011     * to you under the Apache License, Version 2.0 (the
012     * "License"); you may not use this file except in compliance
013     * with the License.  You may obtain a copy of the License at
014     *
015     *   http://www.apache.org/licenses/LICENSE-2.0
016     *
017     * Unless required by applicable law or agreed to in writing,
018     * software distributed under the License is distributed on an
019     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
020     * KIND, either express or implied.  See the License for the
021     * specific language governing permissions and limitations
022     * under the License.
023     * ====================================================================
024     *
025     * This software consists of voluntary contributions made by many
026     * individuals on behalf of the Apache Software Foundation.  For more
027     * information on the Apache Software Foundation, please see
028     * <http://www.apache.org/>.
029     *
030     */
031    
032    package org.apache.commons.ssl;
033    
034    /**
035     * Provides Base64 encoding and decoding as defined by RFC 2045.
036     * <p/>
037     * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite>
038     * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One:
039     * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p>
040     *
041     * @author Apache Software Foundation
042     * @version $Id: Base64.java 121 2007-11-14 05:26:57Z julius $
043     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
044     * @since 1.0-dev
045     */
public class Base64 {

    /**
     * Chunk size per RFC 2045 section 6.8.
     *
     * <p>The character limit does not count the trailing CRLF, but counts
     * all other characters, including any equal signs.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    static final int CHUNK_SIZE = 76;

    /**
     * Chunk separator (CR LF) per RFC 2045 section 2.1.
     *
     * <p>Spelled out as literal bytes so the value does not depend on the
     * platform default charset.</p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
     */
    static final byte[] CHUNK_SEPARATOR = {(byte) '\r', (byte) '\n'};

    /**
     * Size of the decoding table. Only indices <code>0..127</code> are ever
     * read, because {@link #isBase64(byte)} rejects negative bytes before
     * indexing.
     */
    static final int BASELENGTH = 255;

    /** Size of the encoding table: one entry per 6-bit value. */
    static final int LOOKUPLENGTH = 64;

    /** Number of bits in a byte. */
    static final int EIGHTBIT = 8;

    /** Number of bits in two bytes. */
    static final int SIXTEENBIT = 16;

    /** Number of bits in one Base64 input group (three bytes). */
    static final int TWENTYFOURBITGROUP = 24;

    /** Number of encoded characters in one Base64 output group. */
    static final int FOURBYTE = 4;

    /** Mask matching the sign bit of a byte. */
    static final int SIGN = -128;

    /** Byte used to pad output. */
    static final byte PAD = (byte) '=';

    /**
     * Decoding table: maps a character code to its Base64 value
     * (<code>0</code> through <code>63</code>), or <code>-1</code> when the
     * character is not part of the alphabet.
     *
     * <p>For example, <code>base64Alphabet['+']</code> is <code>62</code>.</p>
     */
    private static final byte[] base64Alphabet = new byte[BASELENGTH];

    /**
     * Encoding table: maps a 6-bit value to its Base64 character, in the
     * order <code>A-Z</code>, <code>a-z</code>, <code>0-9</code>,
     * <code>+</code>, <code>/</code>.
     *
     * <p>For example, <code>lookUpBase64Alphabet[62]</code> is <code>'+'</code>.</p>
     */
    private static final byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];

    // Populate the encoding table first, then derive the decoding table from it.
    static {
        int value = 0;
        for (char c = 'A'; c <= 'Z'; c++) {
            lookUpBase64Alphabet[value++] = (byte) c;
        }
        for (char c = 'a'; c <= 'z'; c++) {
            lookUpBase64Alphabet[value++] = (byte) c;
        }
        for (char c = '0'; c <= '9'; c++) {
            lookUpBase64Alphabet[value++] = (byte) c;
        }
        lookUpBase64Alphabet[value++] = (byte) '+';
        lookUpBase64Alphabet[value] = (byte) '/';

        for (int i = 0; i < BASELENGTH; i++) {
            base64Alphabet[i] = (byte) -1;
        }
        for (int i = 0; i < LOOKUPLENGTH; i++) {
            base64Alphabet[lookUpBase64Alphabet[i]] = (byte) i;
        }
    }

    /**
     * Returns whether or not the octet is in the Base64 alphabet.
     * The pad character <code>'='</code> counts as a member.
     *
     * @param b the value to test
     * @return <code>true</code> if the value is the pad character or is
     *         defined in the Base64 alphabet, <code>false</code> otherwise
     */
    public static boolean isBase64(byte b) {
        if (b == PAD) {
            return true;
        }
        // Negative bytes (0x80..0xFF) are never in the alphabet and must not be used as an index.
        return b >= 0 && base64Alphabet[b] >= 0;
    }

    /**
     * Tests a given byte array to see if it contains only valid characters
     * within the Base64 alphabet. Space, tab, CR and LF are skipped.
     *
     * @param arrayOctect byte array to test
     * @return <code>true</code> if all non-whitespace bytes are valid
     *         characters in the Base64 alphabet or if the byte array is
     *         empty; <code>false</code> otherwise
     */
    public static boolean isArrayByteBase64(byte[] arrayOctect) {
        for (int i = 0; i < arrayOctect.length; i++) {
            byte b = arrayOctect[i];
            if (!isWhitespace(b) && !isBase64(b)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes binary data using the Base64 algorithm but does not chunk the
     * output.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters
     */
    public static byte[] encodeBase64(byte[] binaryData) {
        return encodeBase64(binaryData, false);
    }

    /**
     * Encodes binary data using the Base64 algorithm and chunks the encoded
     * output into 76 character blocks.
     *
     * @param binaryData binary data to encode
     * @return Base64 characters chunked in 76 character blocks
     */
    public static byte[] encodeBase64Chunked(byte[] binaryData) {
        return encodeBase64(binaryData, true);
    }

    /**
     * Decodes an Object using the Base64 algorithm. Only <code>byte[]</code>
     * arguments are accepted.
     *
     * @param pObject Object to decode
     * @return An object (of type byte[]) containing the binary data which
     *         corresponds to the byte[] supplied.
     * @throws IllegalArgumentException if the parameter supplied is not of
     *                                  type byte[]
     */
    public Object decode(Object pObject) throws IllegalArgumentException {
        if (!(pObject instanceof byte[])) {
            throw new IllegalArgumentException("Parameter supplied to Base64 decode is not a byte[]");
        }
        return decode((byte[]) pObject);
    }

    /**
     * Decodes a byte[] containing characters in the Base64 alphabet.
     * Delegates to {@link #decodeBase64(byte[])}.
     *
     * @param pArray A byte array containing Base64 character data
     * @return a byte array containing binary data
     */
    public byte[] decode(byte[] pArray) {
        return decodeBase64(pArray);
    }

    /**
     * Encodes binary data using the Base64 algorithm, optionally chunking
     * the output into 76 character blocks.
     *
     * <p>When chunking, every line, including the last one, is terminated
     * by {@link #CHUNK_SEPARATOR}. Empty input produces empty output.</p>
     *
     * @param binaryData Array containing binary data to encode.
     * @param isChunked  if <code>true</code> this encoder will chunk the
     *                   Base64 output into 76 character blocks
     * @return Base64-encoded data.
     * @throws IllegalArgumentException if the encoded form would not fit in
     *                                  a single array
     */
    public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
        int fullTriplets = binaryData.length / 3;
        int tailBytes = binaryData.length % 3;

        // Size the output in long arithmetic: the encoded form is 4/3 of the
        // input (plus separators) and can exceed the int range.
        long encodedLength = 4L * (fullTriplets + (tailBytes == 0 ? 0 : 1));
        if (isChunked) {
            long lines = (encodedLength + CHUNK_SIZE - 1) / CHUNK_SIZE;
            encodedLength += lines * CHUNK_SEPARATOR.length;
        }
        if (encodedLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                "Input array too big, output array would be bigger than Integer.MAX_VALUE="
                    + Integer.MAX_VALUE);
        }

        byte[] encoded = new byte[(int) encodedLength];
        int in = 0;
        int out = 0;
        int lineLength = 0;

        for (int t = 0; t < fullTriplets; t++) {
            // Mask to 0..255 so the shifts below never see sign extension.
            int b1 = binaryData[in++] & 0xff;
            int b2 = binaryData[in++] & 0xff;
            int b3 = binaryData[in++] & 0xff;

            encoded[out++] = lookUpBase64Alphabet[b1 >> 2];
            encoded[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encoded[out++] = lookUpBase64Alphabet[((b2 & 0x0f) << 2) | (b3 >> 6)];
            encoded[out++] = lookUpBase64Alphabet[b3 & 0x3f];
            lineLength += FOURBYTE;

            // CHUNK_SIZE is a multiple of 4, so a line fills exactly on a group boundary.
            if (isChunked && lineLength == CHUNK_SIZE) {
                out = appendSeparator(encoded, out);
                lineLength = 0;
            }
        }

        if (tailBytes == 1) {
            int b1 = binaryData[in] & 0xff;
            encoded[out++] = lookUpBase64Alphabet[b1 >> 2];
            encoded[out++] = lookUpBase64Alphabet[(b1 & 0x03) << 4];
            encoded[out++] = PAD;
            encoded[out++] = PAD;
            lineLength += FOURBYTE;
        } else if (tailBytes == 2) {
            int b1 = binaryData[in] & 0xff;
            int b2 = binaryData[in + 1] & 0xff;
            encoded[out++] = lookUpBase64Alphabet[b1 >> 2];
            encoded[out++] = lookUpBase64Alphabet[((b1 & 0x03) << 4) | (b2 >> 4)];
            encoded[out++] = lookUpBase64Alphabet[(b2 & 0x0f) << 2];
            encoded[out++] = PAD;
            lineLength += FOURBYTE;
        }

        // A partially filled final line still gets its terminating separator.
        if (isChunked && lineLength > 0) {
            appendSeparator(encoded, out);
        }

        return encoded;
    }

    /**
     * Decodes Base64 data into octets.
     *
     * <p>Characters outside the Base64 alphabet are discarded first.
     * Decoding then stops at the first pad character. A trailing group of
     * two or three characters is decoded to one or two bytes even when its
     * padding is missing; a single left-over character carries fewer than
     * eight bits and is dropped. Malformed input therefore never causes an
     * <code>ArrayIndexOutOfBoundsException</code>.</p>
     *
     * @param base64Data Byte array containing Base64 data
     * @return Array containing decoded data.
     */
    public static byte[] decodeBase64(byte[] base64Data) {
        // RFC 2045 requires that we discard ALL non-Base64 characters
        base64Data = discardNonBase64(base64Data);

        // After filtering, every byte is either an alphabet character or PAD.
        // Everything from the first PAD onward carries no data.
        int dataLength = 0;
        while (dataLength < base64Data.length && base64Data[dataLength] != PAD) {
            dataLength++;
        }

        int fullQuads = dataLength / FOURBYTE;
        int remainder = dataLength % FOURBYTE;
        // 2 left-over characters hold 1 byte, 3 hold 2 bytes, 1 holds none.
        int tailBytes = (remainder == 0) ? 0 : remainder - 1;

        byte[] decoded = new byte[fullQuads * 3 + tailBytes];
        int in = 0;
        int out = 0;

        for (int q = 0; q < fullQuads; q++) {
            int s1 = base64Alphabet[base64Data[in++]];
            int s2 = base64Alphabet[base64Data[in++]];
            int s3 = base64Alphabet[base64Data[in++]];
            int s4 = base64Alphabet[base64Data[in++]];

            decoded[out++] = (byte) ((s1 << 2) | (s2 >> 4));
            decoded[out++] = (byte) (((s2 & 0x0f) << 4) | (s3 >> 2));
            decoded[out++] = (byte) ((s3 << 6) | s4);
        }

        if (remainder >= 2) {
            int s1 = base64Alphabet[base64Data[in]];
            int s2 = base64Alphabet[base64Data[in + 1]];
            decoded[out++] = (byte) ((s1 << 2) | (s2 >> 4));
            if (remainder == 3) {
                int s3 = base64Alphabet[base64Data[in + 2]];
                decoded[out] = (byte) (((s2 & 0x0f) << 4) | (s3 >> 2));
            }
        }

        return decoded;
    }

    /**
     * Discards any whitespace (space, tab, CR, LF) from a Base64 encoded
     * block.
     *
     * @param data The Base64 encoded data to discard the whitespace from.
     * @return A new array holding the data, less whitespace (see RFC 2045).
     */
    static byte[] discardWhitespace(byte[] data) {
        byte[] kept = new byte[data.length];
        int count = 0;

        for (int i = 0; i < data.length; i++) {
            if (!isWhitespace(data[i])) {
                kept[count++] = data[i];
            }
        }

        return trim(kept, count);
    }

    /**
     * Discards any characters outside of the Base64 alphabet, per the
     * requirements on page 25 of RFC 2045 - "Any characters outside of the
     * base64 alphabet are to be ignored in base64 encoded data."
     * Pad characters are kept.
     *
     * @param data The Base64 encoded data to groom
     * @return A new array holding the data, less non-Base64 characters
     *         (see RFC 2045).
     */
    static byte[] discardNonBase64(byte[] data) {
        byte[] kept = new byte[data.length];
        int count = 0;

        for (int i = 0; i < data.length; i++) {
            if (isBase64(data[i])) {
                kept[count++] = data[i];
            }
        }

        return trim(kept, count);
    }

    /**
     * Encodes an Object using the Base64 algorithm. Only <code>byte[]</code>
     * arguments are accepted.
     *
     * @param pObject Object to encode
     * @return An object (of type byte[]) containing the Base64 encoded data
     *         which corresponds to the byte[] supplied.
     * @throws IllegalArgumentException if the parameter supplied is not of
     *                                  type byte[]
     */
    public Object encode(Object pObject) throws IllegalArgumentException {
        if (!(pObject instanceof byte[])) {
            throw new IllegalArgumentException("Parameter supplied to Base64 encode is not a byte[]");
        }
        return encode((byte[]) pObject);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the Base64 alphabet. The output is not chunked.
     *
     * @param pArray a byte array containing binary data
     * @return A byte array containing only Base64 character data
     */
    public byte[] encode(byte[] pArray) {
        return encodeBase64(pArray, false);
    }

    /** Returns whether the byte is a space, tab, carriage return or line feed. */
    private static boolean isWhitespace(byte b) {
        return b == (byte) ' ' || b == (byte) '\n' || b == (byte) '\r' || b == (byte) '\t';
    }

    /** Returns a new array holding the first <code>length</code> bytes of <code>source</code>. */
    private static byte[] trim(byte[] source, int length) {
        byte[] packed = new byte[length];
        System.arraycopy(source, 0, packed, 0, length);
        return packed;
    }

    /** Copies the chunk separator into <code>target</code> at <code>offset</code>; returns the next free offset. */
    private static int appendSeparator(byte[] target, int offset) {
        System.arraycopy(CHUNK_SEPARATOR, 0, target, offset, CHUNK_SEPARATOR.length);
        return offset + CHUNK_SEPARATOR.length;
    }

}