UTF32Encoding.cs source code in C# .NET

                        Code:
                         / 4.0 / 4.0 / untmp / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / clr / src / BCL / System / Text / UTF32Encoding.cs / 1305376 / UTF32Encoding.cs
                        
                        
                            // ==++== 
//
//   Copyright (c) Microsoft Corporation.  All rights reserved.
//
// ==--== 
//
// Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused. 
// 

#if FEATURE_UTF32 

namespace System.Text
{
 
    using System;
    using System.Diagnostics.Contracts; 
    using System.Globalization; 
    // Encodes text into and out of UTF-32.  UTF-32 is a way of writing
    // Unicode characters with a single storage unit (32 bits) per character.
    //
    // The UTF-32 byte order mark is simply the Unicode byte order mark
    // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000).  The byte order
    // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't 
    // switch the byte orderings.
 
    [Serializable] 
    public sealed class UTF32Encoding : Encoding
    { 
        /*
            words   bits    UTF-32 representation
            -----   ----    -----------------------------------
            1       16      00000000 00000000 xxxxxxxx xxxxxxxx 
            2       21      00000000 000xxxxx hhhhhhll llllllll
            -----   ----    ----------------------------------- 
 
            Surrogate:
            Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000 
         */

        //
        // Set from the constructor's byteOrderMark argument: whether a byte order mark
        // was requested for this encoding instance.
        private bool emitUTF32ByteOrderMark = false; 
        // Set from the constructor's throwOnInvalidCharacters argument. When true,
        // SetDefaultFallbacks installs the exception fallbacks instead of the
        // U+FFFD replacement fallbacks.
        private bool isThrowException = false;
        // Byte order selector. When true the constructor passes 12001 (otherwise 12000)
        // to the base class, and GetBytes writes the most significant byte first.
        private bool bigEndian = false; 
 

        // Default configuration: little-endian byte order, byte order mark requested,
        // and no exceptions for invalid characters.
        public UTF32Encoding()
            : this(false, true, false)
        {
        }

 
        // Caller picks the byte order and whether a byte order mark is wanted;
        // invalid characters do not raise exceptions.
        public UTF32Encoding(bool bigEndian, bool byteOrderMark)
            : this(bigEndian, byteOrderMark, false)
        {
        }
 

        // Full constructor: byte order, byte order mark preference and error handling.
        // The base class receives 12001 for big-endian and 12000 for little-endian.
        public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
            : base(bigEndian ? 12001 : 12000)
        {
            this.isThrowException = throwOnInvalidCharacters;
            this.emitUTF32ByteOrderMark = byteOrderMark;
            this.bigEndian = bigEndian;

            // The base constructor has already chosen fallbacks, but it did so before
            // isThrowException was assigned; redo it when exceptions were requested.
            if (throwOnInvalidCharacters)
            {
                SetDefaultFallbacks();
            }
        }
 
        // Installs the default encoder and decoder fallbacks for this instance:
        // the exception fallbacks when the encoding was created to throw on invalid
        // characters, otherwise replacement fallbacks that substitute U+FFFD.
        internal override void SetDefaultFallbacks()
        {
            if (!this.isThrowException)
            {
                // Lenient mode: substitute the Unicode replacement character.
                this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
                this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
                return;
            }

            // Strict mode: invalid data raises an exception.
            this.encoderFallback = EncoderFallback.ExceptionFallback;
            this.decoderFallback = DecoderFallback.ExceptionFallback;
        }


        //
        // The following methods are copied from EncodingNLS.cs. 
        // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
        // These should be kept in sync for the following classes:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 
        //
 
        // Returns the number of bytes required to encode a range of characters in
        // a character array.
        //
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS) 
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 
        // parent method is safe 

        // Counts the bytes needed to encode 'count' characters of 'chars', starting at 'index'.
        // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
        // for a negative index/count or a range that does not fit inside the array.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe int GetByteCount(char[] chars, int index, int count)
        {
            // Argument checks (kept as plain if/throw ahead of EndContractBlock)
            if (chars == null)
                throw new ArgumentNullException("chars",
                      Environment.GetResourceString("ArgumentNull_Array"));

            if (index < 0)
                throw new ArgumentOutOfRangeException("index",
                      Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (count < 0)
                throw new ArgumentOutOfRangeException("count",
                      Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (count > chars.Length - index)
                throw new ArgumentOutOfRangeException("chars",
                      Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
            Contract.EndContractBlock();

            // An empty array needs no bytes; returning here also avoids pinning it.
            if (chars.Length == 0)
            {
                return 0;
            }

            // Pin the array and hand the requested window to the pointer-based worker.
            fixed (char* source = chars)
            {
                return GetByteCount(source + index, count, null);
            }
        }
 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include: 
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        // Counts the bytes needed to encode every character of 's'.
        // Throws ArgumentNullException when 's' is null.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe int GetByteCount(String s)
        {
            if (s == null)
            {
                throw new ArgumentNullException("s");
            }
            Contract.EndContractBlock();

            // Pin the string and let the pointer-based worker do the counting.
            fixed (char* source = s)
            {
                return GetByteCount(source, s.Length, null);
            }
        }
 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS) 
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 

        // Counts the bytes needed to encode 'count' characters starting at 'chars'.
        // Throws ArgumentNullException for a null pointer and
        // ArgumentOutOfRangeException for a negative count.
        [System.Security.SecurityCritical]  // auto-generated
        [CLSCompliant(false)]
        public override unsafe int GetByteCount(char* chars, int count)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars",
                    Environment.GetResourceString("ArgumentNull_Array"));
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException("count",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            }
            Contract.EndContractBlock();

            // No encoder state is involved for this overload.
            return GetByteCount(chars, count, null);
        }

        // Parent method is safe.
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include: 
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 
        // Encodes 'charCount' characters of 's', starting at 'charIndex', into 'bytes'
        // beginning at 'byteIndex', and returns the number of bytes written.
        // Throws ArgumentNullException for a null string or array and
        // ArgumentOutOfRangeException for negative or out-of-range indexes/counts.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe int GetBytes(String s, int charIndex, int charCount,
                                              byte[] bytes, int byteIndex)
        {
            if (s == null)
                throw new ArgumentNullException("s",
                      Environment.GetResourceString("ArgumentNull_Array"));

            if (bytes == null)
                throw new ArgumentNullException("bytes",
                      Environment.GetResourceString("ArgumentNull_Array"));

            if (charIndex < 0)
                throw new ArgumentOutOfRangeException("charIndex",
                      Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (charCount < 0)
                throw new ArgumentOutOfRangeException("charCount",
                      Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (charCount > s.Length - charIndex)
                throw new ArgumentOutOfRangeException("s",
                      Environment.GetResourceString("ArgumentOutOfRange_IndexCount"));

            if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException("byteIndex",
                    Environment.GetResourceString("ArgumentOutOfRange_Index"));
            Contract.EndContractBlock();

            // Room left in the caller's array, measured before any substitution below.
            int available = bytes.Length - byteIndex;

            // 'fixed' does not like zero-length arrays, so pin a one-byte stand-in instead.
            byte[] target = (bytes.Length == 0) ? new byte[1] : bytes;

            fixed (char* source = s)
            {
                fixed (byte* destination = target)
                {
                    return GetBytes(source + charIndex, charCount,
                                    destination + byteIndex, available, null);
                }
            }
        }

        // Encodes a range of characters in a character array into a range of bytes 
        // in a byte array. An exception occurs if the byte array is not large
        // enough to hold the complete encoding of the characters. The 
        // GetByteCount method can be used to determine the exact number of 
        // bytes that will be produced for a given range of characters.
        // Alternatively, the GetMaxByteCount method can be used to 
        // determine the maximum number of bytes that will be produced for a given
        // number of characters, regardless of the actual character values.
        //
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS) 
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 
        // parent method is safe 

        // Encodes 'charCount' characters of 'chars', starting at 'charIndex', into 'bytes'
        // beginning at 'byteIndex', and returns the number of bytes written.
        // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
        // for negative or out-of-range indexes/counts.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                               byte[] bytes, int byteIndex)
        {
            if (chars == null)
                throw new ArgumentNullException("chars",
                      Environment.GetResourceString("ArgumentNull_Array"));

            if (bytes == null)
                throw new ArgumentNullException("bytes",
                      Environment.GetResourceString("ArgumentNull_Array"));

            if (charIndex < 0)
                throw new ArgumentOutOfRangeException("charIndex",
                      Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (charCount < 0)
                throw new ArgumentOutOfRangeException("charCount",
                      Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (charCount > chars.Length - charIndex)
                throw new ArgumentOutOfRangeException("chars",
                      Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));

            if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException("byteIndex",
                     Environment.GetResourceString("ArgumentOutOfRange_Index"));
            Contract.EndContractBlock();

            // Nothing to encode; returning here also avoids pinning an empty array.
            if (chars.Length == 0)
            {
                return 0;
            }

            // Space remaining in the output array (not the size of the whole array).
            int available = bytes.Length - byteIndex;

            // 'fixed' does not like zero-length arrays, so pin a one-byte stand-in instead.
            byte[] target = (bytes.Length == 0) ? new byte[1] : bytes;

            fixed (char* source = chars)
            {
                fixed (byte* destination = target)
                {
                    return GetBytes(source + charIndex, charCount,
                                    destination + byteIndex, available, null);
                }
            }
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS) 
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 
 
        // Encodes 'charCount' characters at 'chars' into the buffer at 'bytes', which
        // holds 'byteCount' bytes, and returns the number of bytes written.
        // Throws ArgumentNullException for a null pointer and
        // ArgumentOutOfRangeException for a negative count.
        [System.Security.SecurityCritical]  // auto-generated
        [CLSCompliant(false)]
        public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
        {
            // The output pointer is checked first, then the input pointer.
            if (bytes == null)
                throw new ArgumentNullException("bytes",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (chars == null)
                throw new ArgumentNullException("chars",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (charCount < 0)
                throw new ArgumentOutOfRangeException("charCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (byteCount < 0)
                throw new ArgumentOutOfRangeException("byteCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            Contract.EndContractBlock();

            // No encoder state is involved for this overload.
            return GetBytes(chars, charCount, bytes, byteCount, null);
        }
 
        // Returns the number of characters produced by decoding a range of bytes 
        // in a byte array.
        // 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe 

        // Counts the characters produced by decoding 'count' bytes of 'bytes',
        // starting at 'index'.
        // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
        // for a negative index/count or a range that does not fit inside the array.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe int GetCharCount(byte[] bytes, int index, int count)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (index < 0)
                throw new ArgumentOutOfRangeException("index",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (count < 0)
                throw new ArgumentOutOfRangeException("count",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (count > bytes.Length - index)
                throw new ArgumentOutOfRangeException("bytes",
                    Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
            Contract.EndContractBlock();

            // No input means no characters; returning here also avoids pinning an empty array.
            if (bytes.Length == 0)
            {
                return 0;
            }

            // Pin the array and hand the requested window to the pointer-based worker.
            fixed (byte* source = bytes)
            {
                return GetCharCount(source + index, count, null);
            }
        }
 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include: 
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 

        // Counts the characters produced by decoding 'count' bytes starting at 'bytes'.
        // Throws ArgumentNullException for a null pointer and
        // ArgumentOutOfRangeException for a negative count.
        [System.Security.SecurityCritical]  // auto-generated
        [CLSCompliant(false)]
        public override unsafe int GetCharCount(byte* bytes, int count)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes",
                    Environment.GetResourceString("ArgumentNull_Array"));
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException("count",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            }
            Contract.EndContractBlock();

            // No decoder state is involved for this overload.
            return GetCharCount(bytes, count, null);
        }
 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include: 
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        // Decodes 'byteCount' bytes of 'bytes', starting at 'byteIndex', into 'chars'
        // beginning at 'charIndex', and returns the number of characters written.
        // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
        // for negative or out-of-range indexes/counts.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                              char[] chars, int charIndex)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (chars == null)
                throw new ArgumentNullException("chars",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (byteIndex < 0)
                throw new ArgumentOutOfRangeException("byteIndex",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (byteCount < 0)
                throw new ArgumentOutOfRangeException("byteCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (byteCount > bytes.Length - byteIndex)
                throw new ArgumentOutOfRangeException("bytes",
                    Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));

            if (charIndex < 0 || charIndex > chars.Length)
                throw new ArgumentOutOfRangeException("charIndex",
                    Environment.GetResourceString("ArgumentOutOfRange_Index"));
            Contract.EndContractBlock();

            // Nothing to decode; returning here also avoids pinning an empty array.
            if (bytes.Length == 0)
            {
                return 0;
            }

            // Space remaining in the output array (not the size of the whole array).
            int capacity = chars.Length - charIndex;

            // 'fixed' does not like zero-length arrays, so pin a one-char stand-in instead.
            char[] target = (chars.Length == 0) ? new char[1] : chars;

            fixed (byte* source = bytes)
            {
                fixed (char* destination = target)
                {
                    return GetChars(source + byteIndex, byteCount,
                                    destination + charIndex, capacity, null);
                }
            }
        }
 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include: 
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        // Decodes 'byteCount' bytes at 'bytes' into the buffer at 'chars', which holds
        // 'charCount' characters, and returns the number of characters written.
        // Throws ArgumentNullException for a null pointer and
        // ArgumentOutOfRangeException for a negative count.
        [System.Security.SecurityCritical]  // auto-generated
        [CLSCompliant(false)]
        public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (chars == null)
                throw new ArgumentNullException("chars",
                    Environment.GetResourceString("ArgumentNull_Array"));

            // The character count is checked before the byte count.
            if (charCount < 0)
                throw new ArgumentOutOfRangeException("charCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (byteCount < 0)
                throw new ArgumentOutOfRangeException("byteCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            Contract.EndContractBlock();

            // No decoder state is involved for this overload.
            return GetChars(bytes, byteCount, chars, charCount, null);
        }

        // Returns a string containing the decoded representation of a range of
        // bytes in a byte array.
        // 
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include: 
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding 
        // parent method is safe
 
        // Decodes 'count' bytes of 'bytes', starting at 'index', into a new string.
        // Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
        // for a negative index/count or a range that does not fit inside the array.
        [System.Security.SecuritySafeCritical]  // auto-generated
        public override unsafe String GetString(byte[] bytes, int index, int count)
        {
            if (bytes == null)
                throw new ArgumentNullException("bytes",
                    Environment.GetResourceString("ArgumentNull_Array"));

            if (index < 0)
                throw new ArgumentOutOfRangeException("index",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (count < 0)
                throw new ArgumentOutOfRangeException("count",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

            if (count > bytes.Length - index)
                throw new ArgumentOutOfRangeException("bytes",
                    Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
            Contract.EndContractBlock();

            // An empty input buffer decodes to the empty string (and is never pinned).
            if (bytes.Length == 0)
            {
                return String.Empty;
            }

            // Pin the array and build the string from the requested window.
            fixed (byte* source = bytes)
            {
                return String.CreateStringFromEncoding(source + index, count, this);
            }
        }
 
        //
        // End of standard methods copied from EncodingNLS.cs 
        //

        // Counts the bytes needed to encode 'count' UTF-16 code units starting at 'chars'.
        //
        //   chars   - first code unit to examine (asserted non-null).
        //   count   - number of code units to examine (asserted non-negative).
        //   encoder - optional encoder. When supplied, its left-over high surrogate and its
        //             fallback buffer are used, and a trailing high surrogate is only sent
        //             to the fallback when encoder.MustFlush is set. When null, a new
        //             fallback buffer is created from this.encoderFallback.
        //
        // Every ordinary character and every high/low surrogate pair contributes 4 bytes.
        // Unpaired surrogates are handed to the fallback buffer, and whatever characters
        // the fallback buffer yields are counted in turn.
        //
        // Throws ArgumentException when the encoder's fallback buffer still holds data, and
        // ArgumentOutOfRangeException when the total does not fit in an Int32.
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
        {
            Contract.Assert(chars!=null, "[UTF32Encoding.GetByteCount]chars!=null");
            Contract.Assert(count >=0, "[UTF32Encoding.GetByteCount]count >=0");

            char* end = chars + count;
            char* charStart = chars;

            // Accumulate in 64 bits. An Int32 total can wrap past the negative range and
            // back to a small non-negative value (e.g. when a fallback expands each bad
            // character into many), which a sign check at the end cannot detect.
            long byteCount = 0;

            char highSurrogate = '\0';

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer fallbackBuffer = null;
            if (encoder != null)
            {
                highSurrogate = encoder.charLeftOver;
                fallbackBuffer = encoder.FallbackBuffer;

                // We mustn't have left over fallback data when counting
                if (fallbackBuffer.Remaining > 0)
                    throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
                    this.EncodingName, encoder.Fallback.GetType()));
            }
            else
            {
                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
            }

            // Set our internal fallback interesting things.
            fallbackBuffer.InternalInitialize(charStart, end, encoder, false);

            char ch;
            TryAgain:

            // Drain any pending fallback characters first, then continue with the input.
            while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }

                // Do we need a low surrogate?
                if (highSurrogate != '\0')
                {
                    //
                    // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
                    //
                    if (Char.IsLowSurrogate(ch))
                    {
                        // They're all legal
                        highSurrogate = '\0';

                        //
                        // One surrogate pair will be translated into 4 bytes UTF32.
                        //

                        byteCount += 4;
                        continue;
                    }

                    // We are missing our low surrogate, decrement chars and fallback the high surrogate
                    // The high surrogate may have come from the encoder, but nothing else did.
                    Contract.Assert(chars > charStart,
                        "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
                    chars--;

                    // Do the fallback
                    fallbackBuffer.InternalFallback(highSurrogate, ref chars);

                    // We're going to fallback the old high surrogate.
                    highSurrogate = '\0';
                    continue;

                }

                // Do we have another high surrogate?
                if (Char.IsHighSurrogate(ch))
                {
                    //
                    // We'll have a high surrogate to check next time.
                    //
                    highSurrogate = ch;
                    continue;
                }

                // Check for illegal characters
                if (Char.IsLowSurrogate(ch))
                {
                    // We have a leading low surrogate, do the fallback
                    fallbackBuffer.InternalFallback(ch, ref chars);

                    // Try again with fallback buffer
                    continue;
                }

                // We get to add the character (4 bytes UTF32)
                byteCount += 4;
            }

            // May have to do our last surrogate
            if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
            {
                // We have to do the fallback for the lonely high surrogate
                fallbackBuffer.InternalFallback(highSurrogate, ref chars);
                highSurrogate = (char)0;
                goto TryAgain;
            }

            // Check for overflows: the result must be representable as an Int32.
            if (byteCount > Int32.MaxValue)
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString(
                    "ArgumentOutOfRange_GetByteCountOverflow"));

            // Shouldn't have anything in fallback buffer for GetByteCount
            // (don't have to check m_throwOnOverflow for count)
            Contract.Assert(fallbackBuffer.Remaining == 0,
                "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");

            // Return our count (range-checked above, so the narrowing is lossless)
            return (int)byteCount;
        }

        // Encodes 'charCount' UTF-16 code units starting at 'chars' into UTF-32 at 'bytes',
        // which has room for 'byteCount' bytes, and returns the number of bytes written.
        //
        //   chars/charCount - input code units (asserted non-null / non-negative).
        //   bytes/byteCount - output buffer (asserted non-null / non-negative).
        //   encoder         - optional encoder. When supplied, its left-over high surrogate
        //                     and fallback buffer are used, and on exit charLeftOver and
        //                     m_charsUsed are updated. When null, a new fallback buffer is
        //                     created from this.encoderFallback.
        //
        // Each ordinary character and each high/low surrogate pair is written as 4 bytes,
        // most significant byte first when bigEndian is set and least significant first
        // otherwise. Unpaired surrogates are handed to the fallback buffer. When fewer than
        // 4 bytes of output remain, the pending character(s) are un-consumed and
        // ThrowBytesOverflow is called; if that call returns, encoding stops there.
        //
        // Throws ArgumentException when the encoder's fallback buffer still holds data and
        // encoder.m_throwOnOverflow is set.
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetBytes(char *chars, int charCount,
                                                 byte* bytes, int byteCount, EncoderNLS encoder)
        {
            Contract.Assert(chars!=null, "[UTF32Encoding.GetBytes]chars!=null");
            Contract.Assert(bytes!=null, "[UTF32Encoding.GetBytes]bytes!=null");
            Contract.Assert(byteCount >=0, "[UTF32Encoding.GetBytes]byteCount >=0");
            Contract.Assert(charCount >=0, "[UTF32Encoding.GetBytes]charCount >=0");

            char* charStart = chars;
            char* charEnd = chars + charCount;
            byte* byteStart = bytes;
            byte* byteEnd = bytes + byteCount;

            char highSurrogate = '\0';

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer fallbackBuffer = null;
            if (encoder != null)
            {
                highSurrogate = encoder.charLeftOver;
                fallbackBuffer = encoder.FallbackBuffer;

                // We mustn't have left over fallback data when not converting
                if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
                    throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
                    this.EncodingName, encoder.Fallback.GetType()));
            }
            else
            {
                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
            }

            // Set our internal fallback interesting things.
            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);

            char ch;
            TryAgain:

            // Drain any pending fallback characters first, then continue with the input.
            while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }

                // Do we need a low surrogate?
                if (highSurrogate != '\0')
                {
                    //
                    // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
                    //
                    if (Char.IsLowSurrogate(ch))
                    {
                        // Is it a legal one?
                        uint iTemp = GetSurrogate(highSurrogate, ch);
                        highSurrogate = '\0';

                        //
                        // One surrogate pair will be translated into 4 bytes UTF32.
                        //
                        if (bytes+3 >= byteEnd)
                        {
                            // Don't have 4 bytes
                            if (fallbackBuffer.bFallingBack)
                            {
                                fallbackBuffer.MovePrevious();                  // Aren't using these 2 fallback chars
                                fallbackBuffer.MovePrevious();
                            }
                            else
                            {
                                // If we don't have enough room, then either we should've advanced a while
                                // or we should have bytes==byteStart and throw below
                                Contract.Assert(chars > charStart + 1 || bytes == byteStart,
                                    "[UTF32Encoding.GetBytes]Expected chars to have advanced when no room to add surrogate pair");
                                chars-=2;                                       // Aren't using those 2 chars
                            }
                            ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                            highSurrogate = (char)0;                            // Nothing left over (we backed up to start of pair if supplementary)
                            break;
                        }

                        if (bigEndian)
                        {
                            *(bytes++) = (byte)(0x00);
                            *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
                            *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
                            *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
                        }
                        else
                        {
                            *(bytes++) = (byte)(iTemp);             // Implies & 0xFF
                            *(bytes++) = (byte)(iTemp >> 8);        // Implies & 0xFF
                            *(bytes++) = (byte)(iTemp >> 16);       // Implies & 0xFF, which isn't needed cause high are all 0
                            *(bytes++) = (byte)(0x00);
                        }
                        continue;
                    }

                    // We are missing our low surrogate, decrement chars and fallback the high surrogate
                    // The high surrogate may have come from the encoder, but nothing else did.
                    Contract.Assert(chars > charStart,
                        "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
                    chars--;

                    // Do the fallback
                    fallbackBuffer.InternalFallback(highSurrogate, ref chars);

                    // We're going to fallback the old high surrogate.
                    highSurrogate = '\0';
                    continue;
                }

                // Do we have another high surrogate?, if so remember it
                if (Char.IsHighSurrogate(ch))
                {
                    //
                    // We'll have a high surrogate to check next time.
                    //
                    highSurrogate = ch;
                    continue;
                }

                // Check for illegal characters (low surrogate)
                if (Char.IsLowSurrogate(ch))
                {
                    // We have a leading low surrogate, do the fallback
                    fallbackBuffer.InternalFallback(ch, ref chars);

                    // Try again with fallback buffer
                    continue;
                }

                // We get to add the character, yippee.
                if (bytes+3 >= byteEnd)
                {
                    // Don't have 4 bytes
                    if (fallbackBuffer.bFallingBack)
                        fallbackBuffer.MovePrevious();                  // Aren't using this fallback char
                    else
                    {
                        // Must've advanced already
                        Contract.Assert(chars > charStart,
                            "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
                        chars--;                                        // Aren't using this char
                    }
                    ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                    break;                                              // Didn't throw, stop
                }

                if (bigEndian)
                {
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
                    *(bytes++) = (byte)(ch);            // Implies & 0xFF
                }
                else
                {
                    *(bytes++) = (byte)(ch);            // Implies & 0xFF
                    *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)(0x00);
                }
            }

            // May have to do our last surrogate
            if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
            {
                // We have to do the fallback for the lonely high surrogate
                fallbackBuffer.InternalFallback(highSurrogate, ref chars);
                highSurrogate = (char)0;
                goto TryAgain;
            }

            // Fix our encoder if we have one
            Contract.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
                "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");

            if (encoder != null)
            {
                // Remember our left over surrogate (or 0 if flushing)
                encoder.charLeftOver = highSurrogate;

                // Need # chars used
                encoder.m_charsUsed = (int)(chars-charStart);
            }

            // return the new length
            return (int)(bytes - byteStart);
        }

        // Computes how many chars would be produced by decoding 'count' bytes starting at 'bytes'.
        //
        //   bytes       - first input byte (asserted non-null).
        //   count       - number of input bytes (asserted non-negative).
        //   baseDecoder - optional UTF32Decoder supplying bytes left over from a previous call and
        //                 the fallback buffer to use; when null a fresh fallback buffer is created
        //                 from this.decoderFallback and any trailing partial sequence is fallen back.
        //
        // Returns the number of chars (2 for every code point >= 0x10000, 1 otherwise, plus whatever
        // the fallback reports for invalid or incomplete sequences).
        // Throws ArgumentOutOfRangeException when the running char count wraps negative.
        //
        // The decoder's iChar/readByteCount are only read here; nothing is written back to the decoder.
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            Contract.Assert(bytes!=null, "[UTF32Encoding.GetCharCount]bytes!=null");
            Contract.Assert(count >=0, "[UTF32Encoding.GetCharCount]count >=0");

            UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

            // None so far!
            int charCount = 0;
            byte* end = bytes + count;
            byte* byteStart = bytes;

            // Partial code point state: how many of its 4 bytes we have, and the bits gathered so far
            int readCount = 0;
            uint iChar = 0;

            // For fallback we may need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null;

            // Pick up any bytes the decoder is still holding from an earlier call
            if (decoder != null)
            {
                readCount = decoder.readByteCount;
                iChar = (uint)decoder.iChar;
                fallbackBuffer = decoder.FallbackBuffer;

                // Shouldn't have anything in fallback buffer for GetCharCount
                // (don't have to check m_throwOnOverflow for chars or count)
                Contract.Assert(fallbackBuffer.Remaining == 0,
                    "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
            }
            else
            {
                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
            }

            // Counting only, so there is no output buffer to hand to the fallback (null char end).
            fallbackBuffer.InternalInitialize(byteStart, null);

            // Loop through our input one byte at a time, completing a code point every 4 bytes.
            // charCount >= 0 stops the loop as soon as the count has wrapped negative.
            while (bytes < end && charCount >= 0)
            {
                // Merge the next byte into the code point being assembled
                if (bigEndian)
                {
                    // Most significant byte arrives first: scoot left and add it to the bottom
                    iChar <<= 8;
                    iChar += *(bytes++);
                }
                else
                {
                    // Least significant byte arrives first: scoot right and add it to the top
                    iChar >>= 8;
                    iChar += (uint)(*(bytes++)) << 24;
                }

                readCount++;

                // See if we have all the bytes yet
                if (readCount < 4)
                    continue;

                // Have the bytes
                readCount = 0;

                // Reject values above U+10FFFF and values in the surrogate range D800..DFFF
                if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
                {
                    // Need to fall back these 4 bytes, rebuilt in the order they were read
                    byte[] fallbackBytes;
                    if (this.bigEndian)
                    {
                        fallbackBytes = new byte[] {
                            unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
                            unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
                    }
                    else
                    {
                        fallbackBytes = new byte[] {
                            unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
                            unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
                    }

                    // The fallback reports how many chars it would emit for these bytes
                    charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);

                    // Ignore the illegal character
                    iChar = 0;
                    continue;
                }

                // Ok, we have something we can add to our output
                if (iChar >= 0x10000)
                {
                    // Supplementary code points need a surrogate pair, so count one extra char
                    charCount++;
                }

                // Count the low surrogate or the normal (BMP) character
                charCount++;

                // iChar is back to 0
                iChar = 0;
            }

            // A partial code point remains; it only has to be dealt with when nothing
            // will follow (no decoder, or the decoder is flushing).
            if (readCount > 0 && (decoder == null || decoder.MustFlush))
            {
                // Oops, there's something left over with no place to go.
                // Rebuild the bytes actually read, last byte first.
                byte[] fallbackBytes = new byte[readCount];
                if (this.bigEndian)
                {
                    // Big endian gathered bytes into the low end of iChar
                    while (readCount > 0)
                    {
                        fallbackBytes[--readCount] = unchecked((byte)iChar);
                        iChar >>= 8;
                    }
                }
                else
                {
                    // Little endian gathered bytes into the high end of iChar
                    while (readCount > 0)
                    {
                        fallbackBytes[--readCount] = unchecked((byte)(iChar>>24));
                        iChar <<= 8;
                    }
                }

                charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
            }

            // Check for overflows.  It is the resulting char count that overflowed here,
            // so report it with the char-count overflow message.
            if (charCount < 0)
                throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));

            // Shouldn't have anything in fallback buffer for GetCharCount
            // (don't have to check m_throwOnOverflow for chars or count)
            Contract.Assert(fallbackBuffer.Remaining == 0,
                "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

            // Return our count
            return charCount;
        }
 
        // Decodes 'byteCount' UTF-32 bytes starting at 'bytes' into UTF-16 chars written at 'chars'.
        //
        //   bytes / byteCount - input buffer and its length (asserted non-null / non-negative).
        //   chars / charCount - output buffer and its capacity in chars (asserted non-null / non-negative).
        //   baseDecoder       - optional UTF32Decoder; when present its left-over bytes are continued
        //                       and, on exit, its iChar, readByteCount and m_bytesUsed are updated.
        //
        // Returns the number of chars written.  When the output buffer cannot hold the next
        // character the input pointer is backed up by 4 bytes and ThrowCharsOverflow is called;
        // per the inline notes it throws if no chars were output, otherwise decoding just stops.
        [System.Security.SecurityCritical]  // auto-generated
        internal override unsafe int GetChars(byte* bytes, int byteCount, 
                                                char* chars, int charCount, DecoderNLS baseDecoder)
        {
            Contract.Assert(chars!=null, "[UTF32Encoding.GetChars]chars!=null");
            Contract.Assert(bytes!=null, "[UTF32Encoding.GetChars]bytes!=null"); 
            Contract.Assert(byteCount >=0, "[UTF32Encoding.GetChars]byteCount >=0");
            Contract.Assert(charCount >=0, "[UTF32Encoding.GetChars]charCount >=0"); 
 
            UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
 
            // Remember where both buffers start and end so we can measure progress and bounds-check
            char* charStart = chars;
            char* charEnd = chars + charCount;
 
            byte* byteStart = bytes;
            byte* byteEnd = bytes + byteCount; 
 
            // Partial code point state: bytes gathered so far (0..3) and their accumulated bits
            int readCount = 0; 
            uint iChar = 0;

            // For fallback we may need a fallback buffer
            DecoderFallbackBuffer fallbackBuffer = null; 

            // Continue from whatever the decoder kept from a previous call (state is not cleared yet)
            if (decoder != null) 
            {
                readCount = decoder.readByteCount; 
                iChar = (uint)decoder.iChar;
                fallbackBuffer = baseDecoder.FallbackBuffer;

                // Shouldn't have anything in fallback buffer for GetChars 
                // (don't have to check m_throwOnOverflow for chars)
                Contract.Assert(fallbackBuffer.Remaining == 0, 
                    "[UTF32Encoding.GetChars]Expected empty fallback buffer at start"); 
            }
            else 
            {
                fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
            }
 
            // Tell the fallback buffer where the input starts and where the output buffer ends.
            fallbackBuffer.InternalInitialize(bytes, chars + charCount); 
 
            // Loop through our input one byte at a time; a code point is complete every 4 bytes.
            while (bytes < byteEnd) 
            {
                // Merge the next byte into the code point being assembled
                if(bigEndian)
                { 
                    // Most significant byte arrives first: scoot left and add it to the bottom
                    iChar <<= 8; 
                    iChar += *(bytes++); 
                }
                else 
                {
                    // Least significant byte arrives first: scoot right and add it to the top
                    iChar >>= 8;
                    iChar += (uint)(*(bytes++)) << 24; 
                }
 
                readCount++; 

                // See if we have all the bytes yet 
                if (readCount < 4)
                    continue;

                // Have the bytes 
                readCount = 0;
 
                // Reject values above U+10FFFF and values in the surrogate range D800..DFFF
                if ( iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
                { 
                    // Need to fall back these 4 bytes, rebuilt in the order they were read
                    byte[] fallbackBytes;
                    if (this.bigEndian)
                    { 
                        fallbackBytes = new byte[] {
                            unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)), 
                            unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) }; 
                    }
                    else 
                    {
                        fallbackBytes = new byte[] {
                            unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
                            unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) }; 
                    }
 
                    // Chars won't be updated unless this works. 
                    if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
                    { 
                        // Couldn't fallback, throw or wait til next time
                        // We either read enough bytes for bytes-=4 to work, or we're
                        // going to throw in ThrowCharsOverflow because chars == charStart
                        Contract.Assert(bytes >= byteStart + 4 || chars == charStart, 
                            "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
                        bytes-=4;                                       // get back to where we were 
                        iChar=0;                                        // Remembering nothing 
                        fallbackBuffer.InternalReset();
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output 
                        break;                                          // Stop here, didn't throw
                    }

                    // Ignore the illegal character 
                    iChar = 0;
                    continue; 
                } 

 
                // Ok, we have something we can add to our output
                if (iChar >= 0x10000)
                {
                    // Supplementary code point: needs room for two chars (a surrogate pair)
                    if (chars >= charEnd - 1)
                    { 
                        // Throwing or stopping 
                        // We either read enough bytes for bytes-=4 to work, or we're
                        // going to throw in ThrowCharsOverflow because chars == charStart 
                        Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
                            "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
                        bytes-=4;                                       // get back to where we were
                        iChar=0;                                        // Remembering nothing 
                        ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
                        break;                                          // Stop here, didn't throw 
                    } 

                    // Emit the high surrogate now; the low surrogate replaces iChar and is
                    // written by the common store below.
                    *(chars++) = GetHighSurrogate(iChar); 
                    iChar = GetLowSurrogate(iChar);
                }
                // Bounds check for normal character
                else if (chars >= charEnd) 
                {
                    // Throwing or stopping 
                    // We either read enough bytes for bytes-=4 to work, or we're 
                    // going to throw in ThrowCharsOverflow because chars == charStart
                    Contract.Assert(bytes >= byteStart + 4 || chars == charStart, 
                        "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
                    bytes-=4;                                       // get back to where we were
                    iChar=0;                                        // Remembering nothing
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output 
                    break;                                          // Stop here, didn't throw
                } 
 
                // Add the rest of the surrogate or our normal character
                *(chars++) = (char)iChar; 

                // iChar is back to 0
                iChar = 0;
            } 

            // A partial code point remains; it only has to be dealt with when nothing
            // will follow (no decoder, or the decoder is flushing).
            if (readCount > 0 && (decoder == null || decoder.MustFlush)) 
            {
                // Oops, there's something left over with no place to go. 
                // Rebuild the bytes actually read, last byte first.  tempCount is used so that
                // readCount survives if the fallback below cannot be completed.
                byte[] fallbackBytes = new byte[readCount];
                int tempCount = readCount;
                if (this.bigEndian)
                { 
                    // Big endian gathered bytes into the low end of iChar
                    while(tempCount > 0)
                    { 
                        fallbackBytes[--tempCount] = unchecked((byte)iChar); 
                        iChar >>= 8;
                    } 
                }
                else
                {
                    // Little endian gathered bytes into the high end of iChar
                    while (tempCount > 0) 
                    {
                        fallbackBytes[--tempCount] = unchecked((byte)(iChar>>24)); 
                        iChar <<= 8; 
                    }
                } 

                if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
                {
                    // Couldn't fallback. 
                    fallbackBuffer.InternalReset();
                    ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output 
                    // Stop here, didn't throw, backed up, so still nothing in buffer 
                }
                else 
                {
                    // Don't clear our decoder unless we could fall it back.
                    // If we caught the if above, then we're a convert() and will catch this next time.
                    readCount = 0; 
                    iChar = 0;
                } 
            } 

            // Remember any left over stuff, clearing buffer as well for MustFlush 
            if (decoder != null)
            {
                decoder.iChar = (int)iChar;
                decoder.readByteCount = readCount; 
                decoder.m_bytesUsed = (int)(bytes - byteStart);
            } 
 
            // Shouldn't have anything in fallback buffer for GetChars
            // (don't have to check m_throwOnOverflow for chars) 
            Contract.Assert(fallbackBuffer.Remaining == 0,
                "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");

            // Return the number of chars written
            return (int)(chars - charStart);
        } 
 

        // Combines a UTF-16 surrogate pair into the code point it represents:
        // (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000.
        private uint GetSurrogate(char cHigh, char cLow)
        {
            // Shifting left by 10 is the same as multiplying by 0x400
            uint highPart = ((uint)cHigh - 0xD800) << 10;
            uint lowPart = (uint)cLow - 0xDC00;

            return highPart + lowPart + 0x10000;
        }
 
        // Returns the high (leading) surrogate for a code point at or above 0x10000.
        private char GetHighSurrogate(uint iChar)
        {
            // Offset into the supplementary range; its upper bits select the high surrogate
            uint offset = iChar - 0x10000;

            // Shifting right by 10 is the same as dividing by 0x400
            return (char)((offset >> 10) + 0xD800);
        }
 
        // Returns the low (trailing) surrogate for a code point at or above 0x10000.
        private char GetLowSurrogate(uint iChar)
        {
            // Offset into the supplementary range; its low 10 bits select the low surrogate
            uint offset = iChar - 0x10000;

            // Masking with 0x3FF is the same as taking the remainder modulo 0x400
            return (char)((offset & 0x3FF) + 0xDC00);
        }

 
        // Creates a new UTF32Decoder bound to this encoding instance.
        public override Decoder GetDecoder()
        {
            Decoder utf32Decoder = new UTF32Decoder(this);
            return utf32Decoder;
        }


        // Creates a new EncoderNLS bound to this encoding instance.
        public override Encoder GetEncoder()
        {
            Encoder utf32Encoder = new EncoderNLS(this);
            return utf32Encoder;
        }

 
        // Returns an upper bound on the number of bytes needed to encode 'charCount' chars.
        //
        //   charCount - number of chars to encode; must be non-negative.
        //
        // Throws ArgumentOutOfRangeException when charCount is negative or when the bound
        // does not fit in an int.
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
               throw new ArgumentOutOfRangeException("charCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            Contract.EndContractBlock();

            // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
            long unitCount = (long)charCount + 1;

            if (EncoderFallback.MaxCharCount > 1)
                unitCount *= EncoderFallback.MaxCharCount;

            // Each unit takes 4 bytes.  Range-check before scaling: with a very large fallback
            // MaxCharCount the product above can approach 2^62, and multiplying that by 4 would
            // wrap the long and slip past a check made afterwards.
            if (unitCount > 0x7fffffff / 4)
                throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));

            return (int)(unitCount * 4);
        }
 

        // Returns an upper bound on the number of chars produced by decoding 'byteCount' bytes.
        //
        //   byteCount - number of bytes to decode; must be non-negative.
        //
        // Throws ArgumentOutOfRangeException when byteCount is negative or when the bound
        // does not fit in an int.
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
               throw new ArgumentOutOfRangeException("byteCount",
                    Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            Contract.EndContractBlock();

            // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
            // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
            // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair
            //
            // Computed as a long: the fallback multiplication below would silently wrap an int,
            // which would make the range check at the end unable to ever fire.
            long charCount = ((long)byteCount / 2) + 2;

            // Also consider fallback because our input bytes could be out of range of unicode.
            // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
            if (DecoderFallback.MaxCharCount > 2)
            {
                // Multiply time fallback size
                charCount *= DecoderFallback.MaxCharCount;

                // We were already figuring 2 chars per 4 bytes, but fallback will be different #
                charCount /= 2;
            }

            if (charCount > 0x7fffffff)
                throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));

            return (int)charCount;
        }
 

        // Returns the byte order mark for this encoding, or the shared empty array when
        // this instance was configured not to emit one.
        public override byte[] GetPreamble()
        {
            // No byte order mark requested
            if (!emitUTF32ByteOrderMark)
                return Encoding.emptyByteArray;

            // U+FEFF written as 4 bytes in this instance's byte order.  A fresh array is
            // allocated on every call so callers cannot modify a shared copy.
            byte[] preamble = bigEndian
                ? new byte[4] { 0x00, 0x00, 0xFE, 0xFF }
                : new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };

            return preamble;
        }

 
        // Two UTF32Encoding instances are equal when they agree on byte order mark emission,
        // byte order, and both fallbacks.  isThrowException is not compared.
        public override bool Equals(Object value)
        {
            UTF32Encoding other = value as UTF32Encoding;

            // Not a UTF32Encoding (or null)
            if (other == null)
                return false;

            if (emitUTF32ByteOrderMark != other.emitUTF32ByteOrderMark)
                return false;

            if (bigEndian != other.bigEndian)
                return false;

            if (!EncoderFallback.Equals(other.EncoderFallback))
                return false;

            return DecoderFallback.Equals(other.DecoderFallback);
        }

 
        // Hash built from the two fallbacks, the code page and the two configuration flags.
        public override int GetHashCode()
        {
            // Distribution is not great, but an encoding is unlikely to be used as a hashtable key.
            int hash = this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
            hash += CodePage;

            if (emitUTF32ByteOrderMark)
                hash += 4;

            if (bigEndian)
                hash += 8;

            return hash;
        }
 
        // Decoder that carries the bytes of an incomplete UTF-32 code point between calls.
        [Serializable]
        internal class UTF32Decoder : DecoderNLS
        {
            // Bits of the partially read code point
            internal int iChar = 0;

            // How many bytes of the current code point have been read so far
            internal int readByteCount = 0;

            public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
            {
                // Nothing to do here; per the original note, the base constructor calls Reset.
            }

            // Forgets any partially read code point and resets the fallback buffer if one exists.
            public override void Reset()
            {
                iChar = 0;
                readByteCount = 0;

                if (m_fallbackBuffer == null)
                    return;

                m_fallbackBuffer.Reset();
            }

            // True while bytes of an unfinished code point are being held.
            internal override bool HasState
            {
                get
                {
                    // readByteCount is the flag; iChar == 0 alone says nothing.
                    return readByteCount != 0;
                }
            }
        }
    }
} 

#endif // FEATURE_UTF32

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK
UTF32Encoding.cs source code in C# .NET

Source code for the .NET framework in C#

Code:

Link Menu