Code:
/ 4.0 / 4.0 / untmp / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / clr / src / BCL / System / Text / EUCJPEncoding.cs / 1305376 / EUCJPEncoding.cs
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncoding
namespace System.Text
{
using System.Text;
using System.Globalization;
// EUCJPEncoding
//
// EUC-JP Encoding (51932)
//
// EUC-JP has the following code points:
// 00-7F - ASCII
// 80-8D & 90-9F - Control. (Like Unicode, except for 8e and 8f)
// A1-FE, A1-FE - 2 byte JIS X 0208 range.
// 8E, A1-DF - 2 byte half-width Katakana
// 8F, A1-FE, A1-FE - 3 byte JIS X 0212 range. WE DON'T USE JIS 0212!!!
//
// New thoughts:
// Fixing windows 20932 code page so that all characters can be looked up there.
//
// Old thoughts:
// Windows NLS uses a special CP20932 for EUC-JP, but it is not used by MLang. Windows
// maps the 3 byte ranges to the 2 byte CP20932 by masking the 2nd byte with & 0x7F.
// MLang uses the native Windows 932 code page, which is more reliable; however, its code points
// don't line up as nicely as those of the 20932 code page, and it doesn't have JIS X 0212 support.
//
// So what we do is:
// 1. For ASCII, leave it alone
// 2. For half-width Katakana, use the leading byte and convert with 20932 code page.
// 3. For JIS X 0208, use the leading & trailing bytes with 20932 code page.
// 4. For JIS X 0212, remove the lead byte, & 0xFF7F, and use the CP20932 table to convert.
//
// Regarding Normalization:
// Forms KC & KD are precluded because of things like halfwidth Katakana that have compatibility mappings
// Form D is precluded because of 0x00a8, which changes to space + diaeresis.
//
// I think that IsAlwaysNormalized should probably return true for form C (but not certain)
//
// NOTE: We don't use JIS 0212 so we are basically a DBCS code page, we just have to modify
// the 932 table we're basing this on.
//
using System;
[Serializable]
internal class EUCJPEncoding : DBCSCodePageEncoding
{
    // Constructs the 51932 encoding. As far as the memory tables are concerned this
    // pretends to be CP 932 (the second argument passed to the base constructor).
    // NOTE(review): the effect of m_bUseMlangTypeForSerialization is defined by the
    // base class and is not visible here.
    [System.Security.SecurityCritical] // auto-generated
    public EUCJPEncoding()
        : base(51932, 932)
    {
        m_bUseMlangTypeForSerialization = true;
    }

    // Builds the memory section name for this code page:
    //   "CodePage_<code page>_<major>_<minor>_<revision>_<build>_EUCJP"
    // The code page number is dataTableCodePage when bFlagDataTable is set, otherwise
    // CodePage; the version fields are read from the pCodePage header.
    [System.Security.SecurityCritical] // auto-generated
    protected unsafe override String GetMemorySectionName()
    {
        int sectionCodePage;
        if (bFlagDataTable)
        {
            sectionCodePage = dataTableCodePage;
        }
        else
        {
            sectionCodePage = CodePage;
        }

        return String.Format(CultureInfo.InvariantCulture, "CodePage_{0}_{1}_{2}_{3}_{4}_EUCJP",
            sectionCodePage,
            pCodePage->VersionMajor,
            pCodePage->VersionMinor,
            pCodePage->VersionRevision,
            pCodePage->VersionBuild);
    }

    // Cleans up a byte value taken from the 932 table so that it fits the EUC-JP layout.
    //
    //   bytes   - in: the value to adjust; out: the adjusted value (rewritten in place).
    //   returns - true when the (possibly adjusted) value is accepted, false when it is
    //             rejected. NOTE(review): what the caller does with a rejected value is
    //             decided by the base class and is not visible here.
    protected override bool CleanUpBytes(ref int bytes)
    {
        // Values below 0x100 (single byte).
        if (bytes < 0x100)
        {
            // For 51932, half-width Katakana (0xa1-0xdf) gets a 0x8E lead byte.
            if (bytes >= 0xa1 && bytes <= 0xdf)
            {
                bytes |= 0x8E00;
                return true;
            }

            // 0x81-0x9f and 0xe0-0xfc are CP 932 lead bytes, 0xa1-0xfe are our lead bytes.
            // We set different lead bytes later, so everything from 0x81 up is rejected
            // except 0xa0 and 0xff.
            return bytes < 0x81 || bytes == 0xa0 || bytes == 0xff;
        }

        // Map the extended characters (0xfa40-0xfc4b) to their special range first.
        bytes = RemapExtendedRange(bytes);

        // Convert the 932 code page value to a 20932-like code page range
        // (ported from mlang). The arithmetic is deliberately done on bytes.
        byte lead = unchecked((byte)(bytes >> 8));
        byte trail = unchecked((byte)bytes);

        if (lead > (byte)0x9f)
        {
            lead -= (byte)0xb1;
        }
        else
        {
            lead -= (byte)0x71;
        }
        lead = (byte)((lead << 1) + 1);

        if (trail <= (byte)0x9e)
        {
            if (trail > (byte)0x7e)
            {
                trail--;
            }
            trail -= (byte)0x1f;
        }
        else
        {
            trail -= (byte)0x7e;
            lead++;
        }

        bytes = (lead << 8) | trail | 0x8080;

        // Don't step out of our allocated lead byte area:
        // all DBCS lead and trail bytes should be >= 0xa1 and <= 0xfe.
        // WARNING: Our funky mapping allows illegal values, which we continue to use
        // so that we're compatible with Everett.
        int leadPart = bytes & 0xFF00;
        int trailPart = bytes & 0xFF;
        return leadPart >= 0xa100 && leadPart <= 0xfe00
            && trailPart >= 0xa1 && trailPart <= 0xfe;
    }

    // Maps an extended character in 0xfa40-0xfc4b to its special range (ported from mlang).
    // Values outside 0xfa40-0xfc4b are returned unchanged.
    private static int RemapExtendedRange(int value)
    {
        if (value < 0xfa40 || value > 0xfc4b)
        {
            return value;
        }

        // 0xfa5c-0xfc4b: the offset depends on the low byte.
        if (value > 0xfa5b)
        {
            byte low = unchecked((byte)value);
            if (low < 0x5c)
            {
                return value - 0x0d5f;
            }
            if (low >= 0x80 && low <= 0x9B)
            {
                return value - 0x0d1d;
            }
            return value - 0x0d1c;
        }

        // 0xfa40-0xfa5b: four individually mapped values, then three offset sub-ranges.
        switch (value)
        {
            case 0xfa58: return 0x878a;
            case 0xfa59: return 0x8782;
            case 0xfa5a: return 0x8784;
            case 0xfa5b: return 0x879a;
        }

        if (value <= 0xfa49)
        {
            return value - 0x0b51;      // 0xfa40-0xfa49
        }
        if (value <= 0xfa53)
        {
            return value - 0x72f6;      // 0xfa4a-0xfa53
        }
        return value - 0x0b5b;          // 0xfa54-0xfa57
    }

    // Marks the lead byte entries of the table passed in `chars` (special case for CP 51932):
    // 0x8E, the lead byte for half-width Katakana, and 0xA1-0xFE, the DBCS lead bytes,
    // are all set to LEAD_BYTE_CHAR.
    [System.Security.SecurityCritical] // auto-generated
    protected override unsafe void CleanUpEndBytes(char* chars)
    {
        chars[0x8e] = LEAD_BYTE_CHAR;

        int index = 0xA1;
        while (index <= 0xFE)
        {
            chars[index] = LEAD_BYTE_CHAR;
            index++;
        }
    }
}
}
#endif // FEATURE_CODEPAGES_FILE
// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- RouteItem.cs
- NameValuePair.cs
- NameTable.cs
- CheckBoxPopupAdapter.cs
- ListBox.cs
- ThrowHelper.cs
- Part.cs
- SHA1Managed.cs
- CharacterBuffer.cs
- VideoDrawing.cs
- PreservationFileReader.cs
- SeekableReadStream.cs
- DataServiceBuildProvider.cs
- SharedPersonalizationStateInfo.cs
- VScrollBar.cs
- PropertyDescriptor.cs
- SQLInt16.cs
- TextServicesContext.cs
- ClientConfigurationSystem.cs
- UpDownBase.cs
- MemberHolder.cs
- _SafeNetHandles.cs
- SqlErrorCollection.cs
- AutomationElementIdentifiers.cs
- ExpandableObjectConverter.cs
- XXXInfos.cs
- XamlTypeWithExplicitNamespace.cs
- InvalidProgramException.cs
- RuntimeArgumentHandle.cs
- ProxyGenerationError.cs
- ToolStripDropDownClosingEventArgs.cs
- RightsManagementPermission.cs
- ProfileEventArgs.cs
- CodePageEncoding.cs
- StrokeNodeEnumerator.cs
- CommandID.cs
- QueueAccessMode.cs
- MetaDataInfo.cs
- Thumb.cs
- BitmapFrameDecode.cs
- WindowsComboBox.cs
- ProfileGroupSettings.cs
- Triangle.cs
- C14NUtil.cs
- WindowPattern.cs
- Switch.cs
- SqlEnums.cs
- ToolStripGrip.cs
- ModulesEntry.cs
- SQLInt64.cs
- MarkupWriter.cs
- ToolStripDesignerAvailabilityAttribute.cs
- AdRotator.cs
- Utils.cs
- FileClassifier.cs
- DebugView.cs
- GridViewColumnHeader.cs
- ScriptingScriptResourceHandlerSection.cs
- Int64AnimationBase.cs
- ToolStripSplitButton.cs
- TrackingQuery.cs
- PenThread.cs
- TTSEngineTypes.cs
- MenuItemStyleCollection.cs
- GradientBrush.cs
- ViewValidator.cs
- PublisherIdentityPermission.cs
- MiniAssembly.cs
- SchemaImporter.cs
- TableRowGroup.cs
- QueryableFilterUserControl.cs
- MarkupExtensionReturnTypeAttribute.cs
- WebPartCatalogAddVerb.cs
- UshortList2.cs
- ClassValidator.cs
- ConfigurationSection.cs
- CompareValidator.cs
- SqlLiftIndependentRowExpressions.cs
- XmlName.cs
- WebPartHelpVerb.cs
- Size3D.cs
- SecurityPermission.cs
- SEHException.cs
- StagingAreaInputItem.cs
- AutomationElement.cs
- ObjectStateManager.cs
- PropertyValueChangedEvent.cs
- BaseCollection.cs
- WebScriptMetadataMessage.cs
- PasswordPropertyTextAttribute.cs
- TaskResultSetter.cs
- SoapReflectionImporter.cs
- SqlFunctionAttribute.cs
- FileNotFoundException.cs
- ProcessHost.cs
- FileDialog.cs
- DefaultWorkflowTransactionService.cs
- SoapElementAttribute.cs
- RoutedCommand.cs
- ExtensionSimplifierMarkupObject.cs