Code:
/ 4.0 / 4.0 / DEVDIV_TFS / Dev10 / Releases / RTMRel / ndp / clr / src / BCL / System / Text / EUCJPEncoding.cs / 1305376 / EUCJPEncoding.cs
// ==++==
//
//   Copyright (c) Microsoft Corporation.  All rights reserved.
//
// ==--==
#if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncoding
namespace System.Text
{
    using System;
    using System.Text;
    using System.Globalization;

    // EUCJPEncoding
    //
    // EUC-JP Encoding (51932)
    //
    // EUC-JP has the following code points:
    //  00-7F            - ASCII
    //  80-8D & 90-9F    - Control.  (Like Unicode, except for 8e and 8f)
    //  A1-FE, A1-FE     - 2 byte JIS X 0208 range.
    //  8E, A1-DF        - 2 byte half-width Katakana
    //  8F, A1-FE, A1-FE - 3 byte JIS X 0212 range.  WE DON'T USE JIS 0212!!!
    //
    // New thoughts:
    //  Fixing windows 20932 code page so that all characters can be looked up there.
    //
    // Old thoughts:
    //  Windows NLS uses a special CP20932 for EUC-JP, but it is not used by mlang.  Windows
    //  maps the 3 byte ranges to the 2 byte CP20932 by masking the 2nd byte with & 0x7F.
    //  MLang uses the native windows 932 code page, which is more reliable, however the code points
    //  don't line up as nicely as the 20932 code page, however it doesn't have JIS X 0212 support.
    //
    //  So what we do is:
    //      1.  For ASCII, leave it alone
    //      2.  For half-width Katakana, use the leading byte and convert with 20932 code page.
    //      3.  For JIS X 0208, use the leading & trailing bytes with 20932 code page
    //      4.  For JIS X 0212, remove the lead byte, & 0xFF7F, and use the CP20932 table to convert.
    //
    // Regarding Normalization:
    //  Forms KC & KD are precluded because of things like halfwidth Katakana that has compatibility mappings
    //  Form D is precluded because of 0x00a8, which changes to space + diaeresis.
    //
    //  I think that IsAlwaysNormalized should probably return true for form C (but not certain)
    //
    // NOTE: We don't use JIS 0212 so we are basically a DBCS code page, we just have to modify
    //       the 932 table we're basing this on.
    //
    [Serializable]
    internal class EUCJPEncoding : DBCSCodePageEncoding
    {
        /// <summary>
        /// Creates the encoding as code page 51932 backed by the code page 932 data table.
        /// </summary>
        // This pretends to be CP 932 as far as memory tables are concerned.
        [System.Security.SecurityCritical]  // auto-generated
        public EUCJPEncoding() : base(51932, 932)
        {
            this.m_bUseMlangTypeForSerialization = true;
        }

        /// <summary>
        /// Builds the memory section name from the code page number in use and the
        /// four version fields of <c>pCodePage</c>, followed by the "_EUCJP" suffix.
        /// </summary>
        /// <returns>The formatted section name.</returns>
        [System.Security.SecurityCritical]  // auto-generated
        protected unsafe override String GetMemorySectionName()
        {
            // Use the data table's code page when the flag says one is in use,
            // otherwise this encoding's own code page.
            int iUseCodePage = this.bFlagDataTable ? dataTableCodePage : CodePage;

            String strName = String.Format(CultureInfo.InvariantCulture,
                "CodePage_{0}_{1}_{2}_{3}_{4}_EUCJP",
                iUseCodePage,
                this.pCodePage->VersionMajor,
                this.pCodePage->VersionMinor,
                this.pCodePage->VersionRevision,
                this.pCodePage->VersionBuild);

            return strName;
        }

        /// <summary>
        /// Clean up characters for EUC-JP code pages, etc.
        /// Rewrites a code page 932 byte value in place into the 20932-like value used here.
        /// </summary>
        /// <param name="bytes">
        /// On entry the 932 value (single byte below 0x100, otherwise lead byte in the
        /// high 8 bits and trail byte in the low 8 bits); on exit the remapped value.
        /// </param>
        /// <returns>
        /// <c>false</c> when a remapped double byte value has a lead or trail byte outside
        /// A1-FE, or when a single byte value is 0x81 or above and is neither half-width
        /// Katakana (A1-DF), 0xA0 nor 0xFF; otherwise <c>true</c>.
        /// NOTE(review): how the caller reacts to <c>false</c> is defined by the base class,
        /// which is not visible in this file.
        /// </returns>
        protected override bool CleanUpBytes(ref int bytes)
        {
            if (bytes >= 0x100)
            {
                // map extended char (0xfa40-0xfc4b) to a special range
                // (ported from mlang)
                if (bytes >= 0xfa40 && bytes <= 0xfc4b)
                {
                    if (bytes >= 0xfa40 && bytes <= 0xfa5b)
                    {
                        if (bytes <= 0xfa49)
                        {
                            bytes = bytes - 0x0b51;
                        }
                        else if (bytes >= 0xfa4a && bytes <= 0xfa53)
                        {
                            bytes = bytes - 0x072f6;
                        }
                        else if (bytes >= 0xfa54 && bytes <= 0xfa57)
                        {
                            bytes = bytes - 0x0b5b;
                        }
                        else if (bytes == 0xfa58)
                        {
                            bytes = 0x878a;
                        }
                        else if (bytes == 0xfa59)
                        {
                            bytes = 0x8782;
                        }
                        else if (bytes == 0xfa5a)
                        {
                            bytes = 0x8784;
                        }
                        else if (bytes == 0xfa5b)
                        {
                            bytes = 0x879a;
                        }
                    }
                    else if (bytes >= 0xfa5c && bytes <= 0xfc4b)
                    {
                        // The offset to subtract depends on the trail byte only.
                        byte tc = unchecked((byte)bytes);
                        if (tc < 0x5c)
                        {
                            bytes = bytes - 0x0d5f;
                        }
                        else if (tc >= 0x80 && tc <= 0x9B)
                        {
                            bytes = bytes - 0x0d1d;
                        }
                        else
                        {
                            bytes = bytes - 0x0d1c;
                        }
                    }
                }

                // Convert 932 code page to 20932 like code page range
                // (also ported from mlang)
                byte bLead = unchecked((byte)(bytes >> 8));
                byte bTrail = unchecked((byte)bytes);

                bLead -= ((bLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
                bLead = (byte)((bLead << 1) + 1);
                if (bTrail > (byte)0x9e)
                {
                    bTrail -= (byte)0x7e;
                    bLead++;
                }
                else
                {
                    if (bTrail > (byte)0x7e)
                    {
                        bTrail--;
                    }
                    bTrail -= (byte)0x1f;
                }

                // Setting the high bit of both bytes moves the pair into the EUC range.
                bytes = ((int)bLead) << 8 | (int)bTrail | 0x8080;

                // Don't step out of our allocated lead byte area.
                // All DBCS lead and trail bytes should be >= 0xa1 and <= 0xfe
                if ((bytes & 0xFF00) < 0xa100 || (bytes & 0xFF00) > 0xfe00 ||
                    (bytes & 0xFF) < 0xa1 || (bytes & 0xFF) > 0xfe)
                {
                    return false;
                }

                // WARNING: Our funky mapping allows illegal values, which we continue to use
                // so that we're compatible with Everett.
            }
            else
            {
                // For 51932 1/2 Katakana gets a 0x8E lead byte
                // Adjust 1/2 Katakana
                if (bytes >= 0xa1 && bytes <= 0xdf)
                {
                    bytes |= 0x8E00;
                    return true;
                }

                // 0x81-0x9f and 0xe0-0xfc CP 932
                // 0x8e and 0xa1-0xfe CP 20932 (we don't use 8e though)
                // a1-df is 1/2 Katakana
                // So 81-9f & e0-fc are 932 lead bytes, a1-fe are our lead bytes
                // so ignore everything above 0x80 except 0xa0 and 0xff
                if (bytes >= 0x81 && bytes != 0xa0 && bytes != 0xff)
                {
                    // We set different lead bytes later, so just return false
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Marks every entry from 0xA1 through 0xFE, and the entry 0x8E, of the supplied
        /// table as <c>LEAD_BYTE_CHAR</c>.
        /// </summary>
        /// <param name="chars">
        /// Table indexed by byte value. It must have at least 0xFF entries, since indexes
        /// up to 0xFE are written without any bounds check.
        /// </param>
        [System.Security.SecurityCritical]  // auto-generated
        protected override unsafe void CleanUpEndBytes(char* chars)
        {
            // Need to special case CP 51932
            // 0x81-0x9f and 0xe0-0xfc CP 932
            // 0x8e and 0xa1-0xfe CP 20932
            // 0x10 and 0x21-0x9? Us (remapping 932)
            // a1-df is 1/2 Katakana (trail byte)

            // A1-FE are DBCS code points
            for (int i = 0xA1; i <= 0xFE; i++)
            {
                chars[i] = LEAD_BYTE_CHAR;
            }

            // And 8E is lead byte for Katakana (already set)
            chars[0x8e] = LEAD_BYTE_CHAR;
        }
    }
}
#endif // FEATURE_CODEPAGES_FILE

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
Link Menu

This book is available now!
Buy at Amazon US or
Buy at Amazon UK
- RowBinding.cs
- CacheHelper.cs
- TdsValueSetter.cs
- SqlConnectionManager.cs
- DynamicEntity.cs
- DrawTreeNodeEventArgs.cs
- PermissionToken.cs
- _NegotiateClient.cs
- BooleanAnimationBase.cs
- TextTreeUndo.cs
- ConfigXmlSignificantWhitespace.cs
- EastAsianLunisolarCalendar.cs
- ServiceModelConfigurationElementCollection.cs
- DesignOnlyAttribute.cs
- EventToken.cs
- TemplateInstanceAttribute.cs
- SqlNodeTypeOperators.cs
- DetailsViewCommandEventArgs.cs
- ToolTipAutomationPeer.cs
- ListItemConverter.cs
- SqlClientMetaDataCollectionNames.cs
- ApplicationActivator.cs
- XmlDigitalSignatureProcessor.cs
- AttachedPropertyBrowsableAttribute.cs
- Typeface.cs
- HttpRuntimeSection.cs
- PropagatorResult.cs
- LinqDataSource.cs
- WebPartConnectionsEventArgs.cs
- CryptoApi.cs
- SkipStoryboardToFill.cs
- AuthorizationSection.cs
- OLEDB_Util.cs
- SolidBrush.cs
- DesignerActionItemCollection.cs
- ContentValidator.cs
- invalidudtexception.cs
- AstTree.cs
- QilReplaceVisitor.cs
- WaitForChangedResult.cs
- Enum.cs
- TypeConverterAttribute.cs
- InfocardExtendedInformationCollection.cs
- MissingSatelliteAssemblyException.cs
- WmlValidationSummaryAdapter.cs
- XmlComplianceUtil.cs
- DesignerVerb.cs
- RequestCacheManager.cs
- XmlSchemaCollection.cs
- BindingValueChangedEventArgs.cs
- DataSourceXmlElementAttribute.cs
- SplitterEvent.cs
- Adorner.cs
- Visual3D.cs
- HtmlControlDesigner.cs
- Int64Animation.cs
- Cursor.cs
- AddInBase.cs
- grammarelement.cs
- TreeViewEvent.cs
- _AcceptOverlappedAsyncResult.cs
- PropertyManager.cs
- ReturnType.cs
- CqlQuery.cs
- PersonalizationProvider.cs
- WithParamAction.cs
- ListBase.cs
- DataGridSortCommandEventArgs.cs
- ButtonFieldBase.cs
- GridPattern.cs
- SessionPageStateSection.cs
- Point3D.cs
- indexingfiltermarshaler.cs
- EntityContainerEntitySet.cs
- ColumnHeaderConverter.cs
- InvalidChannelBindingException.cs
- WebWorkflowRole.cs
- ColorAnimationBase.cs
- Geometry.cs
- ToolZone.cs
- MethodRental.cs
- GenericIdentity.cs
- TypedDataSetSchemaImporterExtension.cs
- DynamicDiscoveryDocument.cs
- AssociationTypeEmitter.cs
- BrowserDefinition.cs
- RuntimeArgumentHandle.cs
- HostedHttpRequestAsyncResult.cs
- PreservationFileWriter.cs
- ContactManager.cs
- Content.cs
- TypeConverterValueSerializer.cs
- EntityDataSourceMemberPath.cs
- versioninfo.cs
- TextFragmentEngine.cs
- SQLMembershipProvider.cs
- Rectangle.cs
- XmlRawWriter.cs
- SerializationInfoEnumerator.cs
- COM2IManagedPerPropertyBrowsingHandler.cs