Hidden Characters Pasted in Textboxes

I had a client whose users were occasionally doing data entry that caused some downstream XML processing to fail. Yet when viewing the XML, everything appeared correct. I was able to track down the culprit: hidden characters being copied and pasted into a textbox along with the rest of the text.

I created some simple extension methods to help sanitize input by catching the rogue characters. You have the option to either remove the characters or replace them if warranted.

/// <summary>
/// Extension methods that strip or replace characters which are invisible in a
/// textbox but break downstream XML processing (control characters and
/// characters outside the XML 1.0 character range).
/// </summary>
public static class StringExtensions
    {
        /// <summary>
        /// Removes every character that is not a legal XML character.
        /// Valid surrogate pairs (supplementary characters such as emoji) are kept;
        /// unpaired surrogates are removed.
        /// </summary>
        /// <param name="input">The text to sanitize.</param>
        /// <returns>A copy of <paramref name="input"/> without non-XML characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public static string RemoveNonXmlCharacters(this string input)
        {
            RequireNotNull(input, "input");
            return ReplaceRejected(input, string.Empty, XmlCharacterLength);
        }

        /// <summary>
        /// Removes every character for which <see cref="char.IsControl(char)"/> is true.
        /// Note that this includes tab, carriage return and line feed.
        /// </summary>
        /// <param name="input">The text to sanitize.</param>
        /// <returns>A copy of <paramref name="input"/> without control characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public static string RemoveControlCharacters(this string input)
        {
            RequireNotNull(input, "input");
            return ReplaceRejected(input, string.Empty, NonControlCharacterLength);
        }

        /// <summary>
        /// Removes control characters and then any remaining non-XML characters.
        /// </summary>
        /// <param name="input">The text to sanitize.</param>
        /// <returns>A copy of <paramref name="input"/> containing only safe characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public static string RemoveUnsafeCharacters(this string input)
        {
            return input.RemoveControlCharacters().RemoveNonXmlCharacters();
        }

        /// <summary>
        /// Replaces each control character with <paramref name="replacementCharacter"/>.
        /// Control characters inside the replacement itself are stripped first so the
        /// result is guaranteed to be free of them.
        /// </summary>
        /// <param name="input">The text to sanitize.</param>
        /// <param name="replacementCharacter">Text substituted for each control character.</param>
        /// <returns>The sanitized text; input without control characters is returned unchanged in content.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static string ReplaceControlCharacters(this string input, string replacementCharacter)
        {
            RequireNotNull(input, "input");
            RequireNotNull(replacementCharacter, "replacementCharacter");

            string sanitizedReplacement = replacementCharacter.RemoveControlCharacters();

            // A single pass is used instead of Split/Join: String.Split with an empty
            // separator array splits on white-space, which corrupted clean input.
            return ReplaceRejected(input, sanitizedReplacement, NonControlCharacterLength);
        }

        /// <summary>
        /// Replaces each character that is not a legal XML character with
        /// <paramref name="replacementCharacter"/>. Valid surrogate pairs are kept;
        /// each unpaired surrogate is replaced. Non-XML characters inside the
        /// replacement itself are stripped first.
        /// </summary>
        /// <param name="input">The text to sanitize.</param>
        /// <param name="replacementCharacter">Text substituted for each non-XML character.</param>
        /// <returns>The sanitized text; input without non-XML characters is returned unchanged in content.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static string ReplaceNonXmlCharacters(this string input, string replacementCharacter)
        {
            RequireNotNull(input, "input");
            RequireNotNull(replacementCharacter, "replacementCharacter");

            string sanitizedReplacement = replacementCharacter.RemoveNonXmlCharacters();

            // Same reasoning as ReplaceControlCharacters: avoid Split with a possibly
            // empty separator array, which would split on white-space.
            return ReplaceRejected(input, sanitizedReplacement, XmlCharacterLength);
        }

        /// <summary>
        /// Replaces control characters and then any remaining non-XML characters
        /// with <paramref name="replaceCharacter"/>.
        /// </summary>
        /// <param name="input">The text to sanitize.</param>
        /// <param name="replaceCharacter">Text substituted for each unsafe character.</param>
        /// <returns>The sanitized text.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static string ReplaceUnsafeCharacters(this string input, string replaceCharacter)
        {
            return input.ReplaceControlCharacters(replaceCharacter).ReplaceNonXmlCharacters(replaceCharacter);
        }

        // Throws ArgumentNullException carrying the caller's parameter name.
        private static void RequireNotNull(string value, string parameterName)
        {
            if (value == null)
            {
                throw new ArgumentNullException(parameterName);
            }
        }

        // Number of UTF-16 code units at 'index' that form one non-control character (0 if it is a control character).
        private static int NonControlCharacterLength(string text, int index)
        {
            return char.IsControl(text[index]) ? 0 : 1;
        }

        // Number of UTF-16 code units at 'index' that form one legal XML character:
        // 1 for a BMP character, 2 for a valid surrogate pair, 0 otherwise.
        private static int XmlCharacterLength(string text, int index)
        {
            if (XmlConvert.IsXmlChar(text[index]))
            {
                return 1;
            }

            // IsXmlSurrogatePair takes the LOW surrogate first, then the HIGH one;
            // in the string the high surrogate comes first.
            if (index + 1 < text.Length && XmlConvert.IsXmlSurrogatePair(text[index + 1], text[index]))
            {
                return 2;
            }

            return 0;
        }

        // Copies 'input', substituting 'replacement' for every code unit that
        // 'acceptedLength' rejects (returns 0 for).
        private static string ReplaceRejected(string input, string replacement, Func<string, int, int> acceptedLength)
        {
            var result = new System.Text.StringBuilder(input.Length);
            int index = 0;

            while (index < input.Length)
            {
                int length = acceptedLength(input, index);
                if (length > 0)
                {
                    result.Append(input, index, length);
                    index += length;
                }
                else
                {
                    result.Append(replacement);
                    index++;
                }
            }

            return result.ToString();
        }
    }

Leave a Reply