View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.util;
5   
6   import java.util.ArrayList;
7   import java.util.Iterator;
8   import java.util.List;
9   
10  public class StringUtil {
11  
12  	public static final String[] EMPTY_STRINGS = new String[0];
13      private static final boolean supportsUTF8 = System.getProperty("net.sourceforge.pmd.supportUTF8", "no").equals("yes");
14      private static final String[] ENTITIES;
15  
16      static {
17          ENTITIES = new String[256 - 126];
18          for (int i = 126; i <= 255; i++) {
19              ENTITIES[i - 126] = "&#" + i + ';';
20          }
21      }
22  
23      public static String replaceString(String original, char oldChar, String newString) {
24          
25      	String fixedNew = newString == null ? "" : newString;
26  
27          StringBuffer desc = new StringBuffer();
28          int index = original.indexOf(oldChar);
29          int last = 0;
30          while (index != -1) {
31              desc.append(original.substring(last, index));
32              desc.append(fixedNew);
33              last = index + 1;
34              index = original.indexOf(oldChar, last);
35          }
36          desc.append(original.substring(last));
37          return desc.toString();
38      }
39  
40      public static String replaceString(String original, String oldString, String newString) {
41      	
42      	String fixedNew = newString == null ? "" : newString;
43      	
44          StringBuffer desc = new StringBuffer();
45          int index = original.indexOf(oldString);
46          int last = 0;
47          while (index != -1) {
48              desc.append(original.substring(last, index));
49              desc.append(fixedNew);
50              last = index + oldString.length();
51              index = original.indexOf(oldString, last);
52          }
53          desc.append(original.substring(last));
54          return desc.toString();
55      }
56  
57      /**
58       * Appends to a StringBuffer the String src where non-ASCII and
59       * XML special chars are escaped.
60       *
61       * @param buf The destination XML stream
62       * @param src The String to append to the stream
63       */
64      public static void appendXmlEscaped(StringBuffer buf, String src) {
65          appendXmlEscaped(buf, src, supportsUTF8);
66      }
67  
68      public static String htmlEncode(String string) {
69          String encoded = StringUtil.replaceString(string, '&', "&amp;");
70          encoded = StringUtil.replaceString(encoded, '<', "&lt;");
71          return StringUtil.replaceString(encoded, '>', "&gt;");
72      }
73      
74      // TODO - unify the method above with the one below
75      
76      private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) {
77          char c;
78          for (int i = 0; i < src.length(); i++) {
79              c = src.charAt(i);
80              if (c > '~') {// 126
81                  if (!supportUTF8) {
82                      if (c <= 255) {
83                          buf.append(ENTITIES[c - 126]);
84                      } else {
85                          buf.append("&u").append(Integer.toHexString(c)).append(';');
86                      }
87                  } else {
88                      buf.append(c);
89                  }
90              } else if (c == '&')
91                  buf.append("&amp;");
92              else if (c == '"')
93                  buf.append("&quot;");
94              else if (c == '<')
95                  buf.append("&lt;");
96              else if (c == '>')
97                  buf.append("&gt;");
98              else
99                  buf.append(c);
100         }
101     }
102 
103 	/**
104 	 * Parses the input source using the delimiter specified. This method is much
105 	 * faster than using the StringTokenizer or String.split(char) approach and
106 	 * serves as a replacement for String.split() for JDK1.3 that doesn't have it.
107      *
108      * FIXME - we're on JDK 1.4 now, can we replace this with String.split?
109 	 *
110 	 * @param source String
111 	 * @param delimiter char
112 	 * @return String[]
113 	 */
114 	public static String[] substringsOf(String source, char delimiter) {
115 
116 		if (source == null || source.length() == 0) {
117             return EMPTY_STRINGS;
118         }
119 		
120 		int delimiterCount = 0;
121 		int length = source.length();
122 		char[] chars = source.toCharArray();
123 
124 		for (int i=0; i<length; i++) {
125 			if (chars[i] == delimiter) delimiterCount++;
126 			}
127 
128 		if (delimiterCount == 0) return new String[] { source };
129 
130 		String results[] = new String[delimiterCount+1];
131 
132 		int i = 0;
133 		int offset = 0;
134 
135 		while (offset <= length) {
136 			int pos = source.indexOf(delimiter, offset);
137 			if (pos < 0) pos = length;
138 			results[i++] = pos == offset ? "" : source.substring(offset, pos);
139 			offset = pos + 1;
140 			}
141 
142 		return results;
143 	}
144 	
145 	/**
146 	 * Much more efficient than StringTokenizer.
147 	 * 
148 	 * @param str String
149 	 * @param separator char
150 	 * @return String[]
151 	 */
152 	  public static String[] substringsOf(String str, String separator) {
153 		  
154 	        if (str == null || str.length() == 0) {
155 	            return EMPTY_STRINGS;
156 	        }
157 
158 	        int index = str.indexOf(separator);
159 	        if (index == -1) {
160 	            return new String[]{str};
161 	        }
162 
163 	        List<String> list = new ArrayList<String>();
164 	        int currPos = 0;
165 	        int len = separator.length();
166 	        while (index != -1) {
167 	            list.add(str.substring(currPos, index));
168 	            currPos = index + len;
169 	            index = str.indexOf(separator, currPos);
170 	        }
171 	        list.add(str.substring(currPos));
172 	        return list.toArray(new String[list.size()]);
173 	    }
174 	
175 	
176 	/**
177 	 * Copies the elements returned by the iterator onto the string buffer
178 	 * each delimited by the separator.
179 	 *
180 	 * @param sb StringBuffer
181 	 * @param iter Iterator
182 	 * @param separator String
183 	 */
184 	public static void asStringOn(StringBuffer sb, Iterator iter, String separator) {
185 		
186 	    if (!iter.hasNext()) return;
187 	    
188 	    sb.append(iter.next());
189 	    
190 	    while (iter.hasNext()) {
191 	    	sb.append(separator);
192 	        sb.append(iter.next());
193 	    }
194 	}
195 	/**
196 	 * Return the length of the shortest string in the array.
197 	 * If any one of them is null then it returns 0.
198 	 * 
199 	 * @param strings String[]
200 	 * @return int
201 	 */
202 	public static int lengthOfShortestIn(String[] strings) {
203 		
204 		int minLength = Integer.MAX_VALUE;
205 		
206 		for (int i=0; i<strings.length; i++) {
207 			if (strings[i] == null) return 0;
208 			minLength = Math.min(minLength, strings[i].length());
209 		}
210 		
211 		return minLength;
212 	}
213 	
214 	/**
215 	 * Determine the maximum number of common leading whitespace characters
216 	 * the strings share in the same sequence. Useful for determining how
217 	 * many leading characters can be removed to shift all the text in the
218 	 * strings to the left without misaligning them.
219 	 * 
220 	 * @param strings String[]
221 	 * @return int
222 	 */
223 	public static int maxCommonLeadingWhitespaceForAll(String[] strings) {
224 		
225 		int shortest = lengthOfShortestIn(strings);
226 		if (shortest == 0) return 0;
227 		
228 		char[] matches = new char[shortest];
229 		
230 		String str;
231 		for (int m=0; m<matches.length; m++) {
232 			matches[m] = strings[0].charAt(m);
233 			if (!Character.isWhitespace(matches[m])) return m;
234 			for (int i=0; i<strings.length; i++) {
235 				str = strings[i];
236 				if (str.charAt(m) != matches[m])  return m; 
237 				}
238 		}
239 		
240 		return shortest;
241 	}
242 	
243 	/**
244 	 * Trims off the leading characters off the strings up to the trimDepth 
245 	 * specified. Returns the same strings if trimDepth = 0
246 	 * 
247 	 * @param strings
248 	 * @param trimDepth
249 	 * @return String[]
250 	 */
251 	public static String[] trimStartOn(String[] strings, int trimDepth) {
252 		
253 		if (trimDepth == 0) return strings;
254 		
255 		String[] results = new String[strings.length];
256 		for (int i=0; i<strings.length; i++) {
257 			results[i] = strings[i].substring(trimDepth);
258 		}
259 		return results;
260    }
261 	
262     /**
263      * Left pads a string.
264      * @param s The String to pad
265      * @param length The desired minimum length of the resulting padded String
266      * @return The resulting left padded String
267      */
268     public static String lpad(String s, int length) {
269          String res = s;
270          if (length - s.length() > 0) {
271              char [] arr = new char[length - s.length()];
272              java.util.Arrays.fill(arr, ' ');
273              res = new StringBuffer(length).append(arr).append(s).toString();
274          }
275          return res;
276     }
277     
278     /**
279      * Are the two String values the same.
280      * The Strings can be optionally trimmed before checking.
281      * The Strings can be optionally compared ignoring case.
282      * The Strings can be have embedded whitespace standardized before comparing.
283      * Two null values are treated as equal.
284      * 
285      * @param s1 The first String.
286      * @param s2 The second String.
287      * @param trim Indicates if the Strings should be trimmed before comparison.
288      * @param ignoreCase Indicates if the case of the Strings should ignored during comparison.
289      * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
290      * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
291      */
292     public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) {
293 		if (s1 == s2) {
294 			return true;
295 		} else if (s1 == null || s2 == null) {
296 			return false;
297 		} else {
298 			if (trim) {
299 				s1 = s1.trim();
300 				s2 = s2.trim();
301 			}
302 			if (standardizeWhitespace) {
303 				// Replace all whitespace with a standard single space character.
304 				s1 = s1.replaceAll("\\s+", " ");
305 				s2 = s2.replaceAll("\\s+", " ");
306 			}
307 			return ignoreCase ? s1.equalsIgnoreCase(s2) : s1.equals(s2);
308 		}
309     }
310 }