字符串排序算法
不懒人 人气:2
字母表数据结构
package string; import edu.princeton.cs.algs4.StdOut; public class Alphabet { public static final Alphabet BINARY = new Alphabet("01"); public static final Alphabet OCTAL = new Alphabet("01234567"); public static final Alphabet DECIMAL = new Alphabet("0123456789"); public static final Alphabet HEXADECIMAL = new Alphabet("0123456789ABCDEF"); public static final Alphabet DNA = new Alphabet("ACGT"); public static final Alphabet LOWERCASE = new Alphabet("abcdefghijklmnopqrstuvwxyz"); public static final Alphabet UPPERCASE = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); public static final Alphabet PROTEIN = new Alphabet("ACDEFGHIKLMNPQRSTVWY"); public static final Alphabet BASE64 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); public static final Alphabet ASCII = new Alphabet(128); public static final Alphabet EXTENDED_ASCII = new Alphabet(256); public static final Alphabet UNICODE16 = new Alphabet(65536); private char[] alphabet; // the characters in the alphabet private int[] inverse; // indices private final int R; // the radix of the alphabet public Alphabet(String alpha) { // check that alphabet contains no duplicate chars boolean[] unicode = new boolean[Character.MAX_VALUE]; for (int i = 0; i < alpha.length(); i++) { char c = alpha.charAt(i); if (unicode[c]) throw new IllegalArgumentException("Illegal alphabet: repeated character = '" + c + "'"); unicode[c] = true; } alphabet = alpha.toCharArray(); R = alpha.length(); inverse = new int[Character.MAX_VALUE]; for (int i = 0; i < inverse.length; i++) inverse[i] = -1; // can't use char since R can be as big as 65,536 for (int c = 0; c < R; c++) inverse[alphabet[c]] = c; } private Alphabet(int radix) { this.R = radix; alphabet = new char[R]; inverse = new int[R]; // can't use char since R can be as big as 65,536 for (int i = 0; i < R; i++) alphabet[i] = (char) i; for (int i = 0; i < R; i++) inverse[i] = i; } public Alphabet() { this(256); } public boolean contains(char c) { return 
inverse[c] != -1; } @Deprecated public int R() { return R; } public int radix() { return R; } public int lgR() { int lgR = 0; for (int t = R-1; t >= 1; t /= 2) lgR++; return lgR; } public int toIndex(char c) { if (c >= inverse.length || inverse[c] == -1) { throw new IllegalArgumentException("Character " + c + " not in alphabet"); } return inverse[c]; } public int[] toIndices(String s) { char[] source = s.toCharArray(); int[] target = new int[s.length()]; for (int i = 0; i < source.length; i++) target[i] = toIndex(source[i]); return target; } public char toChar(int index) { if (index < 0 || index >= R) { throw new IllegalArgumentException("index must be between 0 and " + R + ": " + index); } return alphabet[index]; } public String toChars(int[] indices) { StringBuilder s = new StringBuilder(indices.length); for (int i = 0; i < indices.length; i++) s.append(toChar(indices[i])); return s.toString(); } public static void main(String[] args) { int[] encoded1 = Alphabet.BASE64.toIndices("NowIsTheTimeForAllGoodMen"); String decoded1 = Alphabet.BASE64.toChars(encoded1); StdOut.println(decoded1); int[] encoded2 = Alphabet.DNA.toIndices("AACGAACGGTTTACCCCG"); String decoded2 = Alphabet.DNA.toChars(encoded2); StdOut.println(decoded2); int[] encoded3 = Alphabet.DECIMAL.toIndices("01234567890123456789"); String decoded3 = Alphabet.DECIMAL.toChars(encoded3); StdOut.println(decoded3); } }
一.字符串排序
1.键索引计数法
2.低位优先的字符串排序(字符串长度相同)
package string;

import edu.princeton.cs.algs4.StdIn;
import edu.princeton.cs.algs4.StdOut;

/**
 * Least-significant-digit-first (LSD) radix sorts: one for arrays of strings
 * sorted on their leading {@code w} characters, one for arrays of 32-bit integers.
 * Both run one key-indexed counting pass per digit.
 */
public class LSD {
    private static final int BITS_PER_BYTE = 8;

    // do not instantiate
    private LSD() { }

    /**
     * Rearranges the array of strings in ascending order of their first
     * {@code w} characters.
     *
     * <p>Every string must have at least {@code w} characters, and each of those
     * characters must have a code below 256 (the counting table has 257 slots).
     *
     * @param a the array to be sorted
     * @param w the number of characters per string to sort on
     */
    public static void sort(String[] a, int w) {
        int n = a.length;
        int R = 256;   // extended ASCII alphabet size
        String[] aux = new String[n];

        // sort by key-indexed counting on the dth character, rightmost first
        for (int d = w - 1; d >= 0; d--) {

            // compute frequency counts
            int[] count = new int[R + 1];
            for (int i = 0; i < n; i++) {
                count[a[i].charAt(d) + 1]++;
            }

            // compute cumulates: count[r] becomes the first slot for character r
            for (int r = 0; r < R; r++) {
                count[r + 1] += count[r];
            }

            // move data (left-to-right scan keeps equal keys in their current order)
            for (int i = 0; i < n; i++) {
                aux[count[a[i].charAt(d)]++] = a[i];
            }

            // copy back
            System.arraycopy(aux, 0, a, 0, n);
        }
    }

    /**
     * Rearranges the array of 32-bit integers in ascending order, one byte at a
     * time starting from the least significant byte.
     *
     * @param a the array to be sorted
     */
    public static void sort(int[] a) {
        final int BITS = 32;                   // each int is 32 bits
        final int R = 1 << BITS_PER_BYTE;      // each byte is between 0 and 255
        final int MASK = R - 1;                // 0xFF
        final int w = BITS / BITS_PER_BYTE;    // each int is 4 bytes

        int n = a.length;
        int[] aux = new int[n];

        for (int d = 0; d < w; d++) {

            // compute frequency counts
            int[] count = new int[R + 1];
            for (int i = 0; i < n; i++) {
                int c = (a[i] >> BITS_PER_BYTE * d) & MASK;
                count[c + 1]++;
            }

            // compute cumulates
            for (int r = 0; r < R; r++) {
                count[r + 1] += count[r];
            }

            // for the most significant byte, 0x80-0xFF (negative values) comes
            // before 0x00-0x7F (non-negative values)
            if (d == w - 1) {
                int shift1 = count[R] - count[R / 2];   // number of keys with top byte >= 0x80
                int shift2 = count[R / 2];              // number of keys with top byte <  0x80
                for (int r = 0; r < R / 2; r++) {
                    count[r] += shift1;
                }
                for (int r = R / 2; r < R; r++) {
                    count[r] -= shift2;
                }
            }

            // move data
            for (int i = 0; i < n; i++) {
                int c = (a[i] >> BITS_PER_BYTE * d) & MASK;
                aux[count[c]++] = a[i];
            }

            // copy back
            System.arraycopy(aux, 0, a, 0, n);
        }
    }

    /**
     * Reads whitespace-separated strings from standard input, LSD radix sorts
     * them on the length of the first string, and prints them to standard
     * output in ascending order. Prints nothing when the input is empty.
     *
     * @param args the command-line arguments (unused)
     */
    public static void main(String[] args) {
        String[] a = StdIn.readAllStrings();
        int n = a.length;

        // nothing to sort; also avoids reading a[0] of an empty array
        if (n == 0) {
            return;
        }

        // check that strings have fixed length
        int w = a[0].length();
        for (int i = 0; i < n; i++) {
            assert a[i].length() == w : "Strings must have fixed length";
        }

        // sort the strings
        sort(a, w);

        // print results
        for (int i = 0; i < n; i++) {
            StdOut.println(a[i]);
        }
    }
}
3.高位优先的字符串排序
package string; import edu.princeton.cs.algs4.StdIn; import edu.princeton.cs.algs4.StdOut; public class MSD { private static final int BITS_PER_BYTE = 8; private static final int BITS_PER_INT = 32; // each Java int is 32 bits private static final int R = 256; // extended ASCII alphabet size private static final int CUTOFF = 15; // cutoff to insertion sort // do not instantiate private MSD() { } public static void sort(String[] a) { int n = a.length; String[] aux = new String[n]; sort(a, 0, n-1, 0, aux); } // return dth character of s, -1 if d = length of string private static int charAt(String s, int d) { assert d >= 0 && d <= s.length(); if (d == s.length()) return -1; return s.charAt(d); } // sort from a[lo] to a[hi], starting at the dth character private static void sort(String[] a, int lo, int hi, int d, String[] aux) { // cutoff to insertion sort for small subarrays if (hi <= lo + CUTOFF) { insertion(a, lo, hi, d); return; } // compute frequency counts int[] count = new int[R+2]; for (int i = lo; i <= hi; i++) { int c = charAt(a[i], d); count[c+2]++; } // transform counts to indicies for (int r = 0; r < R+1; r++) count[r+1] += count[r]; // distribute for (int i = lo; i <= hi; i++) { int c = charAt(a[i], d); aux[count[c+1]++] = a[i]; } // copy back for (int i = lo; i <= hi; i++) a[i] = aux[i - lo]; // recursively sort for each character (excludes sentinel -1) for (int r = 0; r < R; r++) sort(a, lo + count[r], lo + count[r+1] - 1, d+1, aux); } // insertion sort a[lo..hi], starting at dth character private static void insertion(String[] a, int lo, int hi, int d) { for (int i = lo; i <= hi; i++) for (int j = i; j > lo && less(a[j], a[j-1], d); j--) exch(a, j, j-1); } // exchange a[i] and a[j] private static void exch(String[] a, int i, int j) { String temp = a[i]; a[i] = a[j]; a[j] = temp; } // is v less than w, starting at character d private static boolean less(String v, String w, int d) { // assert v.substring(0, d).equals(w.substring(0, d)); for (int i = d; i 
< Math.min(v.length(), w.length()); i++) { if (v.charAt(i) < w.charAt(i)) return true; if (v.charAt(i) > w.charAt(i)) return false; } return v.length() < w.length(); } public static void sort(int[] a) { int n = a.length; int[] aux = new int[n]; sort(a, 0, n-1, 0, aux); } // MSD sort from a[lo] to a[hi], starting at the dth byte private static void sort(int[] a, int lo, int hi, int d, int[] aux) { // cutoff to insertion sort for small subarrays if (hi <= lo + CUTOFF) { insertion(a, lo, hi, d); return; } // compute frequency counts (need R = 256) int[] count = new int[R+1]; int mask = R - 1; // 0xFF; int shift = BITS_PER_INT - BITS_PER_BYTE*d - BITS_PER_BYTE; for (int i = lo; i <= hi; i++) { int c = (a[i] >> shift) & mask; count[c + 1]++; } // transform counts to indicies for (int r = 0; r < R; r++) count[r+1] += count[r]; for (int i = lo; i <= hi; i++) { int c = (a[i] >> shift) & mask; aux[count[c]++] = a[i]; } // copy back for (int i = lo; i <= hi; i++) a[i] = aux[i - lo]; // no more bits if (d == 4) return; // recursively sort for each character if (count[0] > 0) sort(a, lo, lo + count[0] - 1, d+1, aux); for (int r = 0; r < R; r++) if (count[r+1] > count[r]) sort(a, lo + count[r], lo + count[r+1] - 1, d+1, aux); } // TODO: insertion sort a[lo..hi], starting at dth character private static void insertion(int[] a, int lo, int hi, int d) { for (int i = lo; i <= hi; i++) for (int j = i; j > lo && a[j] < a[j-1]; j--) exch(a, j, j-1); } // exchange a[i] and a[j] private static void exch(int[] a, int i, int j) { int temp = a[i]; a[i] = a[j]; a[j] = temp; } public static void main(String[] args) { String[] a = StdIn.readAllStrings(); int n = a.length; sort(a); for (int i = 0; i < n; i++) StdOut.println(a[i]); } }
4.三向字符串快速排序
package string; import edu.princeton.cs.algs4.StdIn; import edu.princeton.cs.algs4.StdOut; import edu.princeton.cs.algs4.StdRandom; public class Quick3string { private static final int CUTOFF = 15; // cutoff to insertion sort // do not instantiate private Quick3string() { } public static void sort(String[] a) { StdRandom.shuffle(a); sort(a, 0, a.length-1, 0); assert isSorted(a); } // return the dth character of s, -1 if d = length of s private static int charAt(String s, int d) { assert d >= 0 && d <= s.length(); if (d == s.length()) return -1; return s.charAt(d); } // 3-way string quicksort a[lo..hi] starting at dth character private static void sort(String[] a, int lo, int hi, int d) { // cutoff to insertion sort for small subarrays if (hi <= lo + CUTOFF) { insertion(a, lo, hi, d); return; } int lt = lo, gt = hi; int v = charAt(a[lo], d); int i = lo + 1; while (i <= gt) { int t = charAt(a[i], d); if (t < v) exch(a, lt++, i++); else if (t > v) exch(a, i, gt--); else i++; } // a[lo..lt-1] < v = a[lt..gt] < a[gt+1..hi]. 
sort(a, lo, lt-1, d); if (v >= 0) sort(a, lt, gt, d+1); sort(a, gt+1, hi, d); } // sort from a[lo] to a[hi], starting at the dth character private static void insertion(String[] a, int lo, int hi, int d) { for (int i = lo; i <= hi; i++) for (int j = i; j > lo && less(a[j], a[j-1], d); j--) exch(a, j, j-1); } // exchange a[i] and a[j] private static void exch(String[] a, int i, int j) { String temp = a[i]; a[i] = a[j]; a[j] = temp; } // is v less than w, starting at character d // DEPRECATED BECAUSE OF SLOW SUBSTRING EXTRACTION IN JAVA 7 // private static boolean less(String v, String w, int d) { // assert v.substring(0, d).equals(w.substring(0, d)); // return v.substring(d).compareTo(w.substring(d)) < 0; // } // is v less than w, starting at character d private static boolean less(String v, String w, int d) { assert v.substring(0, d).equals(w.substring(0, d)); for (int i = d; i < Math.min(v.length(), w.length()); i++) { if (v.charAt(i) < w.charAt(i)) return true; if (v.charAt(i) > w.charAt(i)) return false; } return v.length() < w.length(); } // is the array sorted private static boolean isSorted(String[] a) { for (int i = 1; i < a.length; i++) if (a[i].compareTo(a[i-1]) < 0) return false; return true; } public static void main(String[] args) { // read in the strings from standard input String[] a = StdIn.readAllStrings(); int n = a.length; // sort the strings sort(a); // print the results for (int i = 0; i < n; i++) StdOut.println(a[i]); } }
二.单词查找树
加载全部内容