Java LZ78压缩算法 Java 实现LZ78压缩算法的代码实例
简简单单OnlineZuozuo 人气:0想了解Java 实现LZ78压缩算法的代码实例的相关内容吗,简简单单OnlineZuozuo在本文为您仔细讲解Java LZ78压缩算法的相关知识和一些Code实例,欢迎阅读和指正,我们先划重点:Java,LZ78压缩算法,Java,,压缩算法,下面大家一起来学习吧。
LZ78 压缩算法的 Java 实现
1、压缩算法的实现
通过多路搜索树提高检索速度
package com.wretchant.lz78; import java.util.*; /** 多路英文单词查找树 */ class Trie { private TrieNode root; public Trie() { root = new TrieNode(); root.wordEnd = false; } public void insert(String word) { TrieNode node = root; for (int i = 0; i < word.length(); i++) { Character c = word.charAt(i); if (!node.childdren.containsKey(c)) { node.childdren.put(c, new TrieNode()); } node = node.childdren.get(c); } node.wordEnd = true; } public boolean search(String word) { TrieNode node = root; for (int i = 0; i < word.length(); i++) { Character c = word.charAt(i); if (!node.childdren.containsKey(c)) { return false; } node = node.childdren.get(c); } return node.wordEnd; } } class TrieNode { Map<Character, TrieNode> childdren; boolean wordEnd; public TrieNode() { childdren = new HashMap<Character, TrieNode>(); wordEnd = false; } } /** 编码表 */ class Output { private Integer index; private Character character; Output(Integer index, Character character) { this.index = index; this.character = character; } public Integer getIndex() { return index; } public Character getCharacter() { return character; } } class LZencode { @FunctionalInterface interface Encode { List<Output> encode(String message); } /** 构建多路搜索树 */ static Trie buildTree(Set<String> keys) { Trie trie = new Trie(); keys.forEach(trie::insert); return trie; } public static final Encode ENCODE = message -> { // 构建压缩后的编码表 List<Output> outputs = new ArrayList<>(); Map<String, Integer> treeDict = new HashMap<>(); int mLen = message.length(); int i = 0; while (i < mLen) { Set<String> keySet = treeDict.keySet(); // 生成多路搜索树 Trie trie = buildTree(keySet); char messageI = message.charAt(i); String messageIStr = String.valueOf(messageI); // 使用多路树进行搜索 if (!trie.search(messageIStr)) { outputs.add(new Output(0, messageI)); treeDict.put(messageIStr, treeDict.size() + 1); i++; } else if (i == mLen - 1) { outputs.add(new Output(treeDict.get(messageIStr), ' ')); i++; } else { for (int j = i + 1; j < mLen; j++) { String substring = message.substring(i, j + 1); String str = message.substring(i, j); // 使用多路树进行搜索 if (!trie.search(substring)) { outputs.add(new Output(treeDict.get(str), message.charAt(j))); treeDict.put(substring, treeDict.size() + 1); i = j + 1; break; } if (j == mLen - 1) { outputs.add(new Output(treeDict.get(substring), ' ')); i = j + 1; } } } } return outputs; }; }
2、解压缩算法的实现
package com.wretchant.lz78; import java.util.HashMap; import java.util.List; import java.util.Map; public class LZdecode { @FunctionalInterface interface Decode { /** @param outputs 编码表 @return 解码后的字符串 */ String decode(List<Output> outputs); } /** 根据编码表进行解码 */ public static final Decode DECODE = (List<Output> outputs) -> { StringBuilder unpacked = new StringBuilder(); Map<Integer, String> treeDict = new HashMap<>(); for (Output output : outputs) { Integer index = output.getIndex(); Character character = output.getCharacter(); if (index == 0) { unpacked.append(character); treeDict.put(treeDict.size() + 1, character.toString()); continue; } String term = "" + treeDict.get(index) + character; unpacked.append(term); treeDict.put(treeDict.size() + 1, term); } return unpacked.toString(); }; }
3、测试和使用
package com.wretchant.lz78; import java.io.InputStream; import java.util.List; import java.util.Scanner; import java.util.function.ToIntFunction; public class LZpack { public static final ToIntFunction<List<Output>> DICT_PRINT = outputs -> { outputs.forEach(output -> { System.out.println("index :" + output.getIndex() + " char :" + output.getCharacter()); }); return 1; }; public static void main(String[] args) { Scanner scanner = new Scanner(System.in); System.out.println("Please input text "); String input = scanner.nextLine(); LZencode.Encode encode = LZencode.ENCODE; List<Output> outputs = encode.encode(input); DICT_PRINT.applyAsInt(outputs); } }
测试结果如下
4、Python 版本的实现代码
def compress(message): tree_dict, m_len, i = {}, len(message), 0 while i < m_len: # case I if message[i] not in tree_dict.keys(): yield (0, message[i]) tree_dict[message[i]] = len(tree_dict) + 1 i += 1 # case III elif i == m_len - 1: yield (tree_dict.get(message[i]), '') i += 1 else: for j in range(i + 1, m_len): # case II if message[i:j + 1] not in tree_dict.keys(): yield (tree_dict.get(message[i:j]), message[j]) tree_dict[message[i:j + 1]] = len(tree_dict) + 1 i = j + 1 break # case III elif j == m_len - 1: yield (tree_dict.get(message[i:j + 1]), '') i = j + 1 def uncompress(packed): unpacked, tree_dict = '', {} for index, ch in packed: if index == 0: unpacked += ch tree_dict[len(tree_dict) + 1] = ch else: term = tree_dict.get(index) + ch unpacked += term tree_dict[len(tree_dict) + 1] = term return unpacked if __name__ == '__main__': messages = ['ABBCBCABABCAABCAAB', 'BABAABRRRA', 'AAAAAAAAA'] for m in messages: pack = compress(m) unpack = uncompress(pack) print(unpack == m)
加载全部内容