Java 实现LZ78压缩算法的示例代码

网友投稿 325 2022-10-25


Java 实现LZ78压缩算法的示例代码

LZ78 压缩算法的 java 实现

1、压缩算法的实现

通过多路搜索树提高检索速度

package com.wretchant.lz78;

import java.util.*;

/**

多路英文单词查找树

*/

class Trie {

private TrieNode root;

public Trie() {

root = new TrieNode();

root.wordEnd = false;

}

public void insert(String word) {

TrieNode node = root;

for (int i = 0; i < word.length(); i++) {

Character c = word.charAt(i);

if (!node.childdren.containsKey(c)) {

node.childdren.put(c, new TrieNode());

}

node = node.childdren.get(c);

}

node.wordEnd = true;

}

public boolean search(String word) {

TrieNode node = root;

for (int i = 0; i < word.length(); i++) {

Character c = word.charAt(i);

if (!node.childdren.containsKey(c)) {

return false;

}

node = node.childdren.get(c);

}

return node.wordEnd;

}

}

class TrieNode {

Map childdren;

boolean wordEnd;

public TrieNode() {

childdren = new HashMap();

wordEnd = false;

http:// }

}

/**

编码表

*/

class Output {

private Integer index;

private Character character;

Output(Integer index, Character character) {

this.index = index;

this.character = character;

}

public Integer getIndex() {

return index;

}

public Character getCharacter() {

return character;

}

}

class LZencode {

@FunctionalInterface

interface Encode {

List encode(String message);

}

/**

构建多路搜索树

*/

static Trie buildTree(Set keys) {

Trie trie = new Trie();

keys.forEach(trie::insert);

return trie;

}

public static final Encode ENCODE = message -> {

// 构建压缩后的编码表

List outputs = new ArrayList<>();

Map treeDict = new HashMap<>();

int mLen = message.length();

int i = 0;

while (i < mLen) {

Set keySet = treeDict.keySet();

// 生成多路搜索树

Trie trie = buildTree(keySet);

char messageI = message.charAt(i);

String messageIStr = String.valueOf(messageI);

// 使用多路树进行搜索

if (!trie.search(messageIStr)) {

outputs.add(new Output(0, messageI));

treeDict.put(messageIStr, treeDict.size() + 1);

i++;

} else if (i == mLen - 1) {

outputs.add(new Output(treeDict.get(messageIStr), ' '));

i++;

} else {

for (int j = i + 1; j < mLen; j++) {

String substring = message.substring(i, j + 1);

String str = message.substring(i, j);

// 使用多路树进行搜索

if (!trie.search(substring)) {

outputs.add(new Output(treeDict.get(str), message.charAt(j)));

treeDict.put(substring, treeDict.sizgvdCPzRie() + 1);

i = j + 1;

break;

}

if (j == mLen - 1) {

outputs.add(new Output(treeDict.get(substring), ' '));

i = j + 1;

}

}

}

}

return outputs;

};

}

2、解压缩算法的实现

package com.wretchant.lz78;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

public class LZdecode {

@FunctionalInterface

interface Decode {

/**

@param outputs 编码表

@return 解码后的字符串

*/

String decode(List outputs);

}

/**

根据编码表进行解码

*/

public static final Decode DECODE = (List outputs) -> {

StringBuilder unpacked = new StringBuilder();

Map treeDict = new HashMap<>();

for (Output output : outputs) {

Integer index = output.getIndex();

Character character = output.getCharacter();

if (index == 0) {

unpacked.append(character);

treeDict.put(treeDict.size() + 1, character.toString());

continue;

}

String term = "" + treeDict.get(index) + character;

unpacked.append(term);

treeDict.put(treeDict.size() + 1, term);

}

return unpacked.toString();

};

}

3、测试和使用

package com.wretchant.lz78;

import java.io.InputStream;

import java.util.List;

import java.util.Scanner;

import java.util.function.ToIntFunction;

public class LZpack {

public static final ToIntFunction> DICT_PRINT = outputs -> {

outputs.forEach(output http://-> {

System.out.println("index :" + output.getIndex() + " char :" + output.getCharacter());

});

return 1;

};

public static void main(String[] args) {

Scanner scanner = new Scanner(System.in);

System.out.println("Please input text ");

String input = scanner.nextLine();

LZencode.Encode encode = LZencode.ENCODE;

List outputs = encode.encode(input);

DICT_PRINT.applyAsInt(outputs);

}

}

测试结果如下

4、python 版本的实现代码

def compress(message):

tree_dict, m_len, i = {}, len(message), 0

while i < m_len:

# case I

if message[i] not in tree_dict.keys():

yield (0, message[i])

tree_dict[message[i]] = len(tree_dict) + 1

i += 1

# case III

elif i == m_len - 1:

yield (tree_dict.get(message[i]), '')

i += 1

else:

for j in range(i + 1, m_len):

# case II

if message[i:j + 1] not in tree_dict.keys():

yield (tree_dict.get(message[i:j]), message[j])

tree_dict[message[i:j + 1]] = len(tree_dict) + 1

i = j + 1

break

# case III

elif j == m_len - 1:

yield (tree_dict.get(message[i:j + 1]), '')

i = j + 1

def uncompress(packed):

unpacked, tree_dict = '', {}

for index, ch in packed:

if index == 0:

unpacked += ch

tree_dict[len(tree_dict) + 1] = ch

else:

term = tree_dict.get(index) + ch

unpacked += term

tree_dict[len(tree_dict) + 1] = term

return unpacked

if __name__ == '__main__':

messages = ['ABBCBCABABCAABCAAB', 'BABAABRRRA', 'AAAAAAAAA']

for m in messages:

pack = compress(m)

unpack = uncompress(pack)

print(unpack == m)


版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:dns补
下一篇:squid
相关文章

 发表评论

评论列表