使用bitset实现毫秒级查询(实例讲解)

网友投稿 276 2023-03-23


使用bitset实现毫秒级查询(实例讲解)

前言

因为业务要求api的一次请求响应时间在10ms以内,所以传统的数据库查询操作直接被排除(网络io和磁盘io)。通过调研,最终使用了bieset,目前已经正常运行了很久

bitset介绍

看JDK中的解释简直一头雾水,用我自己的理解概括一下

1.bitset的内部实现是long数组

2.set中每一个位的默认值为false(0)

3.bitset长度按需增长

4.bitset非线程安全

bitset关键方法分析

/**

* Sets the bit at the specified index to {@code true}.

*

* @param bitIndex a bit index

* @throws IndexOutOfBoundsException if the specified index is negative

* @since JDK1.0

*/

public void set(int bitIndex) {

if (bitIndex < 0)

throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

int wordIndex = wordIndex(bitIndex);

expandTo(wordIndex);

words[wordIndex] |= (1L << bitIndex); // Restores invariants

checkInvariants();

}

设置指定“位”为true,bitIndex参数为非负整数。假设我们执行以下代码,观察上面代码中worIndex,words[wordIndex]值的变化

BitSet bs = new BitSet()

bs.set(0);

bs.set(1);

bs.set(2);

bs.set(3);

bs.set(4);

bitIndex

wordIndex

words[wordIndex]

words[wordIndex]二进制表示

0

0

1

0001

1

0

3

0011

2

0

7

0111

3

0

15

1111

4

0

31

0001 1111

通过上表,我们可以很清晰的根据bitIndex和words[wordIndex]二进制值的对应关系,得到一个结论,即:bitset中每一个long可以表示64个非负整数在bitSet中存在与否。例如:0001可以表示整数0在bitset中存在,1111可以表示整数3,2,1,0在bitset中存在。

进入正题,实现bitset毫秒级查询

想象一个场景,我们有一张user表

CREATE TABLE `user` (

`id` int(11) NOT NULL AUTO_INCREMENT,

`name` varchar(255) NOT NULL,

`address` varchar(255) NOT NULL comment '地址',

`gender` varchar(10) NOT NULL comment '性别',

`age` varchar(10) http://NOT NULL,

PRIMARY KEY (`uid`)

) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8;

假设我们要查询“北京市18岁的女生”,那么对应的sql如下:

select * from `user` where address='北京' and age='18' and gender='girl';

如何使用bitset实现同样的查询呢?

1.将user表数据加载进内存中

2.为user表建立address,age,gender维度的bitset索引

3.根据索引查询数据

1.将user表数据加载进内存中

将user表从数据库读取出来存入List

User实体类:

public class User implements Cloneable {

private String name;

private String address;

private String gender;

private String age;

@Override

public String toString() {

return "User [name=" + name + ", address=" + address + ", gender=" + gender + ", age=" + age + "]";

}

@Override

public User clone() {

User user = null;

try {

user = (User) super.clone();

} catch (CloneNotSupportedException e) {

e.printStackTrace();

}

return user;

}

//省略get set 方法。。。

2.建立索引

创建bitset索引模型类

public class BitSetIndexModel {

private String type;//索引类型:address,age,gender

private ConcurrentHashMap map;//索引类型和bitSet在bsList中下标的映射关系

private List list;//索引类型的值集合,例如gender:girl,boy

private List bsList;//bitset集合

public BitSetIndex() {

}

public BitSetIndexModel(String type) {

this.type = type;

map = new ConcurrentHashMap();

list = new ArrayList();

bsList = new ArrayList();

}

public String getType() {

return type;

}

public void setType(String type) {

this.type = type;

}

public Map getMap() {

return map;

}

public void setMap(ConcurrentHashMap map) {

this.map = map;

}

public List getList() {

return list;

}

public void setList(List list) {

this.list = list;

}

public List getBsList() {

return bsList;

}

public void setBsList(List bsList) {

this.bsList = bsList;

}

/**

*

* @param str

* @param i

*/

public void createIndex(String str, int i) {

BitSet bitset = null;

//获取‘str'对应的bitset在bsList中的下标

Integer index = this.getMap().get(str);

if (index != null) {

bitset = this.getBsList().get(index);

if (bitset == null) {

bitset = new BitSet();

this.getBsList().add(index, bitset);

}

bitset.set(i, true);// 将str对应的位置为true,true可省略

} else {

bitset = new BitSet();

List list = this.getList();

list.add(str);

index = list.size() - 1;

bitset.set(i, true);

this.getBsList().add(bitset);

this.getMap().put(str, index);

}

}

/**

* 从entity里拿出符合条件的bitset

*

* @param str

* @return

*/

public BitSet get(String str) {

BitSet bitset = null;

str = str.toLowerCase();

Integer index = this.getMap().get(str);

if (index != null) {

bitset = this.getBsList().get(index);

} else {

bitset = new BitSet();

}

return bitset;

}

/**

* bitset的与运算

*

* @param str

* @param bitset

* @return

*/

public BitSet and(String str, BitSet bitset) {

if (str != null) {

str = str.toLowerCase();

if (bitset != null) {

bitset.and(get(str));

} else {

bitset = new BitSet();

bitset.or(get(str));

}

}

return bitset;

}

/**

* bitset的或运算

*

* @param str

* @param bitset

* @return

*/

public BitSet or(String str, BitSet bitset) {

if (str != null) {

str = str.toLowerCase();

if (bitset != null) {

bitset.or(get(str));

} else {

bitset = new BitSet();

bitset.or(get(str));

}

}

return bitset;

}

/**

* 获取bitset值为true的 即 把 bitset翻译为list的索引

*

* @param bitset

* @return

*/

public static List getRealIndexs(BitSet bitset) {

List indexs = new ArrayList();

if (bitset != null) {

int i = bitset.nextSetBit(0);

if (i != -1) {

indexs.add(i);

for (i = bitset.nextSetBit(i + 1); i >= 0; i = bitset.nextSetBit(i + 1)) {

int endOfRun = bitset.nextClearBit(i);

do {

indexs.add(i);

} while (++i < endOfRun);

}

}

}

return indexs;

}

}

为每一个user对象创建address,gender,age维度索引

public class UserIndexStore {

private static final String ADDRESS = "address";

private static final String GENDER = "gender";

private static final String AGE = "age";

private BitSetIndexModel address;

private BitSetIndexModel gender;

private BitSetIndexModel age;

private ConcurrentHashMap userMap;//存储所有的user数据

public static final UserIndexStore INSTANCE = getInstance();

private UserIndexStore() {

init();

}

public static UserIndexStore getInstance() {

return UserIndexStoreHolder.instance;

}

private static class UserIndexStoreHolder {

private static UserIndexStore instance = new UserIndexStore();

}

private void init() {

this.address = new BitSetIndexModel(ADDRESS);

this.gender = new BitSetIndexModel(GENDER);

this.age = new BitSetIndexModel(AGE);

userMap = new ConcurrentHashMap();

}

/**

* 构建索引

* @param users

*/

public void createIndex(List users) {

if (users != null && users.size() > 0) {

for (int index = 0; index < users.size(); index++) {

User user = users.get(index);

getAddress().update(user.getAddress(), index);

getGender().update(user.getGender(), index);

getAge().update(user.getAge(), index);

this.userMap.put(index, user);

}

}

}

public BitSet query(String address, String gender, String age) {

BitSet bitset = null;

bitset = getAddress().and(address, bitset);

bitset = getGender().and(gender, bitset);

bitset = getAge().and(age, bitset);

return bitset;

}

public User findUser(Integer index) {

User user = this.userMap.get(index);

if (user != null) {

return user.clone();//可能会对user做修改操作,要保证内存原始数据不变

}

return null;

}

public BitSetIndexModel getAddress() {

return address;

}

public void setAddress(BitSetIndexModel address) {

this.address = address;

}

public BitSetIndexModel getGender() {

return gender;

}

public void setGender(BitSetIndexModel gender) {

this.gender = gender;

}

public BitSetIndexModel getAge() {

return age;

}

public void setAge(BitSetIndexModel age) {

this.age = age;

}

}

3.测试bitset

public class BitSetTest {

public static void main(String[] args) {

List users = buildData();

UserIndexStore.getInstance().createIndex(users);

ExecutorService executorService = Executors.newFixedThreadPool(20);

int num = 2000;

long begin1 = System.currentTimeMillis();

for (int i = 0; i < num; i++) {

Runnable syncRunnable = new Runnable() {

@Override

public void run() {

List indexs = BitSetIndexModel.getRealIndexs(UserIndexStore.getInstance().query("北京", "girl", "18"));

for (Integer index : indexs) {

UserIndexStore.getInstance().findUser(index);

}

}

};

executorService.execute(syncRunnable);

}

executorService.shutdown();

while (true) {

try {

if (executorService.awaitTermination(1, TimeUnit.SECONDS)) {

System.out.println("单次查询时间为:" + (System.currentTimeMillis() - begin1) / num + "ms");

break;

}

} catch (InterruptedException e) {

e.printStackTrace();

}

}

}

private static List buildData() {

String[] addresss = { "北京", "上海", "深圳" };

String[] ages = { "16", "17", "18" };

List users = new ArrayList<>();

for (int i = 0; i < 200000; i++) {

User user = new User();

user.setName("name" + i);

int rand = ThreadLocalRandom.current().nextInt(3);

user.setAddress(addresss[ThreadLocalRandom.current().nextInt(3)]);

user.setGender((rand & 1) == 0 ? "girl" : "boy");

user.setAge(ages[ThreadLocalRandom.current().nextInt(3)]);

users.add(user);

}

return users;

}

}

测试结果(查询2w次):

数据量(users.size()) | 并发数 | 平均查询时间

---|---|---

20w | 10 | 1ms

50w | 20 | 3ms

100w| 50 | 9ms

测试机为thinkpad x240 i5 8g内存

4.总结

==优点==:

通过测试发现随着数据量的增大和并发数的提高,平均耗时并没有明显升高,并且响应时间都在10ms以内

==缺点==:

1.不适合数据量太大的情况,因为需要把数据全部加载进内存

2.不适合复杂查询

3.不适合对name,id等唯一值做查询

后记

因为我们的查询业务比较简单,唯一的要求是速度,并且数据量也不大,每张表的数据量都不超过100w,所以使用这种方式比较合适。

在本篇文章中只谈到了如何创建索引,以及最基本的查询,在下一篇中我会继续说明如何更新索引,以及一些复杂查询,比如<,>,between and。


版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:基于mybatis高级映射多对多查询的实现
下一篇:spring中的FactoryBean代码示例
相关文章

 发表评论

暂时没有评论,来抢沙发吧~