提升SQLITE插入、查询效率的方法

  • A+
所属分类:C#

提升SQLITE插入、查询效率的方法 - 小麒麟的成长之路 - CSDN博客

提升SQLITE插入、查询效率的方法

rc = sqlite3_exec(db, "create table if not exists testinfo (id integer primary key, age integer, height text, weight text)", NULL, NULL, &zErr);

插入篇:

1、sqlite3_exec()
通常,我们使用sqlite3_exec()函数来处理数据的插入操作,该函数直接调用sql语句对数据进行插入,所以使用起来很方便,插入100w条数据

1.
for( i = 0; i < 1000000; i++)
2.
{
3.

snprintf(sqlcmd, sizeof(sqlcmd), "insert into testinfo values(%d, %d, '%d', '%s')", i, i2, i10, buf);

4.
sqlite3_exec(db, sqlcmd, NULL, NULL, &zErr);
5.
}

该函数每调用一次,都会隐式地开启一次事务,对于大批量的操作,如果不加修饰地直接多次调用该函数,会导致插入效率极低
执行5次平均耗时:1721.272秒,极其慢

2、显式调用事务
利用事务的互斥性,如果在批量的插入操作前显式地开启一次事务,在插入操作结束后,提交事务,那么所有的操作将只执行一次事务,大大地提高IO效率

1.
sqlite3_exec(db, "BEGIN;", 0, 0, NULL);
2.
for( i = 0; i < 1000000; i++)
3.
{
4.

snprintf(sqlcmd, sizeof(sqlcmd), "insert into testinfo values(%d, %d, '%d', '%s')", i, i2, i10, buf);

5.
sqlite3_exec(db, sqlcmd, NULL, NULL, &zErr);
6.
}
7.
sqlite3_exec(db, "COMMIT;", 0, 0, NULL);
执行5次平均耗时:15.559秒 有了很大改善

3、执行准备
sqlite3_exec()函数直接调用sql语句字符串,每执行一次该函数,都要进行一此“词法分析”和“语法分析”
为此sqlite引入了“执行准备”这一功能,即事先把sql语句编译成系统能够理解的语言,然后一步一步执行,这样大大地提高了效率,同样是插入100w条数据:

1.
sqlite3_exec(db, "BEGIN;", 0, 0, 0);
2.
const char sql = "insert into testinfo values(?,?,?,?)";
3.
sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, 0);
4.
for(i = 0; i < 1000000; i++)
5.
{
6.
sprintf(tmpstr, "%d", i
10);
7.
sqlite3_reset(stmt);
8.
sqlite3_bind_int(stmt, 1, i);
9.
sqlite3_bind_int(stmt, 2, i*2);
10.
sqlite3_bind_text(stmt, 3, tmpstr, -1, SQLITE_STATIC);
11.
sqlite3_bind_text(stmt, 4, buf, -1, SQLITE_STATIC);
12.
sqlite3_step(stmt);
13.
}
14.
sqlite3_finalize(stmt);
15.
sqlite3_exec(db, "COMMIT;", 0, 0, 0);
执行5次平均耗时5.298秒

4、关闭写同步
如果有定期备份的机制,而且少量数据丢失可接受,可将同步方式设置为OFF,默认为FULL。
sqlite3_exec(db, "PRAGMA synchronous = OFF; ", 0,0,0);

full写入速度最慢,但保证数据是安全的,不受断电、系统崩溃等影响,而off可以加速数据库的一些操作,但如果系统崩溃或断电,则数据库可能会损毁,所以不是很推荐这种做法

执行5次平均耗时5.468秒(不是很理想)

5、使用WAL模式

WAL:Write Ahead Logging,他是数据库中用于实现原子事务的一种机制,从3.7.0版本后引入
WAL模式主要有两个优点:

1、读写可以完全并发进行,不会互相阻塞(但是写之间仍然不能并发)
2、WAL在大多情况下,拥有更好的性能(因为无需每次写入时都要写两个文件)

Rollback journal机制原理:在修改数据库文件中的数据前,先将修改所在分页中的数据备份在另一个地方,然后再将修改写入到数据中;如果事务失败,则将备份数据拷贝回来,撤销修改;如果事务成功,则删除备份,提交修改。

WAL机制原理:修改并不直接写入到数据库文件中,而是写入到另外一个称为WAL的文件中,如果事务失败,wal中的文件会被忽略,撤销修改;如果事务成功,它将在随后的某个时间被写回到数据库文件中,提交修改。

性能差异主要源于每次事务提交,wal只需要将更新的日志写入磁盘,而delete模式首先要将原始数据拷贝到日志文件中,并进行fsync,然后将修改页写入磁盘,同时也需要fsync,确保数据落盘,并且还要清除日志文件。因此写事务在WAL模式下,只需要一次fsync,并且是顺序写,而在delete模式下需要至少两次fsync(日志,数据),并且更新的数据离散分布在多个page中,因此可能需要多个fsync。

WAL使用共享内存技术,因此所有读写进程必须在同一个机器上
开启WAL模式的方法:

sqlite3_exec(db, "PRAGMA journal_mode=WAL; ", 0,0,0);
在前面的基础上,使用WAL模式后执行5次操作平均耗时4.324秒

6、内存数据库
另外,如果数据无需长时间保存,可以使用sqlite的内存数据库替代文件数据库
开启sqlite内存数据库的方式:

1.
sqlite3* db = NULL;
2.
rc = sqlite3_open(":memory", &db);
执行5次平均耗时:4.052秒
但是内存数据库存在如下缺点:
1.断电或者程序崩溃后数据库就会消失
2.在内存中的数据库不能被别的进程访问
3.不支持像在硬盘上的读写互斥处理,需要自己加锁

查询篇:

1、sqlite3_get_table()
通常使用sqlite3_get_table()函数来执行查询
1.
for( i = 0; i < 300000; i++)
2.
{
3.
snprintf(sqlcmd, sizeof(sqlcmd), "select * from testinfo where id = %d", i);
4.
sqlite3_get_table(db, sqlcmd, &pRecord, &rows, &cols, &zErr);
5.
}
执行5次平均耗时229.438秒

2、显式开启事务
和sqlite3_exec()一样,大批量的调用该函数会导致效率极其低下,所以还是使用事务的方式来提高效率
1.
sqlite3_exec(db, "BEGIN", 0, 0, NULL);
2.
for( i = 0; i < 300000; i++)
3.
{
4.
snprintf(sqlcmd, sizeof(sqlcmd), "select * from testinfo where id = %d", i);
5.
sqlite3_get_table(db, sqlcmd, &pRecord, &rows, &cols, &zErr);
6.
}
7.
sqlite3_exec(db, "COMMIT", 0, 0, NULL);
执行5次平均耗时23.177秒

3、使用执行准备:
1.
sqlite3_exec(db, "BEGIN", 0, 0, NULL);
2.
char *sql = "select * from testinfo where id = ?";
3.
sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, 0);
4.
for(i = 0; i < 1000000; i++)
5.
{
6.
sqlite3_reset(stmt);
7.
sqlite3_bind_int(stmt, 1, i);
8.
rc = sqlite3_step(stmt);
9.
while(rc == SQLITE_ROW)
10.
{
11.
n1 = sqlite3_column_int(stmt, 0);
12.
n2 = sqlite3_column_int(stmt, 1);
13.
ch1 = sqlite3_column_text(stmt, 2);
14.
ch2 = sqlite3_column_text(stmt, 3);
15.
rc = rc = sqlite3_step(stmt);
16.
}
17.
}
18.
sqlite3_finalize(stmt);
19.
sqlite3_exec(db, "COMMIT", 0, 0, NULL);

执行5次平均耗时3.544秒

4、查询内存数据库
执行5次平均耗时3.235秒

附上完整代码:
1.

include

2.

include

3.

include

4.

include

5.

include"sqlite3.h"

6.

7.
int main(int argc, char argv[])
8.
{
9.
int rc = 0;
10.
int i = 0;
11.
int j = 0;
12.
int rows, cols;
13.
int n1, n2;
14.
sqlite3
db = NULL;
15.
sqlite3 dbMem = NULL;
16.
char
zErr = NULL;
17.
char pRecord = NULL;
18.
sqlite3_stmt
stmt = NULL;
19.
char
buf = "CJcEEAAYASCgExEFAaATEqATEy";
20.
struct timeval tmv1;
21.
struct timeval tmv2;
22.
float tmcost;
23.

24.
char tmpstr[32] = {0};
25.
char sqlcmd[2048] = {0};
26.
const char ch1;
27.
const char
ch2;
28.

29.
rc = sqlite3_open("kaf.db", &db);
30.
if (rc)
31.
{
32.
fprintf(stderr, "Can't open database:%s\n", sqlite3_errmsg(db));
33.
sqlite3_close(db);
34.
exit(1);
35.
}
36.

37.

rc = sqlite3_exec(db, "create table if not exists testinfo (id integer primary key, age integer, height text, weight text)", NULL, NULL, &zErr);

38.
if (SQLITE_OK != rc) {
39.
fprintf(stderr, "create sql failed:%s\n", zErr);
40.
sqlite3_close(db);
41.
exit(1);
42.
}
43.

44.
//sqlite3_exec()逐条插入
45.
if (atoi(argv[1]) == 1)
46.
{
47.
gettimeofday(&tmv1, NULL);
48.
for( i = 0; i < 1000000; i++)
49.
{
50.

snprintf(sqlcmd, sizeof(sqlcmd), "insert into testinfo values(%d, %d, '%d', '%s')", i, i2, i10, buf);

51.
sqlite3_exec(db, sqlcmd, NULL, NULL, &zErr);
52.
}
53.
gettimeofday(&tmv2, NULL);
54.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

55.
printf("the 1 operation costs %f\n", tmcost);
56.
}
57.
//开启事务
58.
else if(atoi(argv[1]) == 2)
59.
{
60.
gettimeofday(&tmv1, NULL);
61.
sqlite3_exec(db, "BEGIN;", 0, 0, NULL);
62.
for( i = 0; i < 1000000; i++)
63.
{
64.

snprintf(sqlcmd, sizeof(sqlcmd), "insert into testinfo values(%d, %d, '%d', '%s')", i, i2, i10, buf);

65.
sqlite3_exec(db, sqlcmd, NULL, NULL, &zErr);
66.
}
67.
sqlite3_exec(db, "COMMIT;", 0, 0, NULL);
68.
gettimeofday(&tmv2, NULL);
69.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

70.
printf("the 2 operation costs %f\n", tmcost);
71.
}
72.
//执行准备
73.
else if(atoi(argv[1]) == 3)
74.
{
75.
gettimeofday(&tmv1, NULL);
76.
sqlite3_exec(db, "BEGIN;", 0, 0, 0);
77.
const char sql = "insert into testinfo values(?,?,?,?)";
78.
sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, 0);
79.
for(i = 0; i < 1000000; i++)
80.
{
81.
sprintf(tmpstr, "%d", i
10);
82.
sqlite3_reset(stmt);
83.
sqlite3_bind_int(stmt, 1, i);
84.
sqlite3_bind_int(stmt, 2, i*2);
85.
sqlite3_bind_text(stmt, 3, tmpstr, -1, SQLITE_STATIC);
86.
sqlite3_bind_text(stmt, 4, buf, -1, SQLITE_STATIC);
87.
sqlite3_step(stmt);
88.
}
89.
sqlite3_finalize(stmt);
90.
sqlite3_exec(db, "COMMIT;", 0, 0, 0);
91.
gettimeofday(&tmv2, NULL);
92.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

93.
printf("the 4 operation costs %f\n", tmcost);
94.
}
95.
//关闭写同步
96.
else if(atoi(argv[1]) == 4)
97.
{
98.
sqlite3_exec(db, "PRAGMA synchronous = OFF; ", 0,0,0);
99.
gettimeofday(&tmv1, NULL);
100.
sqlite3_exec(db, "BEGIN;", 0, 0, 0);
101.
const char sql = "insert into testinfo values(?,?,?,?)";
102.
sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, 0);
103.
for(i = 0; i < 1000000; i++)
104.
{
105.
sprintf(tmpstr, "%d", i
10);
106.
sqlite3_reset(stmt);
107.
sqlite3_bind_int(stmt, 1, i);
108.
sqlite3_bind_int(stmt, 2, i*2);
109.
sqlite3_bind_text(stmt, 3, tmpstr, -1, SQLITE_STATIC);
110.
sqlite3_bind_text(stmt, 4, buf, -1, SQLITE_STATIC);
111.
sqlite3_step(stmt);
112.
}
113.
sqlite3_finalize(stmt);
114.
sqlite3_exec(db, "COMMIT;", 0, 0, 0);
115.
gettimeofday(&tmv2, NULL);
116.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

117.
printf("the 5 operation costs %f\n", tmcost);
118.
}
119.
//使用WAL模式
120.
else if(atoi(argv[1]) == 5)
121.
{
122.
sqlite3_exec(db, "PRAGMA journal_mode=WAL; ", 0,0,0);
123.
gettimeofday(&tmv1, NULL);
124.
sqlite3_exec(db, "BEGIN;", 0, 0, 0);
125.
const char sql = "insert into testinfo values(?,?,?,?)";
126.
sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, 0);
127.
for(i = 0; i < 1000000; i++)
128.
{
129.
sprintf(tmpstr, "%d", i
10);
130.
sqlite3_reset(stmt);
131.
sqlite3_bind_int(stmt, 1, i);
132.
sqlite3_bind_int(stmt, 2, i*2);
133.
sqlite3_bind_text(stmt, 3, tmpstr, -1, SQLITE_STATIC);
134.
sqlite3_bind_text(stmt, 4, buf, -1, SQLITE_STATIC);
135.
sqlite3_step(stmt);
136.
}
137.
sqlite3_finalize(stmt);
138.
sqlite3_exec(db, "COMMIT;", 0, 0, 0);
139.
gettimeofday(&tmv2, NULL);
140.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

141.
printf("the 5 operation costs %f\n", tmcost);
142.
}
143.
//内存数据库
144.
else if(atoi(argv[1]) == 6)
145.
{
146.
rc = sqlite3_open(":memory:", &dbMem);
147.

rc = sqlite3_exec(dbMem, "create table if not exists testinfo (id integer primary key, age integer, height text, weight text)", NULL, NULL, &zErr);

148.
sqlite3_exec(dbMem, "PRAGMA synchronous = OFF; ", 0,0,0);
149.
gettimeofday(&tmv1, NULL);
150.
sqlite3_exec(dbMem, "BEGIN;", 0, 0, 0);
151.
const char sql = "insert into testinfo values(?,?,?,?)";
152.
sqlite3_prepare_v2(dbMem, sql, strlen(sql), &stmt, 0);
153.
for(i = 0; i < 1000000; i++)
154.
{
155.
sprintf(tmpstr, "%d", i
10);
156.
sqlite3_reset(stmt);
157.
sqlite3_bind_int(stmt, 1, i);
158.
sqlite3_bind_int(stmt, 2, i*2);
159.
sqlite3_bind_text(stmt, 3, tmpstr, -1, SQLITE_STATIC);
160.
sqlite3_bind_text(stmt, 4, buf, -1, SQLITE_STATIC);
161.
sqlite3_step(stmt);
162.
}
163.
sqlite3_finalize(stmt);
164.
sqlite3_exec(dbMem, "COMMIT;", 0, 0, 0);
165.
gettimeofday(&tmv2, NULL);
166.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

167.
printf("the 6 operation costs %f\n", tmcost);
168.
}
169.
//sqlite3_get_table()逐条查询
170.
else if(atoi(argv[1]) == 7)
171.
{
172.
gettimeofday(&tmv1, NULL);
173.
for( i = 0; i < 1000000; i++)
174.
{
175.
snprintf(sqlcmd, sizeof(sqlcmd), "select * from testinfo where id = %d", i);
176.
sqlite3_get_table(db, sqlcmd, &pRecord, &rows, &cols, &zErr);
177.
}
178.
gettimeofday(&tmv2, NULL);
179.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

180.
printf("the 7 operation costs %f\n", tmcost);
181.
}
182.
//开启事务
183.
else if(atoi(argv[1]) == 8)
184.
{
185.
gettimeofday(&tmv1, NULL);
186.
sqlite3_exec(db, "BEGIN", 0, 0, NULL);
187.
for( i = 0; i < 1000000; i++)
188.
{
189.
snprintf(sqlcmd, sizeof(sqlcmd), "select * from testinfo where id = %d", i);
190.
sqlite3_get_table(db, sqlcmd, &pRecord, &rows, &cols, &zErr);
191.
}
192.
sqlite3_exec(db, "COMMIT", 0, 0, NULL);
193.
gettimeofday(&tmv2, NULL);
194.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

195.
printf("the 8 operation costs %f\n", tmcost);
196.
}
197.
//执行准备
198.
else if(atoi(argv[1]) == 9)
199.
{
200.
char *sql = "select * from testinfo where id = ?";
201.
gettimeofday(&tmv1, NULL);
202.
sqlite3_exec(db, "BEGIN", 0, 0, NULL);
203.
sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, 0);
204.
for(i = 0; i < 1000000; i++)
205.
{
206.
sqlite3_reset(stmt);
207.
sqlite3_bind_int(stmt, 1, i);
208.
rc = sqlite3_step(stmt);
209.
while(rc == SQLITE_ROW)
210.
{
211.
n1 = sqlite3_column_int(stmt, 0);
212.
n2 = sqlite3_column_int(stmt, 1);
213.
ch1 = sqlite3_column_text(stmt, 2);
214.
ch2 = sqlite3_column_text(stmt, 3);
215.
rc = rc = sqlite3_step(stmt);
216.
}
217.
}
218.
sqlite3_finalize(stmt);
219.
sqlite3_exec(db, "COMMIT", 0, 0, NULL);
220.
gettimeofday(&tmv2, NULL);
221.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

222.
printf("the 9 operation costs %f\n", tmcost);
223.
}
224.
//内存数据库
225.
else if(atoi(argv[1]) == 10)
226.
{
227.
rc = sqlite3_open(":memory:", &dbMem);
228.

rc = sqlite3_exec(dbMem, "create table if not exists testinfo (id integer primary key, age integer, height text, weight text)", NULL, NULL, &zErr);

229.
sqlite3_exec(dbMem, "PRAGMA synchronous = OFF; ", 0,0,0);
230.
gettimeofday(&tmv1, NULL);
231.
sqlite3_exec(dbMem, "BEGIN;", 0, 0, 0);
232.
const char sql = "insert into testinfo values(?,?,?,?)";
233.
sqlite3_prepare_v2(dbMem, sql, strlen(sql), &stmt, 0);
234.
for(i = 0; i < 1000000; i++)
235.
{
236.
sprintf(tmpstr, "%d", i
10);
237.
sqlite3_reset(stmt);
238.
sqlite3_bind_int(stmt, 1, i);
239.
sqlite3_bind_int(stmt, 2, i*2);
240.
sqlite3_bind_text(stmt, 3, tmpstr, -1, SQLITE_STATIC);
241.
sqlite3_bind_text(stmt, 4, buf, -1, SQLITE_STATIC);
242.
sqlite3_step(stmt);
243.
}
244.
sqlite3_finalize(stmt);
245.
sqlite3_exec(dbMem, "COMMIT;", 0, 0, 0);
246.
gettimeofday(&tmv2, NULL);
247.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

248.
printf("the 6 operation costs %f\n", tmcost);
249.

250.

251.
char *sql2 = "select * from testinfo where id = ?";
252.
gettimeofday(&tmv1, NULL);
253.
sqlite3_exec(dbMem, "BEGIN", 0, 0, NULL);
254.
sqlite3_prepare_v2(dbMem, sql2, strlen(sql2), &stmt, 0);
255.
for(i = 0; i < 1000000; i++)
256.
{
257.
sqlite3_reset(stmt);
258.
sqlite3_bind_int(stmt, 1, i);
259.
rc = sqlite3_step(stmt);
260.
while(rc == SQLITE_ROW)
261.
{
262.
n1 = sqlite3_column_int(stmt, 0);
263.
n2 = sqlite3_column_int(stmt, 1);
264.
ch1 = sqlite3_column_text(stmt, 2);
265.
ch2 = sqlite3_column_text(stmt, 3);
266.
rc = rc = sqlite3_step(stmt);
267.
}
268.
}
269.
sqlite3_finalize(stmt);
270.
sqlite3_exec(dbMem, "COMMIT", 0, 0, NULL);
271.
gettimeofday(&tmv2, NULL);
272.

tmcost = (float)(tmv2.tv_sec10001000+tmv2.tv_usec - tmv1.tv_sec10001000+tmv1.tv_usec)/1000000;

273.
printf("the 9 operation costs %f\n", tmcost);
274.
}
275.

276.
sqlite3_close(db);
277.
return 1;
278.
}

编译方法:
将sqlite3.c  sqlite3.h放在同级目录
gcc test.c sqlite3.c -o test -lpthread -ldl


  • 我的微信
  • 这是我的微信扫一扫
  • weinxin
  • 我的微信公众号
  • 我的微信公众号扫一扫
  • weinxin