DatabaseUuidEfficiency: test.py

File test.py, 3.1 KB (added by flip, 9 years ago)
Line 
1from __future__ import division
2import sqlite3 as sqlite
3import uuid as uuid_module
4import os
5import random
6import time
7import cProfile as profile
8
9# number of UUIDs to generate
10UUID_COUNT = 1000000
11
12# number of random UUIDs to select
13SELECT_COUNT = 100000
14
15# filename for the test data
16INPUT_FILENAME = "test_data.txt"
17
18# USE_CHAR toggles whether or not to use the text or BLOB representation
19USE_CHAR = True
20DATABASE_FILENAME = "char.sqlite" if USE_CHAR else "blob.sqlite"
21
22
23   
24def generate_uuids_text():
25    """Generates the test data. You only need to run it once."""
26    print "Generating %d UUIDs..." % UUID_COUNT
27    uuids = [str(uuid_module.uuid4()) for i in range(UUID_COUNT)]
28   
29    open(INPUT_FILENAME, "w").write("\n".join(uuids))
30   
31   
32def write_uuids():
33    """Writes the test data to the database. You only need to run it once
34    for each database type (char & BLOB)."""
35    if os.path.exists(DATABASE_FILENAME):
36        os.remove(DATABASE_FILENAME)
37   
38    conn = sqlite.Connection(DATABASE_FILENAME)
39   
40    cursor = conn.cursor()
41
42    type_ = "CHAR" if USE_CHAR else "BLOB"
43    sql = "create table uuids (id %s PRIMARY KEY)" % type_
44
45    cursor.execute(sql)
46   
47    f = open(INPUT_FILENAME)
48   
49    for line in f.readlines():
50        line = line.strip()
51        uuid = uuid_module.UUID(line)
52        uuid = uuid.hex if USE_CHAR else sqlite.Binary(uuid.bytes)
53        #print line
54        sql = "insert into uuids (id) values (?)"
55        cursor.execute(sql, [uuid])
56       
57    conn.commit()
58
59
60def test_select_time():
61    """This is the interesting function that you'll want to run repeatedly."""
62    conn = sqlite.Connection(DATABASE_FILENAME)
63   
64    cursor = conn.cursor()
65   
66    uuids = open(INPUT_FILENAME).read().split("\n")
67   
68    # I convert the UUID strings to UUID objects. When USE_CHAR == True,
69    # this isn't necessary. In fact it adds an unfair penalty in that case
70    # because the UUID objects have to be converted back to strings before
71    # being passed to SQLite in a query.
72    uuids = [uuid_module.UUID(uuid) for uuid in uuids]
73
74    sql = "select * from uuids where id = ?"
75    start = time.time()
76    for i in range(SELECT_COUNT):
77        uuid = random.choice(uuids)
78        #param = uuid if USE_CHAR else sqlite.Binary(uuid_module.UUID(uuid).bytes)
79        param = str(uuid) if USE_CHAR else sqlite.Binary(uuid.bytes)
80        cursor.execute(sql, [param])
81        # Fetch & discard result.
82        #assert(cursor.fetchone()[0] == uuid)
83        result = cursor.fetchone()[0]
84       
85        # The two lines below make a big difference in timing!
86        # if not USE_CHAR:
87        #     result = str(uuid_module.UUID(bytes=result))
88       
89        # try:
90        #     assert(result == str(uuid))
91        # except:
92        #     import pdb
93        #     pdb.set_trace()
94   
95    elapsed = time.time() - start
96   
97    print "Completed selecting %d UUIDs in %f seconds (%.5f per 1000)" % \
98                    (SELECT_COUNT, elapsed, elapsed * 1000 / SELECT_COUNT)
99   
100       
101   
102   
103#generate_uuids_text()
104
105#write_uuids()
106#test_select_time()
107
108profile.run("test_select_time()")