# HG changeset patch
# User Jun Wu <quark@fb.com>
# Date 1466675167 -3600
# Node ID ea4d6142c6d9cd07b591f371f779285d3002a7c3
# Parent  f21605bcda2489483ffdbdb32cbcbdd83ca88391
maps: do not ask sqlite for row count

"SELECT COUNT(1) FROM x" is not O(1) for sqlite and can be slow on large
tables. This patch changes the count to be backed by a file instead.

The change exposes a risk that the number may become inaccurate, if
__setitem__ is called with a same key multiple times. But we don't do that
during pull, and only use __len__ to calculate how many revisions pulled,
or test if the map is empty. So it would be fine.

diff --git a/hgsubversion/maps.py b/hgsubversion/maps.py
--- a/hgsubversion/maps.py
+++ b/hgsubversion/maps.py
@@ -426,6 +426,7 @@ class RevMap(dict):
             revmap.exportrevmapv1(tmppath)
             os.rename(tmppath, self._filepath)
             hgutil.unlinkpath(revmap._dbpath)
+            hgutil.unlinkpath(revmap._rowcountpath, ignoremissing=True)
             return self._readmapfile()
         if ver != self.VERSION:
             raise hgutil.Abort('revmap too new -- please upgrade')
@@ -554,10 +555,13 @@ class SqliteRevMap(collections.MutableMa
 
     lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulledpath,
                                    default=0, deserializer=int)
+    rowcount = util.fileproperty('_rowcount', lambda x: x._rowcountpath,
+                                 default=0, deserializer=int)
 
     def __init__(self, revmap_path, lastpulled_path):
         self._filepath = revmap_path
         self._dbpath = revmap_path + '.db'
+        self._rowcountpath = self._dbpath + '.rowcount'
         self._lastpulledpath = lastpulled_path
 
         self._db = None
@@ -608,6 +612,7 @@ class SqliteRevMap(collections.MutableMa
     def clear(self):
         hgutil.unlinkpath(self._filepath, ignoremissing=True)
         hgutil.unlinkpath(self._dbpath, ignoremissing=True)
+        hgutil.unlinkpath(self._rowcountpath, ignoremissing=True)
         self._db = None
         self._hashes = None
         self._firstpull = None
@@ -635,10 +640,8 @@ class SqliteRevMap(collections.MutableMa
         return iter(rows)
 
     def __len__(self):
-        # 'WHERE rev >= 0' hints sqlite to use the rev index
-        with self._transaction() as db:
-            return db.execute('SELECT COUNT(1) FROM revmap ' +
-                              'WHERE rev >= 0').fetchone()[0]
+        # rowcount is faster than "SELECT COUNT(1)". the latter is not O(1)
+        return self.rowcount
 
     def __setitem__(self, key, binha):
         revnum, branch = key
@@ -653,6 +656,8 @@ class SqliteRevMap(collections.MutableMa
 
     def __delitem__(self, key):
         for row in self._querybykey('DELETE', key):
+            if self.rowcount > 0:
+                self.rowcount -= 1
             return
         # For performance reason, self._hashes is not updated
         raise KeyError(key)
@@ -687,6 +692,10 @@ class SqliteRevMap(collections.MutableMa
         self._db.executemany(
             'INSERT OR REPLACE INTO revmap (rev, branch, hash) ' +
             'VALUES (?, ?, ?)', rows)
+        # If REPLACE happens, rowcount can be wrong. But it is only used to
+        # calculate how many revisions pulled, and during pull we don't
+        # replace rows. So it is fine.
+        self.rowcount += len(rows)
 
     def _opendb(self):
         '''Open the database and make sure the table is created on demand.'''
@@ -723,7 +732,12 @@ class SqliteRevMap(collections.MutableMa
         with self._transaction('EXCLUSIVE'):
             map(self._db.execute, self.TABLESCHEMA)
             if version == RevMap.VERSION:
+                self.rowcount = 0
                 self._importrevmapv1()
+            elif not self.rowcount:
+                self.rowcount = self._db.execute(
+                    'SELECT COUNT(1) FROM revmap').fetchone()[0]
+
             # "bulk insert; then create index" is about 2.4x as fast as
             # "create index; then bulk insert" on a large repo
             map(self._db.execute, self.INDEXSCHEMA)