Modify ↓
#12895 closed enhancement (fixed)
Improve performance of sync command with many revisions
Reported by: | Ryan J Ollos | Owned by: | Jun Omae |
---|---|---|---|
Priority: | normal | Milestone: | 1.0.17 |
Component: | plugin/git | Version: | |
Severity: | normal | Keywords: | performance |
Cc: | Branch: | ||
Release Notes: |
Improve performance of the sync command when the Git repository has no changes. |
||
API Changes: | |||
Internal Changes: |
Description
The patch was proposed in gmessage:trac-dev:2ityvgVZxsQ/KEA6IwY2CAAJ.
-
tracopt/versioncontrol/git/PyGIT.py
diff --git a/tracopt/versioncontrol/git/PyGIT.py b/tracopt/versioncontrol/git/PyGIT.py index 47f397389..32aac9184 100644
a b class Storage(object): 596 596 key=lambda (name, rev, head): (not head, name)) 597 597 return [(name, rev) for name, rev, head in branches] 598 598 599 def get_refs(self): 600 for refname, rev in self.rev_cache.refs_dict.iteritems(): 601 if refname != 'HEAD': 602 yield refname, rev 603 599 604 def get_commits(self): 600 605 return self.rev_cache.rev_dict 601 606 -
tracopt/versioncontrol/git/git_fs.py
diff --git a/tracopt/versioncontrol/git/git_fs.py b/tracopt/versioncontrol/git/git_fs.py index 031f68c2b..c33eb7a5d 100644
a b class GitCachedRepository(CachedRepository): 100 100 return count > 0 101 101 return False 102 102 103 def needs_sync(): 104 max_holders = 999 105 revs = sorted(set(rev for refname, rev in repos.git.get_refs())) 106 for idx in xrange(0, len(revs), max_holders): 107 revs_ = revs[idx:idx + max_holders] 108 holders = ','.join(('%s',) * len(revs_)) 109 args = [self.id] 110 args.extend(revs_) 111 query = 'SELECT COUNT(*) FROM revision ' \ 112 'WHERE repos=%s AND rev IN (' + holders + ')' 113 for count, in self.env.db_query(query, args): 114 if count < len(revs_): 115 return True 116 return False 117 103 118 def traverse(rev, seen): 104 119 revs = [] 105 120 merge_revs = [] … … class GitCachedRepository(CachedRepository): 121 136 revs[idx:idx] = traverse(rev, seen) 122 137 return revs 123 138 124 while True: 125 repos.sync() 126 repos_youngest = repos.youngest_rev or '' 139 def sync_revs(): 127 140 updated = False 128 141 seen = set() 129 142 … … class GitCachedRepository(CachedRepository): 148 161 if feedback: 149 162 feedback(rev) 150 163 151 if updated: 152 continue # sync again 164 return updated 153 165 166 while True: 167 repos.sync() 168 repos_youngest = repos.youngest_rev or '' 169 if needs_sync() and sync_revs(): 170 continue # sync again 154 171 if meta_youngest != repos_youngest: 155 172 with self.env.db_transaction as db: 156 173 db("""
Attachments (0)
Change History (8)
comment:1 by , 7 years ago
Keywords: | performance added |
---|
comment:2 by , 7 years ago
comment:3 by , 7 years ago
Owner: | set to |
---|---|
Status: | new → assigned |
comment:4 by , 7 years ago
My proposed changes have an off-by-one error. See [a9fd49b97/jomae.git] (jomae.git@t12895).
comment:5 by , 7 years ago
Owner: | changed from | to
---|
comment:6 by , 7 years ago
The maximum number of placeholders in sqlite3 is 999 on Windows; however, it is 250000 on Ubuntu and Debian.
Python 2.7.10 (default, May 23 2015, 09:44:00) [MSC v.1500 64 bit (AMD64)] on win32 Type "help", "copyright", "credits" or "license" for more information. >>> from trac.test import EnvironmentStub >>> env = EnvironmentStub() >>> def q(n): ... query = 'SELECT * FROM system WHERE 0 IN ({0})'.format(','.join(('%s',) * n)) ... args = list(xrange(n)) ... return env.db_query(query, args) ... >>> q(999) [(u'database_version', u'29')] >>> q(1000) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "<stdin>", line 4, in q File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\api.py", line 124, in execute return db.execute(query, params) File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\util.py", line 128, in execute cursor.execute(query, params if params is not None else []) File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\util.py", line 61, in execute r = self.cursor.execute(sql_escape_percent(sql), args) File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\sqlite_backend.py", line 82, in execute result = PyFormatCursor.execute(self, *args) File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\sqlite_backend.py", line 60, in execute args or []) File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\sqlite_backend.py", line 52, in _rollback_on_error return function(self, *args, **kwargs) sqlite3.OperationalError: too many SQL variables >>>
Python 2.5.6 (r256:88840, Oct 21 2014, 22:49:55) [GCC 4.8.2] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> from trac.test import EnvironmentStub >>> env = EnvironmentStub() >>> def q(n): ... query = 'SELECT * FROM system WHERE 0 IN (%s)' % ','.join(('%s',) * n) ... args = list(xrange(n)) ... return env.db_query(query, args) ... >>> q(999) [(u'database_version', u'29')] >>> q(1000) [(u'database_version', u'29')] >>> q(250000) [(u'database_version', u'29')] >>> q(250001) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "<stdin>", line 4, in q File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/api.py", line 124, in execute return db.execute(query, params) File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/util.py", line 128, in execute cursor.execute(query, params if params is not None else []) File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/util.py", line 61, in execute r = self.cursor.execute(sql_escape_percent(sql), args) File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/sqlite_backend.py", line 82, in execute result = PyFormatCursor.execute(self, *args) File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/sqlite_backend.py", line 60, in execute args or []) File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/sqlite_backend.py", line 52, in _rollback_on_error return function(self, *args, **kwargs) sqlite3.OperationalError: too many SQL variables
comment:7 by , 7 years ago
Release Notes: | modified (diff) |
---|---|
Resolution: | → fixed |
Status: | assigned → closed |
Committed in [16289] and merged in [16290-16291].
Note:
See TracTickets
for help on using tickets.
On 1.0-stable, timing execution of
sync
:trac/versioncontrol/api.py
I did some rough profiling with the teo repository configured for sync per request. Each measurement is just a page refresh, with no new revisions in the repository.
rjollos.git:t12895_repos_sync_performance:
cached_repository = False
cached_repository = True
persistent_cache = False
persistent_cache = True
1.0-stable:
cached_repository = False
cached_repository = True
persistent_cache = False
persistent_cache = True