Modify ↓
#12895 closed enhancement (fixed)
Improve performance of sync command with many revisions
| Reported by: | Ryan J Ollos | Owned by: | Jun Omae |
|---|---|---|---|
| Priority: | normal | Milestone: | 1.0.17 |
| Component: | plugin/git | Version: | |
| Severity: | normal | Keywords: | performance |
| Cc: | Branch: | ||
| Release Notes: |
Improve performance of the sync command when the git repository has no changes. |
||
| API Changes: | |||
| Internal Changes: | |||
Description
The patch was proposed in gmessage:trac-dev:2ityvgVZxsQ/KEA6IwY2CAAJ.
-
tracopt/versioncontrol/git/PyGIT.py
diff --git a/tracopt/versioncontrol/git/PyGIT.py b/tracopt/versioncontrol/git/PyGIT.py index 47f397389..32aac9184 100644
a b class Storage(object): 596 596 key=lambda (name, rev, head): (not head, name)) 597 597 return [(name, rev) for name, rev, head in branches] 598 598 599 def get_refs(self): 600 for refname, rev in self.rev_cache.refs_dict.iteritems(): 601 if refname != 'HEAD': 602 yield refname, rev 603 599 604 def get_commits(self): 600 605 return self.rev_cache.rev_dict 601 606 -
tracopt/versioncontrol/git/git_fs.py
diff --git a/tracopt/versioncontrol/git/git_fs.py b/tracopt/versioncontrol/git/git_fs.py index 031f68c2b..c33eb7a5d 100644
a b class GitCachedRepository(CachedRepository): 100 100 return count > 0 101 101 return False 102 102 103 def needs_sync(): 104 max_holders = 999 105 revs = sorted(set(rev for refname, rev in repos.git.get_refs())) 106 for idx in xrange(0, len(revs), max_holders): 107 revs_ = revs[idx:idx + max_holders] 108 holders = ','.join(('%s',) * len(revs_)) 109 args = [self.id] 110 args.extend(revs_) 111 query = 'SELECT COUNT(*) FROM revision ' \ 112 'WHERE repos=%s AND rev IN (' + holders + ')' 113 for count, in self.env.db_query(query, args): 114 if count < len(revs_): 115 return True 116 return False 117 103 118 def traverse(rev, seen): 104 119 revs = [] 105 120 merge_revs = [] … … class GitCachedRepository(CachedRepository): 121 136 revs[idx:idx] = traverse(rev, seen) 122 137 return revs 123 138 124 while True: 125 repos.sync() 126 repos_youngest = repos.youngest_rev or '' 139 def sync_revs(): 127 140 updated = False 128 141 seen = set() 129 142 … … class GitCachedRepository(CachedRepository): 148 161 if feedback: 149 162 feedback(rev) 150 163 151 if updated: 152 continue # sync again 164 return updated 153 165 166 while True: 167 repos.sync() 168 repos_youngest = repos.youngest_rev or '' 169 if needs_sync() and sync_revs(): 170 continue # sync again 154 171 if meta_youngest != repos_youngest: 155 172 with self.env.db_transaction as db: 156 173 db("""
Attachments (0)
Change History (8)
comment:1 by , 8 years ago
| Keywords: | performance added |
|---|
comment:2 by , 8 years ago
comment:3 by , 8 years ago
| Owner: | set to |
|---|---|
| Status: | new → assigned |
comment:4 by , 8 years ago
My proposed changes have an off-by-one error. See [a9fd49b97/jomae.git] (jomae.git@t12895).
comment:5 by , 8 years ago
| Owner: | changed from to |
|---|
comment:6 by , 8 years ago
The maximum number of placeholders in sqlite3 is 999 on Windows; however, it is 250000 on Ubuntu and Debian.
Python 2.7.10 (default, May 23 2015, 09:44:00) [MSC v.1500 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from trac.test import EnvironmentStub
>>> env = EnvironmentStub()
>>> def q(n):
... query = 'SELECT * FROM system WHERE 0 IN ({0})'.format(','.join(('%s',) * n))
... args = list(xrange(n))
... return env.db_query(query, args)
...
>>> q(999)
[(u'database_version', u'29')]
>>> q(1000)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 4, in q
File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\api.py", line 124, in execute
return db.execute(query, params)
File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\util.py", line 128, in execute
cursor.execute(query, params if params is not None else [])
File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\util.py", line 61, in execute
r = self.cursor.execute(sql_escape_percent(sql), args)
File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\sqlite_backend.py", line 82, in execute
result = PyFormatCursor.execute(self, *args)
File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\sqlite_backend.py", line 60, in execute
args or [])
File "C:\venv\trac-1.0.15\lib\site-packages\trac\db\sqlite_backend.py", line 52, in _rollback_on_error
return function(self, *args, **kwargs)
sqlite3.OperationalError: too many SQL variables
>>>
Python 2.5.6 (r256:88840, Oct 21 2014, 22:49:55)
[GCC 4.8.2] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from trac.test import EnvironmentStub
>>> env = EnvironmentStub()
>>> def q(n):
... query = 'SELECT * FROM system WHERE 0 IN (%s)' % ','.join(('%s',) * n)
... args = list(xrange(n))
... return env.db_query(query, args)
...
>>> q(999)
[(u'database_version', u'29')]
>>> q(1000)
[(u'database_version', u'29')]
>>> q(250000)
[(u'database_version', u'29')]
>>> q(250001)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 4, in q
File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/api.py", line 124, in execute
return db.execute(query, params)
File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/util.py", line 128, in execute
cursor.execute(query, params if params is not None else [])
File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/util.py", line 61, in execute
r = self.cursor.execute(sql_escape_percent(sql), args)
File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/sqlite_backend.py", line 82, in execute
result = PyFormatCursor.execute(self, *args)
File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/sqlite_backend.py", line 60, in execute
args or [])
File "/venv/trac/1.0.15/lib/python2.5/site-packages/trac/db/sqlite_backend.py", line 52, in _rollback_on_error
return function(self, *args, **kwargs)
sqlite3.OperationalError: too many SQL variables
comment:7 by , 8 years ago
| Release Notes: | modified (diff) |
|---|---|
| Resolution: | → fixed |
| Status: | assigned → closed |
Committed in [16289] and merged in [16290-16291].
Note:
See TracTickets
for help on using tickets.



On 1.0-stable, timing execution of
sync:trac/versioncontrol/api.py
I did some rough profiling with the teo repository configured for sync per request. This is just refreshing the page, with no new revisions in the repository.
rjollos.git:t12895_repos_sync_performance:
cached_repository = False | cached_repository = True | persistent_cache = False | persistent_cache = True
1.0-stable:
cached_repository = False | cached_repository = True | persistent_cache = False | persistent_cache = True