Source code for leo.core.leoCache

#@+leo-ver=5-thin
#@+node:ekr.20100208065621.5894: * @file leoCache.py
'''A module encapsulating Leo's file caching'''
#@+<< imports >>
#@+node:ekr.20100208223942.10436: ** << imports >> (leoCache)
import sys
isPython3 = sys.version_info >= (3, 0, 0)
import leo.core.leoGlobals as g
import leo.core.leoNodes as leoNodes
if isPython3:
    import pickle
else:
    import cPickle as pickle
# import glob
import fnmatch
import hashlib
import os
import stat
# import time
import zlib
import sqlite3
# try:
    # import marshal
# except ImportError:
    # marshal = None
#@-<< imports >>
# Abbreviations used throughout.
abspath = g.os_path_abspath
basename = g.os_path_basename
expanduser = g.os_path_expanduser
isdir = g.os_path_isdir
isfile = g.os_path_isfile
join = g.os_path_join
normcase = g.os_path_normcase
split = g.os_path_split
SQLITE = True
#@+others
#@+node:ekr.20100208062523.5885: ** class Cacher
[docs]class Cacher(object):
    '''A class that encapsulates all aspects of Leo's file caching.'''
    #@+others
    #@+node:ekr.20100208082353.5919: *3* cacher.Birth
    #@+node:ekr.20100208062523.5886: *4* cacher.ctor
    def __init__(self, c=None):
        
        self.c = c
        # set by initFileDB and initGlobalDB...
        self.db = {}
            # When caching is enabled will be a PickleShareDB instance.
        self.dbdirname = None # A string.
        self.globals_tag = 'leo.globals'
            # 'leo3k.globals' if g.isPython3 else 'leo2k.globals'
        self.inited = False
    #@+node:ekr.20100208082353.5918: *4* cacher.initFileDB
[docs]    def initFileDB(self, fn):

        if not fn: return
        pth, bname = split(fn)
        if pth and bname:
            fn = fn.lower()
            fn = g.toEncodedString(fn) # Required for Python 3.x.
            # Important: this creates a top-level directory of the form x_y.
            # x is a short file name, included for convenience.
            # y is a key computed by the *full* path name fn.
            # Thus, there will a separate top-level directory for every path.
            self.dbdirname = dbdirname = join(g.app.homeLeoDir, 'db',
                '%s_%s' % (bname, hashlib.md5(fn).hexdigest()))
            self.db = SqlitePickleShare(dbdirname) if SQLITE else PickleShareDB(dbdirname)
            # Fixes bug 670108.
            self.c.db = self.db
            self.inited = True
    #@+node:ekr.20100208082353.5920: *4* cacher.initGlobalDb
[docs]    def initGlobalDB(self):

        # New in Leo 4.10.1.
        # We always create the global db, even if caching is disabled.
        try:
            dbdirname = g.app.homeLeoDir + "/db/global"
            db = SqlitePickleShare(dbdirname) if SQLITE else PickleShareDB(dbdirname)
            self.db = db
            self.inited = True
            return db
        except Exception:
            return {} # Use a plain dict as a dummy.
    #@+node:ekr.20100210163813.5747: *4* cacher.save
[docs]    def save(self, fn, changeName):
        if SQLITE:
            self.commit(True)
        if changeName or not self.inited:
            self.initFileDB(fn)
    #@+node:ekr.20100209160132.5759: *3* cacher.clearCache & clearAllCaches
[docs]    def clearCache(self):
        '''Clear the cache for the open window.'''
        if self.db:
            # Be careful about calling db.clear.
            try:
                self.db.clear(verbose=True)
            except TypeError:
                self.db.clear() # self.db is a Python dict.
            except Exception:
                g.trace('unexpected exception')
                g.es_exception()
                self.db = {}

[docs]    def clearAllCaches(self):
        '''
        Clear the Cachers *only* for all open windows. This is much safer than
        killing all db's.
        '''
        for frame in g.windows():
            c = frame.c
            if c.cacher:
                c.cacher.clearCache()
        g.es('done', color='blue')
    #@+node:ekr.20100208071151.5907: *3* cacher.fileKey
[docs]    def fileKey(self, fileName, content, requireEncodedString=False):
        '''
        Compute the hash of fileName and content. fileName may be unicode,
        content must be bytes (or plain string in Python 2.x).
        '''
        m = hashlib.md5()
        if g.isUnicode(fileName):
            fileName = g.toEncodedString(fileName)
        if g.isUnicode(content):
            if requireEncodedString:
                g.internalError('content arg must be str/bytes')
            content = g.toEncodedString(content)
        # New in Leo 5.6: Use the git branch name in the key.
        branch = g.gitBranchName()
        branch = g.toEncodedString(branch)
            # Fix #475.
        m.update(branch)
        m.update(fileName)
        m.update(content)
        return "fcache/" + m.hexdigest()
    #@+node:ekr.20100208082353.5925: *3* cacher.Reading
    #@+node:vitalije.20180507112319.1: *4* cacher.collectChangedNodes
[docs]    def collectChangedNodes(self, root_v, aList, fileName):
        '''Populates c.nodeConflictList with data about nodes that
           are going to change during recreation of outline from 
           cached list.'''
        c = self.c
        #@+others
        #@+node:vitalije.20180507113809.1: *5* vnodes helper iterator
        gnxDict = c.fileCommands.gnxDict
        def vnodes(_vlist):
            h, b, gnx, c_vlist = _vlist
            v = gnxDict.get(gnx)
            if v:
                yield v, h, b, gnx
            for x in c_vlist:
                for y in vnodes(x):
                    yield y
        #@-others
        for v, h, b, gnx in vnodes(aList):
            if v is root_v and not (v.b or v.children):
                #
                # no children and an empty body of root node most probably means
                # we are reading external file for the first time. In this case
                # there is no point in counting this as nodeConflict
                continue
            same_h = v.h == h
            same_b = (v.b == b or
                (v.b[-1:] == '\n' and v.b[:-1] == b) or
                (b[-1:] == '\n' and v.b == b[:-1]))
            if same_h and same_b:
                continue
            c.nodeConflictList.append(g.bunch(
                tag='(cached)',
                fileName=fileName,
                gnx=gnx,
                b_old=v.b,
                h_old=v.h,
                b_new=b,
                h_new=h,
                root_v=root_v,
            ))
    #@+node:ekr.20100208071151.5910: *4* cacher.createOutlineFromCacheList & helpers
[docs]    def createOutlineFromCacheList(self, parent_v, aList, fileName, top=True):
        '''
        Create outline structure from recursive aList built by makeCacheList.
        '''
        c = self.c
        if not c:
            g.internalError('no c')
            return
        if top:
            c.cacheListFileName = fileName
        if not aList:
            return
        h, b, gnx, children = aList
        if h is not None:
            v = parent_v
            # Does this destroy the ability to handle the rare case?
            v._headString = g.toUnicode(h)
            v._bodyString = g.toUnicode(b)
        for child_tuple in children:
            h, b, gnx, grandChildren = child_tuple
            isClone, child_v = self.fastAddLastChild(fileName, gnx, parent_v)
            if isClone:
                self.checkForChangedNodes(child_tuple, fileName, parent_v)
            else:
                self.createOutlineFromCacheList(child_v, child_tuple, fileName, top=False)
        
    #@+node:ekr.20170622112151.1: *5* cacher.checkForChangedNodes
    # update_warning_given = False

[docs]    def checkForChangedNodes(self, child_tuple, fileName, parent_v):
        '''
        Update the outline described by child_tuple, including all descendants.
        '''
        h, junk_b, gnx, grand_children = child_tuple
        child_v = self.c.fileCommands.gnxDict.get(gnx)
        if child_v:
            self.reportIfNodeChanged(child_tuple, child_v, fileName, parent_v)
            for grand_child in grand_children:
                self.checkForChangedNodes(grand_child, fileName, child_v)
            gnxes_in_cache = set(x[2] for x in grand_children)
            for_removal = [(i, v)
                for i, v in enumerate(child_v.children) 
                if v.gnx not in gnxes_in_cache]
            for i, v in reversed(for_removal):
                v._cutLink(i, child_v)
            #
            # sort children in the order from cache
            for i, grand_child in enumerate(grand_children):
                gnx = grand_child[2]
                v = self.c.fileCommands.gnxDict.get(gnx)
                if i < len(child_v.children):
                    child_v.children[i] = v
                elif i == len(child_v.children):
                    g.trace('Warning: dubious cache: please check %s' % fileName)
                    child_v.children.append(v)
                else:
                    g.trace('Corrupted cache. Use --no-cache')
                    g.printObj(child_v.children)
                    raise IndexError
        else:
            # If the outline is out of sync, there may be write errors later,
            # but the user should be handle them easily enough.
            isClone, child_v = self.fastAddLastChild(fileName, gnx, parent_v)
            self.createOutlineFromCacheList(child_v, child_tuple, fileName, top=False)
            # if not self.update_warning_given: # not needed.
                # self.update_warning_given = True
                # g.internalError('no vnode', child_tuple)
    #@+node:ekr.20100208071151.5911: *5* cacher.fastAddLastChild (sets tempRoots)
    # Similar to createThinChild4

[docs]    def fastAddLastChild(self, fileName, gnxString, parent_v):
        '''
        Create new VNode as last child of the receiver.
        If the gnx exists already, create a clone instead of new VNode.
        '''
        trace = 'gnx' in g.app.debug
        c = self.c
        gnxString = g.toUnicode(gnxString)
        gnxDict = c.fileCommands.gnxDict
        if gnxString is None:
            v = None
        else:
            v = gnxDict.get(gnxString)
        is_clone = v is not None
        if trace:
            g.trace(
                'clone', '%-5s' % (is_clone),
                'parent_v', parent_v, 'gnx', gnxString, 'v', repr(v))
        if is_clone:
            # new-read: update tempRoots.
            if not hasattr(v, 'tempRoots'):
                v.tempRoots = set()
            v.tempRoots.add(fileName)
        else:
            if gnxString:
                assert g.isUnicode(gnxString)
                v = leoNodes.VNode(context=c, gnx=gnxString)
                if 'gnx' in g.app.debug:
                    g.trace(c.shortFileName(), gnxString, v)
            else:
                v = leoNodes.VNode(context=c)
                # This is not an error: it can happen with @auto nodes.
                    # g.trace('**** no gnx for',v,parent_v)
            # Indicate that this node came from an external file.
            v.tempRoots = set()
            v.tempRoots.add(fileName)
        child_v = v
        child_v._linkAsNthChild(parent_v, parent_v.numberOfChildren())
        child_v.setVisited() # Supress warning/deletion of unvisited nodes.
        return is_clone, child_v
    #@+node:ekr.20100705083838.5740: *5* cacher.reportIfNodeChanged
[docs]    def reportIfNodeChanged(self, child_tuple, child_v, fileName, parent_v):
        '''
        Schedule a recovered node if child_v is substantially different from an
        earlier version.

        Issue a (rare) warning if two different files are involved.
        '''
        always_warn = True # True always warn about changed nodes.
        c = self.c
        h, b, gnx, grandChildren = child_tuple
        old_b, new_b = child_v.b, b
        old_h, new_h = child_v.h, h
        # Leo 5.6: test headlines.
        same_head = old_h == new_h
        same_body = (
            old_b == new_b or
            new_b.endswith('\n') and old_b == new_b[: -1] or
            old_b.endswith('\n') and new_b == old_b[: -1]
        )
        if same_head and same_body:
            return
        old_roots = list(getattr(child_v, 'tempRoots', set()))
        same_file = (
            len(old_roots) == 0 or
            len(old_roots) == 1 and old_roots[0] == fileName
        )
        must_warn = not same_file
        if not hasattr(child_v, 'tempRoots'):
            child_v.tempRoots = set()
        child_v.tempRoots.add(fileName)
        if must_warn:
            # This is the so-called "rare" case:
            # The node differs  in two different external files.
            self.warning('out-of-sync node: %s' % h)
            g.es_print('using node in %s' % fileName)
        if always_warn or must_warn:
            if c.make_node_conflicts_node:
                g.es_print('creating recovered node:', h)
            c.nodeConflictList.append(g.bunch(
                tag='(cached)',
                fileName=fileName,
                gnx=gnx,
                b_old=child_v.b,
                h_old=child_v.h,
                b_new=b,
                h_new=h,
                root_v=parent_v,
            ))
        # Always update the node.
        child_v.h, child_v.b = h, b
        child_v.setDirty()
        c.changed = True # Tell getLeoFile to propegate dirty nodes.
    #@+node:vitalije.20180507114013.1: *4* cacher.createOutlineFromCacheList2
[docs]    def createOutlineFromCacheList2(self, parent_v, aList):
        '''
        Create outline structure from recursive aList built by makeCacheList.
        '''
        c = self.c
        #@+others
        #@+node:vitalije.20180507115741.1: *5* recreateV helper
        gnxDict = c.fileCommands.gnxDict
        def recreateV(_vlist):
            h, b, gnx, c_vlist = _vlist
            v = gnxDict.get(gnx)
            if not v:
                v = leoNodes.VNode(context=c, gnx=gnx)
            else:
                del v.children[:]
            v.h = h
            v.b = b
            for x in c_vlist:
                cv = recreateV(x)
                v.children.append(cv)
                if v not in cv.parents:
                    cv.parents.append(v)
            return v
        #@-others
        recreateV(aList)
    #@+node:ekr.20100208082353.5923: *4* cacher.getCachedGlobalFileRatios
[docs]    def getCachedGlobalFileRatios(self):

        c = self.c
        if not c:
            return g.internalError('no commander')
        key = self.fileKey(c.mFileName, self.globals_tag)
        try:
            ratio = float(self.db.get('body_outline_ratio_%s' % (key), '0.5'))
        except TypeError:
            ratio = 0.5
        try:
            ratio2 = float(self.db.get('body_secondary_ratio_%s' % (key), '0.5'))
        except TypeError:
            ratio2 = 0.5
        return ratio, ratio2
    #@+node:ekr.20100208082353.5924: *4* cacher.getCachedStringPosition
[docs]    def getCachedStringPosition(self):

        c = self.c
        if not c:
            return g.internalError('no commander')
        key = self.fileKey(c.mFileName, self.globals_tag)
        str_pos = self.db.get('current_position_%s' % key)
        return str_pos
    #@+node:ekr.20100208082353.5922: *4* cacher.getCachedWindowPositionDict
[docs]    def getCachedWindowPositionDict(self, fn):
        '''Return a dict containing window positions.'''
        c = self.c
        if not c:
            g.internalError('no commander')
            return {}
        key = self.fileKey(c.mFileName, self.globals_tag)
        data = self.db.get('window_position_%s' % (key))
        # pylint: disable=unpacking-non-sequence
        if data:
            top, left, height, width = data
            top, left, height, width = int(top), int(left), int(height), int(width)
            d = {'top': top, 'left': left, 'height': height, 'width': width}
        else:
            d = {}
        return d
    #@+node:ekr.20100208071151.5905: *4* cacher.readFile
[docs]    def readFile(self, fileName, root):
        '''
        Read the file from the cache if possible.
        Return (s,ok,key)
        '''
        # sfn = g.shortFileName(fileName)
        if not g.enableDB:
            return '', False, None
        s = g.readFileIntoEncodedString(fileName, silent=True)
        if s is None:
            return s, False, None
        assert not g.isUnicode(s)
        # There will be a bug if s is not already an encoded string.
        key = self.fileKey(fileName, s, requireEncodedString=True)
            # Fix bug #385: use the full fileName, not root.h.
        ok = self.db and key in self.db
        if ok:
            # Delete the previous tree, regardless of the @<file> type.
            while root.hasChildren():
                root.firstChild().doDelete()
            # Recreate the file from the cache.
            aList = self.db.get(key)
            self.collectChangedNodes(root.v, aList, fileName)
            self.createOutlineFromCacheList2(root.v, aList)
            #self.createOutlineFromCacheList(root.v, aList, fileName=fileName)
        return s, ok, key
    #@+node:ekr.20100208082353.5927: *3* cacher.Writing
    #@+node:ekr.20100208071151.5901: *4* cacher.makeCacheList
[docs]    def makeCacheList(self, p):
        '''Create a recursive list describing a tree
        for use by createOutlineFromCacheList.
        '''
        # This is called after at.readPostPass, so p.b *is* the body text.
        return [
            p.h, p.b, p.gnx,
            [self.makeCacheList(p2) for p2 in p.children()]]
    #@+node:ekr.20100208082353.5929: *4* cacher.setCachedGlobalsElement
[docs]    def setCachedGlobalsElement(self, fn):

        c = self.c
        if not c:
            return g.internalError('no commander')
        key = self.fileKey(c.mFileName, self.globals_tag)
        self.db['body_outline_ratio_%s' % key] = str(c.frame.ratio)
        self.db['body_secondary_ratio_%s' % key] = str(c.frame.secondary_ratio)
        width, height, left, top = c.frame.get_window_info()
        self.db['window_position_%s' % key] = (
            str(top), str(left), str(height), str(width))
    #@+node:ekr.20100208082353.5928: *4* cacher.setCachedStringPosition
[docs]    def setCachedStringPosition(self, str_pos):

        c = self.c
        if not c:
            return g.internalError('no commander')
        key = self.fileKey(c.mFileName, self.globals_tag)
        self.db['current_position_%s' % key] = str_pos
    #@+node:ekr.20100208071151.5903: *4* cacher.writeFile
[docs]    def writeFile(self, p, fileKey):
        '''Update the cache after reading the file.'''
        # Check g.enableDB before giving internal error.
        if not g.enableDB:
            pass
        elif not fileKey:
            g.trace(g.callers(5))
            g.internalError('empty fileKey')
        elif self.db.get(fileKey):
            pass
        else:
            self.db[fileKey] = self.makeCacheList(p)
    #@+node:ekr.20100208065621.5890: *3* cacher.test
[docs]    def test(self):
        
        # pylint: disable=no-member
        if g.app.gui.guiName() == 'nullGui':
            # Null gui's don't normally set the g.app.gui.db.
            g.app.setGlobalDb()
        # Fixes bug 670108.
        assert g.app.db is not None
            # a PickleShareDB instance.
        # Make sure g.guessExternalEditor works.
        g.app.db.get("LEO_EDITOR")
        self.initFileDB('~/testpickleshare')
        db = self.db
        db.clear()
        assert not list(db.items())
        db['hello'] = 15
        db['aku ankka'] = [1, 2, 313]
        db['paths/nest/ok/keyname'] = [1, (5, 46)]
        db.uncache() # frees memory, causes re-reads later
        if 0: print(db.keys())
        db.clear()
        return True
    #@+node:ekr.20170624135447.1: *3* cacher.warning
[docs]    def warning(self, s):
        '''Print a warning message in red.'''
        g.es_print('Warning: %s' % s.lstrip(), color='red')
    #@-others
[docs]    def commit(self, close=True):
        # in some cases while unit testing self.db is python dict
        if SQLITE and hasattr(self.db, 'conn'):
            # pylint: disable=no-member
            self.db.conn.commit()
            if close:
                self.db.conn.close()
                self.inited = False
#@+node:ekr.20100208223942.5967: ** class PickleShareDB
_sentinel = object()

[docs]class PickleShareDB(object):
    """ The main 'connection' object for PickleShare database """
    #@+others
    #@+node:ekr.20100208223942.5968: *3*  Birth & special methods
    #@+node:ekr.20100208223942.5969: *4*  __init__ (PickleShareDB)
    def __init__(self, root):
        """
        Init the PickleShareDB class.
        root: The directory that contains the data. Created if it doesn't exist.
        """
        self.root = abspath(expanduser(root))
        if not isdir(self.root) and not g.unitTesting:
            self._makedirs(self.root)
        self.cache = {}
            # Keys are normalized file names.
            # Values are tuples (obj, orig_mod_time)

        def loadz(fileobj):
            if fileobj:
                try:
                    val = pickle.loads(
                        zlib.decompress(fileobj.read()))
                except ValueError:
                    g.es("Unpickling error - Python 3 data accessed from Python 2?")
                    return None
                return val
            else:
                return None

        def dumpz(val, fileobj):
            if fileobj:
                try:
                    # use Python 2's highest protocol, 2, if possible
                    data = pickle.dumps(val, 2)
                except Exception:
                    # but use best available if that doesn't work (unlikely)
                    data = pickle.dumps(val, pickle.HIGHEST_PROTOCOL)
                compressed = zlib.compress(data)
                fileobj.write(compressed)

        self.loader = loadz
        self.dumper = dumpz
    #@+node:ekr.20100208223942.5970: *4* __contains__(PickleShareDB)
    def __contains__(self, key):

        return self.has_key(key) # NOQA
    #@+node:ekr.20100208223942.5971: *4* __delitem__
    def __delitem__(self, key):
        """ del db["key"] """
        fn = join(self.root, key)
        self.cache.pop(fn, None)
        try:
            os.remove(fn)
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass
    #@+node:ekr.20100208223942.5972: *4* __getitem__
    def __getitem__(self, key):
        """ db['key'] reading """
        fn = join(self.root, key)
        try:
            mtime = (os.stat(fn)[stat.ST_MTIME])
        except OSError:
            raise KeyError(key)
        if fn in self.cache and mtime == self.cache[fn][1]:
            obj = self.cache[fn][0]
            return obj
        try:
            # The cached item has expired, need to read
            obj = self.loader(self._openFile(fn, 'rb'))
        except Exception:
            raise KeyError(key)
        self.cache[fn] = (obj, mtime)
        return obj
    #@+node:ekr.20100208223942.5973: *4* __iter__
    def __iter__(self):

        for k in list(self.keys()):
            yield k
    #@+node:ekr.20100208223942.5974: *4* __repr__
    def __repr__(self):
        return "PickleShareDB('%s')" % self.root
    #@+node:ekr.20100208223942.5975: *4* __setitem__
    def __setitem__(self, key, value):
        """ db['key'] = 5 """
        fn = join(self.root, key)
        parent, junk = split(fn)
        if parent and not isdir(parent):
            self._makedirs(parent)
        self.dumper(value, self._openFile(fn, 'wb'))
        try:
            mtime = os.path.getmtime(fn)
            self.cache[fn] = (value, mtime)
        except OSError as e:
            if e.errno != 2:
                raise
    #@+node:ekr.20100208223942.10452: *3* _makedirs
    def _makedirs(self, fn, mode=0o777):

        os.makedirs(fn, mode)
    #@+node:ekr.20100208223942.10458: *3* _openFile
    def _openFile(self, fn, mode='r'):
        """ Open this file.  Return a file object.

        Do not print an error message.
        It is not an error for this to fail.
        """
        try:
            return open(fn, mode)
        except Exception:
            return None
    #@+node:ekr.20100208223942.10454: *3* _walkfiles & helpers
    def _walkfiles(self, s, pattern=None):
        """ D.walkfiles() -> iterator over files in D, recursively.

        The optional argument, pattern, limits the results to files
        with names that match the pattern.  For example,
        mydir.walkfiles('*.tmp') yields only files with the .tmp
        extension.
        """
        for child in self._listdir(s):
            if isfile(child):
                if pattern is None or self._fn_match(child, pattern):
                    yield child
            elif isdir(child):
                for f in self._walkfiles(child, pattern):
                    yield f
    #@+node:ekr.20100208223942.10456: *4* _listdir
    def _listdir(self, s, pattern=None):
        """ D.listdir() -> List of items in this directory.

        Use D.files() or D.dirs() instead if you want a listing
        of just files or just subdirectories.

        The elements of the list are path objects.

        With the optional 'pattern' argument, this only lists
        items whose names match the given pattern.
        """
        names = os.listdir(s)
        if pattern is not None:
            names = fnmatch.filter(names, pattern)
        return [join(s, child) for child in names]
    #@+node:ekr.20100208223942.10464: *4* _fn_match
    def _fn_match(self, s, pattern):
        """ Return True if self.name matches the given pattern.

        pattern - A filename pattern with wildcards, for example '*.py'.
        """
        return fnmatch.fnmatch(basename(s), pattern)
    #@+node:ekr.20100208223942.5978: *3* clear (PickleShareDB)
[docs]    def clear(self, verbose=False):
        # Deletes all files in the fcache subdirectory.
        # It would be more thorough to delete everything
        # below the root directory, but it's not necessary.
        if verbose:
            g.red('clearing cache at directory...\n')
            g.es_print(self.root)
        for z in self.keys():
            self.__delitem__(z)
    #@+node:ekr.20100208223942.5979: *3* get
[docs]    def get(self, key, default=None):

        try:
            val = self[key]
            return val
        except KeyError:
            return default
    #@+node:ekr.20100208223942.5980: *3* has_key (PickleShareDB)
[docs]    def has_key(self, key):

        try:
            self[key]
        except KeyError:
            return False
        return True
    #@+node:ekr.20100208223942.5981: *3* items
[docs]    def items(self):
        return [z for z in self]
    #@+node:ekr.20100208223942.5982: *3* keys & helpers (PickleShareDB)
    # Called by clear, and during unit testing.

[docs]    def keys(self, globpat=None):
        """Return all keys in DB, or all keys matching a glob"""
        if globpat is None:
            files = self._walkfiles(self.root)
        else:
            # Do not call g.glob_glob here.
            files = [z for z in join(self.root, globpat)]
        result = [self._normalized(p) for p in files if isfile(p)]
        return result
    #@+node:ekr.20100208223942.5976: *4* _normalized
    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        # os.path.relpath doesn't work here.
        return self._relpathto(self.root, p).replace('\\', '/')
    #@+node:ekr.20100208223942.10460: *4* _relpathto
    # Used only by _normalized.

    def _relpathto(self, src, dst):
        """ Return a relative path from self to dst.

        If there is no relative path from self to dst, for example if
        they reside on different drives in Windows, then this returns
        dst.abspath().
        """
        origin = abspath(src)
        dst = abspath(dst)
        orig_list = self._splitall(normcase(origin))
        # Don't normcase dst!  We want to preserve the case.
        dest_list = self._splitall(dst)
        if orig_list[0] != normcase(dest_list[0]):
            # Can't get here from there.
            return dst
        # Find the location where the two paths start to differ.
        i = 0
        for start_seg, dest_seg in zip(orig_list, dest_list):
            if start_seg != normcase(dest_seg):
                break
            i += 1
        # Now i is the point where the two paths diverge.
        # Need a certain number of "os.pardir"s to work up
        # from the origin to the point of divergence.
        segments = [os.pardir] * (len(orig_list) - i)
        # Need to add the diverging part of dest_list.
        segments += dest_list[i:]
        if segments:
            return join(*segments)
        else:
            # If they happen to be identical, use os.curdir.
            return os.curdir
    #@+node:ekr.20100208223942.10462: *4* _splitall
    # Used by relpathto.

    def _splitall(self, s):
        """ Return a list of the path components in this path.

        The first item in the list will be a path.  Its value will be
        either os.curdir, os.pardir, empty, or the root directory of
        this path (for example, '/' or 'C:\\').  The other items in
        the list will be strings.

        path.path.joinpath(*result) will yield the original path.
        """
        parts = []
        loc = s
        while loc != os.curdir and loc != os.pardir:
            prev = loc
            loc, child = split(prev)
            if loc == prev:
                break
            parts.append(child)
        parts.append(loc)
        parts.reverse()
        return parts
    #@+node:ekr.20100208223942.5989: *3* uncache
[docs]    def uncache(self, *items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        """
        if not items:
            self.cache = {}
        for it in items:
            self.cache.pop(it, None)
    #@-others
#@+node:vitalije.20170716201700.1: ** class SqlitePickleShare
_sentinel = object()

[docs]class SqlitePickleShare(object):
    """ The main 'connection' object for SqlitePickleShare database """
    #@+others
    #@+node:vitalije.20170716201700.2: *3*  Birth & special methods
[docs]    def init_dbtables(self, conn):
        sql = 'create table if not exists cachevalues(key text primary key, data blob);'
        conn.execute(sql)
    #@+node:vitalije.20170716201700.3: *4*  __init__ (SqlitePickleShare)
    def __init__(self, root):
        """
        Init the SqlitePickleShare class.
        root: The directory that contains the data. Created if it doesn't exist.
        """
        self.root = abspath(expanduser(root))
        if not isdir(self.root) and not g.unitTesting:
            self._makedirs(self.root)
        dbfile = ':memory:' if g.unitTesting else join(root, 'cache.sqlite')
        self.conn = sqlite3.connect(dbfile, isolation_level=None)
        self.init_dbtables(self.conn)
        self.cache = {}
            # Keys are normalized file names.
            # Values are tuples (obj, orig_mod_time)

        def loadz(data):
            if data:
                try:
                    val = pickle.loads(zlib.decompress(data))
                except (ValueError, TypeError):
                    g.es("Unpickling error - Python 3 data accessed from Python 2?")
                    return None
                return val
            else:
                return None

        def dumpz(val):
            try:
                # use Python 2's highest protocol, 2, if possible
                data = pickle.dumps(val, protocol=2)
            except Exception:
                # but use best available if that doesn't work (unlikely)
                data = pickle.dumps(val, pickle.HIGHEST_PROTOCOL)
            return sqlite3.Binary(zlib.compress(data))

        self.loader = loadz
        self.dumper = dumpz
        if g.isPython3:
            self.reset_protocol_in_values()
    #@+node:vitalije.20170716201700.4: *4* __contains__(SqlitePickleShare)
    def __contains__(self, key):

        return self.has_key(key) # NOQA
    #@+node:vitalije.20170716201700.5: *4* __delitem__
    def __delitem__(self, key):
        """ del db["key"] """
        try:
            self.conn.execute('''delete from cachevalues
                where key=?''', (key,))
        except sqlite3.OperationalError:
            pass

    #@+node:vitalije.20170716201700.6: *4* __getitem__
    def __getitem__(self, key):
        """ db['key'] reading """
        try:
            obj = None
            for row in self.conn.execute('''select data from cachevalues
                where key=?''', (key,)):
                obj = self.loader(row[0])
                break
            else:
                raise KeyError(key)
        except sqlite3.Error:
            raise KeyError(key)
        return obj
    #@+node:vitalije.20170716201700.7: *4* __iter__
    def __iter__(self):

        for k in list(self.keys()):
            yield k
    #@+node:vitalije.20170716201700.8: *4* __repr__
    def __repr__(self):
        return "SqlitePickleShare('%s')" % self.root
    #@+node:vitalije.20170716201700.9: *4* __setitem__
    def __setitem__(self, key, value):
        """ db['key'] = 5 """
        try:
            data = self.dumper(value)
            self.conn.execute('''replace into cachevalues(key, data)
                values(?,?);''', (key, data))
        except sqlite3.OperationalError as e:
            g.es_exception(e)

    #@+node:vitalije.20170716201700.10: *3* _makedirs
    def _makedirs(self, fn, mode=0o777):

        os.makedirs(fn, mode)
    #@+node:vitalije.20170716201700.11: *3* _openFile
    def _openFile(self, fn, mode='r'):
        """ Open this file.  Return a file object.

        Do not print an error message.
        It is not an error for this to fail.
        """
        try:
            return open(fn, mode)
        except Exception:
            return None
    #@+node:vitalije.20170716201700.12: *3* _walkfiles & helpers
    def _walkfiles(self, s, pattern=None):
        """ D.walkfiles() -> iterator over files in D, recursively.

        The optional argument, pattern, limits the results to files
        with names that match the pattern.  For example,
        mydir.walkfiles('*.tmp') yields only files with the .tmp
        extension.
        """
        
    #@+node:vitalije.20170716201700.13: *4* _listdir
    def _listdir(self, s, pattern=None):
        """ D.listdir() -> List of items in this directory.

        Use D.files() or D.dirs() instead if you want a listing
        of just files or just subdirectories.

        The elements of the list are path objects.

        With the optional 'pattern' argument, this only lists
        items whose names match the given pattern.
        """
        names = os.listdir(s)
        if pattern is not None:
            names = fnmatch.filter(names, pattern)
        return [join(s, child) for child in names]
    #@+node:vitalije.20170716201700.14: *4* _fn_match
    def _fn_match(self, s, pattern):
        """ Return True if self.name matches the given pattern.

        pattern - A filename pattern with wildcards, for example '*.py'.
        """
        return fnmatch.fnmatch(basename(s), pattern)
    #@+node:vitalije.20170716201700.15: *3* clear (SqlitePickleShare)
[docs]    def clear(self, verbose=False):
        # Deletes all files in the fcache subdirectory.
        # It would be more thorough to delete everything
        # below the root directory, but it's not necessary.
        if verbose:
            g.red('clearing cache at directory...\n')
            g.es_print(self.root)
        self.conn.execute('delete from cachevalues;')
    #@+node:vitalije.20170716201700.16: *3* get
[docs]    def get(self, key, default=None):

        if not self.has_key(key):return default
        try:
            val = self[key]
            return val
        except KeyError:
            return default
    #@+node:vitalije.20170716201700.17: *3* has_key (PickleShareDB)
[docs]    def has_key(self, key):
        sql = 'select 1 from cachevalues where key=?;'
        for row in self.conn.execute(sql, (key,)):
            return True
        return False
    #@+node:vitalije.20170716201700.18: *3* items
[docs]    def items(self):
        sql = 'select key,data from cachevalues;'
        for key,data in self.conn.execute(sql):
            yield key, data
    #@+node:vitalije.20170716201700.19: *3* keys
    # Called by clear, and during unit testing.

[docs]    def keys(self, globpat=None):
        """Return all keys in DB, or all keys matching a glob"""
        if globpat is None:
            sql = 'select key from cachevalues;'
            args = tuple()
        else:
            sql = "select key from cachevalues where key glob ?;"
            # pylint: disable=trailing-comma-tuple
            args = globpat,
        for key in self.conn.execute(sql, args):
            yield key
    #@+node:vitalije.20170818091008.1: *3* reset_protocol_in_values
[docs]    def reset_protocol_in_values(self):
        PROTOCOLKEY = '__cache_pickle_protocol__'
        if self.get(PROTOCOLKEY, 3) == 2: return
        #@+others
        #@+node:vitalije.20170818115606.1: *4* viewrendered special case
        import json
        row = self.get('viewrendered_default_layouts') or (None, None)
        row = json.loads(json.dumps(row[0])), json.loads(json.dumps(row[1]))
        self['viewrendered_default_layouts'] = row
        #@+node:vitalije.20170818115617.1: *4* do_block
        def do_block(cur):
            itms = tuple((self.dumper(self.loader(v)), k) for k, v in cur)
            if itms:
                self.conn.executemany('update cachevalues set data=? where key=?', itms)
                self.conn.commit()
                return itms[-1][1]
            return None
        #@-others

        self.conn.isolation_level = 'DEFERRED'

        sql0 = '''select key, data from cachevalues order by key limit 50'''
        sql1 = '''select key, data from cachevalues where key > ? order by key limit 50'''


        block = self.conn.execute(sql0)
        lk = do_block(block)
        while lk:
            lk = do_block(self.conn.execute(sql1, (lk,)))
        self[PROTOCOLKEY] = 2
        self.conn.commit()

        self.conn.isolation_level = None
    #@+node:vitalije.20170716201700.23: *3* uncache
[docs]    def uncache(self, *items):
        """not used in SqlitePickleShare"""
        pass
    #@-others
#@-others
#@@language python
#@@tabwidth -4
#@@pagewidth 70
#@-leo