# [plain text]

# -*- test-case-name: twisted.test.test_jelly -*-

# Twisted, the Framework of Your Internet
# Copyright (C) 2001 Matthew W. Lefkowitz
# This library is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""S-expression-based persistence of python objects.

Stability: semi-stable

Future Plans: Optimization.  Lots of optimization.  No semantic breakages
should be necessary, but if small tweaks are required to gain acceptable
large-scale performance then they will be made.  Although Glyph is the
maintainer, Bruce Mitchener will be supervising most of the optimization work

I do something very much like L{Pickle<pickle>}; however, pickle's main goal
seems to be efficiency (both in space and time); jelly's main goals are
security, human readability, and portability to other environments.

This is how Jelly converts various objects to s-expressions:

Boolean: True --> ['boolean', 'true']

Integer: 1 --> 1

List: [1, 2] --> ['list', 1, 2]

String: \"hello\" --> \"hello\"

Float: 2.3 --> 2.3

Dictionary: {'a' : 1, 'b' : 'c'} --> ['dictionary', ['b', 'c'], ['a', 1]]

Module: UserString --> ['module', 'UserString']

Class: UserString.UserString --> ['class', ['module', 'UserString'], 'UserString']

Function: string.join --> ['function', 'join', ['module', 'string']]

Instance: s is an instance of UserString.UserString, with a __dict__ {'data': 'hello'}:
[\"UserString.UserString\", ['dictionary', ['data', 'hello']]]

# ['instance', ['class', ['module', 'UserString'], 'UserString'], ['dictionary', ['data', 'hello']]]

Class Method:
['method', 'center', ['None'], ['class', ['module', 'UserString'], 'UserString']]

Instance Method, where s is an instance of UserString.UserString:
['method', 'center', ['instance', ['reference', 1, ['class', ['module', 'UserString'], 'UserString']], ['dictionary', ['data', 'd']]], ['dereference', 1]]

@author: U{Glyph Lefkowitz<>}
"""

__version__ = "$Revision: 1.2 $"[11:-2]

# System Imports
import string
import pickle
import sys
import types
from types import StringType
# UnicodeType is optional: interpreters built without unicode support do not
# provide it, in which case the rest of this module tests for None.
try:
    from types import UnicodeType
except ImportError:
    UnicodeType = None
from types import IntType
from types import TupleType
from types import ListType
from types import DictType
from types import LongType
from types import FloatType
from types import FunctionType
from types import MethodType
from types import ModuleType
from types import DictionaryType
from types import InstanceType
from types import NoneType
from types import ClassType
import copy

# BooleanType is optional: when the interpreter's types module does not
# provide it, the name is set to None and boolean support is disabled.
try:
    from types import BooleanType
except ImportError:
    BooleanType = None

from new import instance
from new import instancemethod

# Twisted Imports
from twisted.python.reflect import namedObject, namedModule, qual
from twisted.persisted.crefutil import NotKnown, _Tuple, _InstanceMethod, _DictKeyAndValue, _Dereference
from twisted.python import runtime

from twisted.spread.interfaces import IJellyable, IUnjellyable

# DictTypes is the tuple of types that _Jellier.jelly serializes as a
# 'dictionary'.  On Jython, PyStringMap is included as well.
if runtime.platform.getType() == "java":
    from org.python.core import PyStringMap
    DictTypes = (DictionaryType, PyStringMap)
else:
    DictTypes = (DictionaryType,)

# Atoms: tag strings placed as the first element of a jellied s-expression
# list to identify what the rest of the list encodes.
# NOTE(review): the trailing single-letter comments look like short-form
# abbreviations of each atom; nothing in this file uses them -- confirm.
None_atom = "None"                  # N
# code
class_atom = "class"                # c
module_atom = "module"              # m
function_atom = "function"          # f

# references
dereference_atom = 'dereference'    # D
persistent_atom = 'persistent'      # p
reference_atom = 'reference'        # r

# mutable collections
dictionary_atom = "dictionary"      # d
list_atom = 'list'                  # l

# immutable collections
#   (assignment to __dict__ and __class__ still might go away!)
tuple_atom = "tuple"                # t
instance_atom = 'instance'          # i

# errors
unpersistable_atom = "unpersistable"# u
# Maps a type tag (usually a qualified class name) to the local class or
# callable used to unjelly it; filled in by setUnjellyableForClass.
unjellyableRegistry = {}
# Maps a type tag to a factory called with the unjellied state; filled in by
# setUnjellyableFactoryForClass.
unjellyableFactoryRegistry = {}

def _maybeClass(classnamep):
    """Return the qualified name of a class, or the argument unchanged.

    @param classnamep: either a class (classic or new-style) or a string
        tag naming one.

    @return: C{qual(classnamep)} when given a class, otherwise
        C{classnamep} itself.
    """
    # Probe for new-style class support; interpreters without the `object'
    # builtin cannot have new-style classes, so only classic ones are checked.
    try:
        object
    except NameError:
        isObject = 0
    else:
        isObject = isinstance(classnamep, type)
    if isinstance(classnamep, ClassType) or isObject:
        return qual(classnamep)
    return classnamep

def setUnjellyableForClass(classname, unjellyable):
    """Set which local class will represent a remote type.

    If you have written a Copyable class that you expect your client to be
    receiving, write a local "copy" class to represent it, then call::

        jelly.setUnjellyableForClass('module.package.Class', MyCopier).

    Call this at the module level immediately after its class
    definition. MyCopier should be a subclass of RemoteCopy.

    The classname may be a special tag returned by
    'Copyable.getTypeToCopyFor' rather than an actual classname.

    This call is also for cached classes, since there will be no
    overlap.  The rules are the same.

    @param classname: the type tag to register; a class object is also
        accepted and is converted to its qualified name.

    @param unjellyable: the local class (or callable) used to unjelly
        s-expressions carrying that tag.
    """

    global unjellyableRegistry
    classname = _maybeClass(classname)
    unjellyableRegistry[classname] = unjellyable

def setUnjellyableFactoryForClass(classname, copyFactory):
    """
    Set the factory to construct a remote instance of a type::

        jelly.setUnjellyableFactoryForClass('module.package.Class', MyFactory)

    Call this at the module level immediately after its class definition.
    C{copyFactory} is called with the unjellied state and its result is
    returned as the unjellied object.
    NOTE(review): the original sentence here was truncated after "should
    return an instance or subclass of"; presumably RemoteCopy -- confirm.

    Similar to L{setUnjellyableForClass} except it uses a factory instead
    of creating an instance.

    @param classname: the type tag to register; a class object is also
        accepted and is converted to its qualified name.

    @param copyFactory: a callable taking the unjellied state.
    """

    global unjellyableFactoryRegistry
    classname = _maybeClass(classname)
    unjellyableFactoryRegistry[classname] = copyFactory

def setUnjellyableForClassTree(module, baseClass, prefix=None):
    """
    Set all classes in a module derived from C{baseClass} as copiers for
    a corresponding remote class.

    When you have a hierarchy of Copyable (or Cacheable) classes on
    one side, and a mirror structure of Copied (or RemoteCache)
    classes on the other, use this to setUnjellyableForClass all your
    Copieds for the Copyables.

    Each copyTag (the "classname" argument to getTypeToCopyFor, and
    what the Copyable's getTypeToCopyFor returns) is formed from
    adding a prefix to the Copied's class name.  The prefix defaults
    to module.__name__.  If you wish the copy tag to consist of solely
    the classname, pass the empty string ''.

    @param module: a module object from which to pull the Copied classes.
        (passing sys.modules[__name__] might be useful)

    @param baseClass: the base class from which all your Copied classes derive.

    @param prefix: the string prefixed to classnames to form the
        copy tag under which each class is registered.
    """
    if prefix is None:
        prefix = module.__name__

    if prefix:
        prefix = "%s." % prefix

    for i in dir(module):
        i_ = getattr(module, i)
        # Only classic classes are considered here.
        if type(i_) == types.ClassType:
            if issubclass(i_, baseClass):
                setUnjellyableForClass('%s%s' % (prefix, i), i_)

def getInstanceState(inst, jellier):
    """Utility method to default to 'normal' state rules in serialization.

    @param inst: the instance to serialize.

    @param jellier: the jellier driving serialization; its prepare, jelly
        and preserve methods are used.

    @return: the s-expression [qualified class name, jellied state], as
        handed back by C{jellier.preserve}.
    """
    # Prefer the instance's own notion of its state, falling back to its
    # attribute dictionary.
    if hasattr(inst, "__getstate__"):
        state = inst.__getstate__()
    else:
        state = inst.__dict__
    sxp = jellier.prepare(inst)
    sxp.extend([qual(inst.__class__), jellier.jelly(state)])
    return jellier.preserve(inst, sxp)

def setInstanceState(inst, unjellier, jellyList):
    """Utility method to default to 'normal' state rules in unserialization.

    @param inst: the instance whose state is being restored.

    @param unjellier: the unjellier driving unserialization; its unjelly
        method decodes the state.

    @param jellyList: the s-expression for the instance; element 1 is the
        jellied state.

    @return: C{inst}, with its state restored.
    """
    state = unjellier.unjelly(jellyList[1])
    # Let the instance restore itself if it knows how; otherwise the state
    # simply becomes its attribute dictionary.
    if hasattr(inst, "__setstate__"):
        inst.__setstate__(state)
    else:
        inst.__dict__ = state
    return inst

class Unpersistable:
    """
    This is an instance of a class that comes back when something couldn't be
    persisted.
    """
    def __init__(self, reason):
        """
        Initialize an unpersistable object with a descriptive `reason' string.
        """
        self.reason = reason

    def __repr__(self):
        return "Unpersistable(%s)" % repr(self.reason)

class Jellyable:
    """
    Inherit from me to Jelly yourself directly with the `getStateFor'
    convenience method.
    """

    __implements__ = IJellyable,

    def getStateFor(self, jellier):
        """Return the state to serialize; by default my attribute dictionary.
        """
        return self.__dict__

    def jellyFor(self, jellier):
        """Serialize myself as [qualified class name, jellied state].

        The state is obtained from L{getStateFor}; the layout matches what
        L{Unjellyable.unjellyFor} reads back (state at index 1).
        """
        sxp = jellier.prepare(self)
        sxp.extend([
            qual(self.__class__),
            jellier.jelly(self.getStateFor(jellier))])
        return jellier.preserve(self, sxp)

class Unjellyable:
    """
    Inherit from me to Unjelly yourself directly with the
    `setStateFor' convenience method.
    """

    __implements__ = IUnjellyable,

    def setStateFor(self, unjellier, state):
        """Install the unjellied state; by default it becomes my attribute
        dictionary.
        """
        self.__dict__ = state

    def unjellyFor(self, unjellier, jellyList):
        """Restore myself from an s-expression and return myself.

        @param jellyList: the s-expression; element 1 is the jellied state.

        @return: C{self}.  The unjellier hands this value back to its
            caller, so it must be the restored object.
        """
        state = unjellier.unjelly(jellyList[1])
        self.setStateFor(unjellier, state)
        return self

class _Jellier:
    """(Internal) This class manages state for a call to jelly()
    """
    def __init__(self, taster, persistentStore, invoker):
        """Initialize.

        @param taster: security options consulted through isTypeAllowed and
            isClassAllowed.

        @param persistentStore: optional callable invoked as
            C{persistentStore(obj, jellier)} for instances; a non-None
            result is emitted as a 'persistent' reference.

        @param invoker: stored on the jellier; not used by this class.
        """
        self.taster = taster
        # `preserved' is a dict of previously seen instances.
        self.preserved = {}
        # `cooked' is a dict of previously backreferenced instances to their `ref' lists.
        self.cooked = {}
        # `cooker' keeps every prepared object alive so its id() stays unique.
        self.cooker = {}
        self._ref_id = 1
        self.persistentStore = persistentStore
        self.invoker = invoker

    def _cook(self, object):
        """(internal)
        backreference an object.

        Notes on this method for the hapless future maintainer: If I've already
        gone through the prepare/preserve cycle on the specified object (it is
        being referenced after the serializer is \"done with\" it, e.g. this
        reference is NOT circular), the copy-in-place of aList is relevant,
        since the list being modified is the actual, pre-existing jelly
        expression that was returned for that object. If not, it's technically
        superfluous, since the value in self.preserved didn't need to be set,
        but the invariant that self.preserved[id(object)] is a list is
        convenient because that means we don't have to test and create it or
        not create it here, creating fewer code-paths.  that's why
        self.preserved is always set to a list.

        Sorry that this code is so hard to follow, but Python objects are
        tricky to persist correctly. -glyph
        """

        aList = self.preserved[id(object)]
        newList = copy.copy(aList)
        # make a new reference ID
        refid = self._ref_id
        self._ref_id = self._ref_id + 1
        # replace the old list in-place, so that we don't have to track the
        # previous reference to it.
        aList[:] = [reference_atom, refid, newList]
        self.cooked[id(object)] = [dereference_atom, refid]
        return aList

    def prepare(self, object):
        """(internal)
        create a list for persisting an object to.  this will allow
        backreferences to be made internal to the object. (circular
        references).

        The reason this needs to happen is that we don't generate an ID for
        every object, so we won't necessarily know which ID the object will
        have in the future.  When it is 'cooked' ( see _cook ), it will be
        assigned an ID, and the temporary placeholder list created here will be
        modified in-place to create an expression that gives this object an ID:
        [reference id# [object-jelly]].
        """

        # create a placeholder list to be preserved
        self.preserved[id(object)] = []
        # keep a reference to this object around, so it doesn't disappear!
        # (This isn't always necessary, but for cases where the objects are
        # dynamically generated by __getstate__ or getStateToCopyFor calls, it
        # is; id() will return the same value for a different object if it gets
        # garbage collected.  This may be optimized later.)
        self.cooker[id(object)] = object
        return []

    def preserve(self, object, sexp):
        """(internal)
        mark an object's persistent list for later referral
        """
        #if I've been cooked in the meanwhile,
        if id(object) in self.cooked:
            # replace the placeholder empty list with the real one
            self.preserved[id(object)][2] = sexp
            # but give this one back.
            sexp = self.preserved[id(object)]
        else:
            self.preserved[id(object)] = sexp
        return sexp

    constantTypes = {types.StringType : 1, types.IntType : 1,
                     types.FloatType : 1, types.LongType : 1}

    def _checkMutable(self,obj):
        """(internal)
        Return a [dereference, id] expression if obj has been seen before,
        cooking it first when necessary; return None for a new object.
        """
        objId = id(obj)
        if objId in self.cooked:
            return self.cooked[objId]
        if objId in self.preserved:
            # Seen but not yet given a reference ID: assign one now so that
            # self.cooked has an entry to hand back.
            self._cook(obj)
            return self.cooked[objId]

    def jelly(self, obj):
        """Serialize obj to an s-expression.

        @raise InsecureJelly: if the taster disallows the object's type.

        @raise NotImplementedError: if the type is allowed but unknown.
        """
        if isinstance(obj, Jellyable):
            preRef = self._checkMutable(obj)
            if preRef:
                return preRef
            return obj.jellyFor(self)
        objType = type(obj)
        if self.taster.isTypeAllowed(
            string.replace(objType.__name__, ' ', '_')):
            # "Immutable" Types
            if ((objType is StringType) or
                (objType is IntType) or
                (objType is LongType) or
                (objType is FloatType)):
                return obj
            elif objType is MethodType:
                # Layout read back by _Unjellier._unjelly_method:
                # name, jellied im_self, jellied im_class.
                return ["method",
                        obj.im_func.__name__,
                        self.jelly(obj.im_self),
                        self.jelly(obj.im_class)]

            elif UnicodeType and objType is UnicodeType:
                return ['unicode', obj.encode('UTF-8')]
            elif objType is NoneType:
                return ['None']
            elif objType is FunctionType:
                name = obj.__name__
                return ['function', str(pickle.whichmodule(obj, obj.__name__))
                        + '.' +
                        name]
            elif objType is ModuleType:
                return ['module', obj.__name__]
            elif objType is BooleanType:
                return ['boolean', obj and 'true' or 'false']
            elif objType is ClassType or issubclass(type, objType):
                return ['class', qual(obj)]
            else:
                preRef = self._checkMutable(obj)
                if preRef:
                    return preRef
                # "Mutable" Types
                sxp = self.prepare(obj)
                if objType is ListType:
                    sxp.append(list_atom)
                    for item in obj:
                        sxp.append(self.jelly(item))
                elif objType is TupleType:
                    sxp.append(tuple_atom)
                    for item in obj:
                        sxp.append(self.jelly(item))
                elif objType in DictTypes:
                    sxp.append(dictionary_atom)
                    for key, val in obj.items():
                        sxp.append([self.jelly(key), self.jelly(val)])
                elif objType is InstanceType:
                    className = qual(obj.__class__)
                    persistent = None
                    if self.persistentStore:
                        persistent = self.persistentStore(obj, self)
                    if persistent is not None:
                        sxp.append(persistent_atom)
                        sxp.append(persistent)
                    elif self.taster.isClassAllowed(obj.__class__):
                        sxp.append(className)
                        if hasattr(obj, "__getstate__"):
                            state = obj.__getstate__()
                        else:
                            state = obj.__dict__
                        sxp.append(self.jelly(state))
                    else:
                        self.unpersistable(
                            "instance of class %s deemed insecure" %
                            qual(obj.__class__), sxp)
                else:
                    raise NotImplementedError("Don't know the type: %s" % objType)
                return self.preserve(obj, sxp)
        else:
            if objType is types.InstanceType:
                raise InsecureJelly("Class not allowed for instance: %s %s" %
                                    (obj.__class__, obj))
            raise InsecureJelly("Type not allowed for object: %s %s" %
                                (objType, obj))

    def unpersistable(self, reason, sxp=None):
        """(internal)
        Returns an sexp: (unpersistable "reason").  Utility method for making
        note that a particular object could not be serialized.

        @param sxp: an optional existing list to fill in place; a new list
            is created when omitted.
        """
        if sxp is None:
            sxp = []
        sxp.append(unpersistable_atom)
        sxp.append(reason)
        return sxp

class _Unjellier:
    """(Internal) This class manages state for a call to unjelly()
    """
    def __init__(self, taster, persistentLoad, invoker):
        """Initialize.

        @param taster: security options consulted through isTypeAllowed,
            isModuleAllowed and isClassAllowed.

        @param persistentLoad: optional callable invoked as
            C{persistentLoad(id, unjellier)} for 'persistent' expressions.

        @param invoker: stored on the unjellier; not used by this class.
        """
        self.taster = taster
        self.persistentLoad = persistentLoad
        # reference id -> unjellied object (or a placeholder for an object
        # that has been dereferenced before its definition was seen).
        self.references = {}
        # postUnjelly hooks collected while decoding; run by unjellyFull.
        self.postCallbacks = []
        self.invoker = invoker

    def unjellyFull(self, obj):
        """Unjelly obj, then run every collected postUnjelly callback."""
        o = self.unjelly(obj)
        for m in self.postCallbacks:
            m()
        return o

    def unjelly(self, obj):
        """Decode one s-expression.

        Non-list values are returned unchanged.  For a list, the first
        element is the type tag; it is resolved, in order, through the
        unjellyable registry, the factory registry, a C{_unjelly_<tag>}
        method, and finally as a qualified class name.

        @raise InsecureJelly: if the taster disallows the type, module or
            class involved.
        """
        if type(obj) is not types.ListType:
            return obj
        jelType = obj[0]
        if not self.taster.isTypeAllowed(jelType):
            raise InsecureJelly(jelType)
        regClass = unjellyableRegistry.get(jelType)
        if regClass is not None:
            if isinstance(regClass, ClassType):
                inst = _Dummy() # XXX chomp, chomp
                inst.__class__ = regClass
                method = inst.unjellyFor
            else:
                method = regClass # this is how it ought to be done
            val = method(self, obj)
            if hasattr(val, 'postUnjelly'):
                self.postCallbacks.append(val.postUnjelly)
            return val
        regFactory = unjellyableFactoryRegistry.get(jelType)
        if regFactory is not None:
            state = self.unjelly(obj[1])
            inst = regFactory(state)
            if hasattr(inst, 'postUnjelly'):
                self.postCallbacks.append(inst.postUnjelly)
            return inst
        thunk = getattr(self, '_unjelly_%s'%jelType, None)
        if thunk is not None:
            ret = thunk(obj[1:])
        else:
            # The tag is a qualified class name: 'module.path.ClassName'.
            nameSplit = string.split(jelType, '.')
            modName = string.join(nameSplit[:-1], '.')
            if not self.taster.isModuleAllowed(modName):
                raise InsecureJelly("Module %s not allowed (in type %s)." % (modName, jelType))
            clz = namedObject(jelType)
            if not self.taster.isClassAllowed(clz):
                raise InsecureJelly("Class %s not allowed." % jelType)
            if hasattr(clz, "__setstate__"):
                ret = instance(clz, {})
                state = self.unjelly(obj[1])
                ret.__setstate__(state)
            else:
                state = self.unjelly(obj[1])
                ret = instance(clz, state)
            if hasattr(clz, 'postUnjelly'):
                self.postCallbacks.append(ret.postUnjelly)
        return ret

    def _unjelly_None(self, exp):
        return None

    def _unjelly_unicode(self, exp):
        if UnicodeType:
            return unicode(exp[0], "UTF-8")
        else:
            return Unpersistable(exp[0])

    def _unjelly_boolean(self, exp):
        if BooleanType:
            assert exp[0] in ('true', 'false')
            return exp[0] == 'true'
        else:
            return Unpersistable(exp[0])

    def unjellyInto(self, obj, loc, jel):
        """Unjelly jel and store the result at obj[loc].

        If the result is not yet known, obj is registered as a dependant of
        the placeholder under key loc.

        @return: the unjellied object (possibly a NotKnown placeholder).
        """
        o = self.unjelly(jel)
        if isinstance(o, NotKnown):
            o.addDependant(obj, loc)
        obj[loc] = o
        return o

    def _unjelly_dereference(self, lst):
        refid = lst[0]
        x = self.references.get(refid)
        if x is not None:
            return x
        # Not defined yet: hand out a placeholder and remember it.
        der = _Dereference(refid)
        self.references[refid] = der
        return der

    def _unjelly_reference(self, lst):
        refid = lst[0]
        exp = lst[1]
        o = self.unjelly(exp)
        ref = self.references.get(refid)
        if (ref is None):
            self.references[refid] = o
        elif isinstance(ref, NotKnown):
            # NOTE(review): relies on crefutil's NotKnown.resolveDependants
            # to patch objects registered via addDependant -- confirm
            # against twisted.persisted.crefutil.
            ref.resolveDependants(o)
            self.references[refid] = o
        else:
            assert 0, "Multiple references with same ID!"
        return o

    def _unjelly_tuple(self, lst):
        # Build in a list first; elements are filled in by index.
        l = list(range(len(lst)))
        finished = 1
        for elem in range(len(lst)):
            if isinstance(self.unjellyInto(l, elem, lst[elem]), NotKnown):
                finished = 0
        if finished:
            return tuple(l)
        else:
            # Some element is still unresolved; return a placeholder tuple.
            return _Tuple(l)

    def _unjelly_list(self, lst):
        l = list(range(len(lst)))
        for elem in range(len(lst)):
            self.unjellyInto(l, elem, lst[elem])
        return l

    def _unjelly_dictionary(self, lst):
        d = {}
        for k, v in lst:
            kvd = _DictKeyAndValue(d)
            self.unjellyInto(kvd, 0, k)
            self.unjellyInto(kvd, 1, v)
        return d

    def _unjelly_module(self, rest):
        moduleName = rest[0]
        if type(moduleName) != types.StringType:
            raise InsecureJelly("Attempted to unjelly a module with a non-string name.")
        if not self.taster.isModuleAllowed(moduleName):
            raise InsecureJelly("Attempted to unjelly module named %s" % repr(moduleName))
        mod = __import__(moduleName, {}, {},"x")
        return mod

    def _unjelly_class(self, rest):
        clist = string.split(rest[0], '.')
        modName = string.join(clist[:-1], '.')
        if not self.taster.isModuleAllowed(modName):
            raise InsecureJelly("module %s not allowed" % modName)
        klaus = namedObject(rest[0])
        if type(klaus) is not types.ClassType:
            raise InsecureJelly("class %s unjellied to something that isn't a class: %s" % (repr(rest[0]), repr(klaus)))
        if not self.taster.isClassAllowed(klaus):
            raise InsecureJelly("class not allowed: %s" % qual(klaus))
        return klaus

    def _unjelly_function(self, rest):
        modSplit = string.split(rest[0], '.')
        modName = string.join(modSplit[:-1], '.')
        if not self.taster.isModuleAllowed(modName):
            raise InsecureJelly("Module not allowed: %s"% modName)
        # XXX do I need an isFunctionAllowed?
        function = namedObject(rest[0])
        return function

    def _unjelly_persistent(self, rest):
        if self.persistentLoad:
            pload = self.persistentLoad(rest[0], self)
            return pload
        else:
            return Unpersistable("persistent callback not found")

    def _unjelly_instance(self, rest):
        clz = self.unjelly(rest[0])
        if type(clz) is not types.ClassType:
            raise InsecureJelly("Instance found with non-class class.")
        if hasattr(clz, "__setstate__"):
            inst = instance(clz, {})
            state = self.unjelly(rest[1])
            inst.__setstate__(state)
        else:
            state = self.unjelly(rest[1])
            inst = instance(clz, state)
        if hasattr(clz, 'postUnjelly'):
            self.postCallbacks.append(inst.postUnjelly)
        return inst

    def _unjelly_unpersistable(self, rest):
        return Unpersistable(rest[0])

    def _unjelly_method(self, rest):
        ''' (internal) unjelly a method
        '''
        im_name = rest[0]
        im_self = self.unjelly(rest[1])
        im_class = self.unjelly(rest[2])
        if type(im_class) is not types.ClassType:
            raise InsecureJelly("Method found with non-class class.")
        if im_name in im_class.__dict__:
            if im_self is None:
                im = getattr(im_class, im_name)
            elif isinstance(im_self, NotKnown):
                im = _InstanceMethod(im_name, im_self, im_class)
            else:
                im = instancemethod(im_class.__dict__[im_name],
                                    im_self,
                                    im_class)
        else:
            # A real exception instead of a string exception, which newer
            # interpreters reject.
            raise TypeError('instance method changed')
        return im

class _Dummy:
    """(Internal)
    Dummy class, used for unserializing instances.
    """

#### Published Interface.

class InsecureJelly(Exception):
    """
    This exception will be raised when a jelly is deemed `insecure'; e.g. it
    contains a type, class, or module disallowed by the specified `taster'
    """

class DummySecurityOptions:
    """DummySecurityOptions() -> insecure security options
    Dummy security options -- this class will allow anything.
    """
    def isModuleAllowed(self, moduleName):
        """DummySecurityOptions.isModuleAllowed(moduleName) -> boolean
        returns 1 if a module by that name is allowed, 0 otherwise
        """
        return 1

    def isClassAllowed(self, klass):
        """DummySecurityOptions.isClassAllowed(class) -> boolean
        Assumes the module has already been allowed.  Returns 1 if the given
        class is allowed, 0 otherwise.
        """
        return 1

    def isTypeAllowed(self, typeName):
        """DummySecurityOptions.isTypeAllowed(typeName) -> boolean
        Returns 1 if the given type is allowed, 0 otherwise.
        """
        return 1

class SecurityOptions:
    """
    This will by default disallow everything, except for 'none'.
    """

    basicTypes = ["dictionary", "list", "tuple",
                  "reference", "dereference", "unpersistable",
                  "persistent", "long_int", "long", "dict"]

    def __init__(self):
        """SecurityOptions()
        Initialize with only the simple scalar types allowed, and no
        modules or classes.
        """
        # I don't believe any of these types can ever pose a security hazard,
        # except perhaps "reference"...
        self.allowedTypes = {"None": 1,
                             "bool": 1,
                             "boolean": 1,
                             "string": 1,
                             "str": 1,
                             "int": 1,
                             "float": 1,
                             "NoneType": 1}
        if hasattr(types, 'UnicodeType'):
            self.allowedTypes['unicode'] = 1
        self.allowedModules = {}
        self.allowedClasses = {}

    def allowBasicTypes(self):
        """SecurityOptions.allowBasicTypes()
        Allow all `basic' types.  (Dictionary and list.  Int, string, and float are implicitly allowed.)
        """
        self.allowTypes(*self.basicTypes)

    def allowTypes(self, *types):
        """SecurityOptions.allowTypes(typeString): Allow a particular type, by its name.
        Spaces in a name are replaced by underscores.
        """
        for typ in types:
            self.allowedTypes[typ.replace(' ', '_')] = 1

    def allowInstancesOf(self, *classes):
        """SecurityOptions.allowInstances(klass, klass, ...): allow instances
        of the specified classes

        This will also allow the 'instance', 'class' (renamed 'classobj' in
        Python 2.3), and 'module' types, as well as basic types.

        This does not allow the modules that define the classes; the
        unjellier checks the module before the class, so callers must also
        use allowModules for them.
        """
        self.allowBasicTypes()
        self.allowTypes("instance", "class", "classobj", "module")
        for klass in classes:
            self.allowedClasses[klass] = 1

    def allowModules(self, *modules):
        """SecurityOptions.allowModules(module, module, ...): allow modules by name
        Each argument may be a module name or a module object.
        """
        for module in modules:
            if isinstance(module, type(types)):
                module = module.__name__
            self.allowedModules[module] = 1

    def isModuleAllowed(self, moduleName):
        """SecurityOptions.isModuleAllowed(moduleName) -> boolean
        returns 1 if a module by that name is allowed, 0 otherwise
        """
        return moduleName in self.allowedModules

    def isClassAllowed(self, klass):
        """SecurityOptions.isClassAllowed(class) -> boolean
        Assumes the module has already been allowed.  Returns 1 if the given
        class is allowed, 0 otherwise.
        """
        return klass in self.allowedClasses

    def isTypeAllowed(self, typeName):
        """SecurityOptions.isTypeAllowed(typeName) -> boolean
        Returns 1 if the given type is allowed, 0 otherwise.
        Any name containing a '.' is treated as a qualified class name and
        allowed here; module and class checks are made separately.
        """
        return (typeName in self.allowedTypes or
                '.' in typeName)

# Module-level SecurityOptions instance, created with the constructor's
# default set of allowed types and no allowed modules or classes.
globalSecurity = SecurityOptions()

def jelly(object, taster = DummySecurityOptions(), persistentStore=None, invoker=None):
    """Serialize to s-expression.

    Returns a list which is the serialized representation of an object.  An
    optional 'taster' argument takes a SecurityOptions and will mark any
    insecure objects as unpersistable rather than serializing them.

    @param persistentStore: optional callable passed through to the
        jellier; see L{_Jellier}.

    @param invoker: passed through to the jellier.
    """
    return _Jellier(taster, persistentStore, invoker).jelly(object)

def unjelly(sexp, taster = DummySecurityOptions(), persistentLoad=None, invoker=None):
    """Unserialize from s-expression.

    Takes an list that was the result from a call to jelly() and unserializes
    an arbitrary object from it.  The optional 'taster' argument, an instance
    of SecurityOptions, will cause an InsecureJelly exception to be raised if a
    disallowed type, module, or class attempted to unserialize.

    @param persistentLoad: optional callable passed through to the
        unjellier; see L{_Unjellier}.

    @param invoker: passed through to the unjellier.
    """
    return _Unjellier(taster, persistentLoad, invoker).unjellyFull(sexp)