Merge pull request #1117 from adamcik/feature/models-memory-reduction

Improve models memory usage
This commit is contained in:
Stein Magnus Jodal 2015-04-08 23:29:08 +02:00
commit c367d350f7
6 changed files with 164 additions and 95 deletions

View File

@ -8,7 +8,7 @@ and immutable. In other words, they can only be set through the class
constructor during instance creation.
If you want to modify a model, use the
:meth:`~mopidy.models.ImmutableObject.copy` method. It accepts keyword
:meth:`~mopidy.models.ImmutableObject.replace` method. It accepts keyword
arguments for the parts of the model you want to change, and copies the rest of
the data from the model you call it on. Example::
@ -16,7 +16,7 @@ the data from the model you call it on. Example::
>>> track1 = Track(name='Christmas Carol', length=171)
>>> track1
Track(artists=[], length=171, name='Christmas Carol')
>>> track2 = track1.copy(length=37)
>>> track2 = track1.replace(length=37)
>>> track2
Track(artists=[], length=37, name='Christmas Carol')
>>> track1
@ -75,6 +75,7 @@ Data model helpers
==================
.. autoclass:: mopidy.models.ImmutableObject
:members:
.. autoclass:: mopidy.models.Field

View File

@ -22,6 +22,11 @@ Models
- Added type checks and other sanity checks to model construction and
serialization. (Fixes: :issue:`865`)
- Memory usage for models has been greatly improved. We now have a lower
overhead per instance by using slots, intern identifiers and automatically
reuse instances. For the test data set this was developed against, a library
of ~14000 tracks, went from needing ~75MB to ~17MB. (Fixes: :issue:`348`)
Internal changes
----------------

View File

@ -1,6 +1,10 @@
from __future__ import absolute_import, unicode_literals
import copy
import json
import weakref
from mopidy.utils import deprecation
# TODO: split into base models, serialization and fields?
@ -23,7 +27,7 @@ class Field(object):
"""
def __init__(self, default=None, type=None, choices=None):
self._name = None # Set by FieldOwner
self._name = None # Set by ImmutableObjectMeta
self._choices = choices
self._default = default
self._type = type
@ -44,7 +48,7 @@ class Field(object):
def __get__(self, instance, owner):
if not instance:
return self
return instance.__dict__.get(self._name, self._default)
return getattr(instance, '_' + self._name, self._default)
def __set__(self, instance, value):
if value is not None:
@ -53,10 +57,11 @@ class Field(object):
if value is None or value == self._default:
self.__delete__(instance)
else:
instance.__dict__[self._name] = value
setattr(instance, '_' + self._name, value)
def __delete__(self, instance):
instance.__dict__.pop(self._name, None)
if hasattr(instance, '_' + self._name):
delattr(instance, '_' + self._name)
class String(Field):
@ -74,6 +79,11 @@ class String(Field):
super(String, self).__init__(type=basestring, default=default)
class Identifier(String):
def validate(self, value):
return intern(str(super(Identifier, self).validate(value)))
class Integer(Field):
"""
@ -123,18 +133,32 @@ class Collection(Field):
return self._default.__class__(value) or None
class FieldOwner(type):
class ImmutableObjectMeta(type):
"""Helper to automatically assign field names to descriptors."""
def __new__(cls, name, bases, attrs):
attrs['_fields'] = []
fields = {}
for key, value in attrs.items():
if isinstance(value, Field):
attrs['_fields'].append(key)
fields[key] = '_' + key
value._name = key
attrs['_fields'].sort()
return super(FieldOwner, cls).__new__(cls, name, bases, attrs)
attrs['_fields'] = fields
attrs['__slots__'] = fields.values()
attrs['_instances'] = weakref.WeakValueDictionary()
for base in bases:
if '__weakref__' in getattr(base, '__slots__', []):
break
else:
attrs['__slots__'].append('__weakref__')
return super(ImmutableObjectMeta, cls).__new__(cls, name, bases, attrs)
def __call__(cls, *args, **kwargs): # noqa: N805
instance = super(ImmutableObjectMeta, cls).__call__(*args, **kwargs)
return cls._instances.setdefault(weakref.ref(instance), instance)
class ImmutableObject(object):
@ -144,11 +168,15 @@ class ImmutableObject(object):
constructor. Fields should be :class:`Field` instances to ensure type
safety in our models.
Note that since these models can not be changed, we heavily memoize them
to save memory. So constructing a class with the same arguments twice will
give you the same instance twice.
:param kwargs: kwargs to set as fields on the object
:type kwargs: any
"""
__metaclass__ = FieldOwner
__metaclass__ = ImmutableObjectMeta
def __init__(self, *args, **kwargs):
for key, value in kwargs.items():
@ -159,14 +187,23 @@ class ImmutableObject(object):
super(ImmutableObject, self).__setattr__(key, value)
def __setattr__(self, name, value):
if name in self.__slots__:
return super(ImmutableObject, self).__setattr__(name, value)
raise AttributeError('Object is immutable.')
def __delattr__(self, name):
if name in self.__slots__:
return super(ImmutableObject, self).__delattr__(name)
raise AttributeError('Object is immutable.')
def _items(self):
for field, key in self._fields.items():
if hasattr(self, key):
yield field, getattr(self, key)
def __repr__(self):
kwarg_pairs = []
for (key, value) in sorted(self.__dict__.items()):
for key, value in sorted(self._items()):
if isinstance(value, (frozenset, tuple)):
if not value:
continue
@ -179,47 +216,59 @@ class ImmutableObject(object):
def __hash__(self):
hash_sum = 0
for key, value in self.__dict__.items():
for key, value in self._items():
hash_sum += hash(key) + hash(value)
return hash_sum
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return self.__dict__ == other.__dict__
return dict(self._items()) == dict(other._items())
def __ne__(self, other):
return not self.__eq__(other)
def copy(self, **values):
"""
Copy the model with ``field`` updated to new value.
.. deprecated:: 1.1
Use :meth:`replace` instead. Note that we no longer return copies.
"""
deprecation.warn('model.immutable.copy')
return self.replace(**values)
def replace(self, **kwargs):
"""
Replace the fields in the model and return a new instance
Examples::
# Returns a track with a new name
Track(name='foo').copy(name='bar')
Track(name='foo').replace(name='bar')
# Return an album with a new number of tracks
Album(num_tracks=2).copy(num_tracks=5)
Album(num_tracks=2).replace(num_tracks=5)
:param values: the model fields to modify
:type values: dict
:rtype: new instance of the model being copied
Note that internally we memoize heavily to keep memory usage down given
our overly repetitive data structures. So you might get an existing
instance if it contains the same values.
:param kwargs: kwargs to set as fields on the object
:type kwargs: any
:rtype: instance of the model with replaced fields
"""
other = self.__class__()
other.__dict__.update(self.__dict__)
for key, value in values.items():
if not kwargs:
return self
other = copy.copy(self)
for key, value in kwargs.items():
if key not in self._fields:
raise TypeError(
'copy() got an unexpected keyword argument "%s"' % key)
super(ImmutableObject, other).__setattr__(key, value)
return other
return self._instances.setdefault(weakref.ref(other), other)
def serialize(self):
data = {}
data['__model__'] = self.__class__.__name__
for key, value in self.__dict__.items():
for key, value in self._items():
if isinstance(value, (set, frozenset, list, tuple)):
value = [
v.serialize() if isinstance(v, ImmutableObject) else v
@ -264,13 +313,11 @@ def model_json_decoder(dct):
"""
if '__model__' in dct:
# TODO: move models to a global constant once we split this module
models = {c.__name__: c for c in ImmutableObject.__subclasses__()}
model_name = dct.pop('__model__')
cls = globals().get(model_name, None)
if issubclass(cls, ImmutableObject):
kwargs = {}
for key, value in dct.items():
kwargs[key] = value
return cls(**kwargs)
if model_name in models:
return models[model_name](**dct)
return dct
@ -290,7 +337,7 @@ class Ref(ImmutableObject):
"""
#: The object URI. Read-only.
uri = String()
uri = Identifier()
#: The object name. Read-only.
name = String()
@ -354,7 +401,7 @@ class Image(ImmutableObject):
"""
#: The image URI. Read-only.
uri = String()
uri = Identifier()
#: Optional width of the image or :class:`None`. Read-only.
width = Integer(min=0)
@ -375,13 +422,13 @@ class Artist(ImmutableObject):
"""
#: The artist URI. Read-only.
uri = String()
uri = Identifier()
#: The artist name. Read-only.
name = String()
#: The MusicBrainz ID of the artist. Read-only.
musicbrainz_id = String()
musicbrainz_id = Identifier()
class Album(ImmutableObject):
@ -406,7 +453,7 @@ class Album(ImmutableObject):
"""
#: The album URI. Read-only.
uri = String()
uri = Identifier()
#: The album name. Read-only.
name = String()
@ -424,7 +471,7 @@ class Album(ImmutableObject):
date = String() # TODO: add date type
#: The MusicBrainz ID of the album. Read-only.
musicbrainz_id = String()
musicbrainz_id = Identifier()
#: The album image URIs. Read-only.
images = Collection(type=basestring, container=frozenset)
@ -469,7 +516,7 @@ class Track(ImmutableObject):
"""
#: The track URI. Read-only.
uri = String()
uri = Identifier()
#: The track name. Read-only.
name = String()
@ -508,7 +555,7 @@ class Track(ImmutableObject):
comment = String()
#: The MusicBrainz ID of the track. Read-only.
musicbrainz_id = String()
musicbrainz_id = Identifier()
#: Integer representing when the track was last modified. Exact meaning
#: depends on source of track. For local files this is the modification
@ -571,7 +618,7 @@ class Playlist(ImmutableObject):
"""
#: The playlist URI. Read-only.
uri = String()
uri = Identifier()
#: The playlist name. Read-only.
name = String()
@ -607,7 +654,7 @@ class SearchResult(ImmutableObject):
"""
# The search result URI. Read-only.
uri = String()
uri = Identifier()
# The tracks matching the search query. Read-only.
tracks = Collection(type=Track, container=tuple)

View File

@ -40,6 +40,9 @@ _MESSAGES = {
'tracklist.add() "tracks" argument is deprecated',
'core.tracklist.add:uri_arg':
'tracklist.add() "uri" argument is deprecated',
'models.immutable.copy':
'ImmutableObject.copy() is deprecated, use ImmutableObject.replace()',
}

View File

@ -9,7 +9,7 @@ def create_instance(field):
"""Create an instance of a dummy class for testing fields."""
class Dummy(object):
__metaclass__ = FieldOwner
__metaclass__ = ImmutableObjectMeta
attr = field
return Dummy()
@ -29,15 +29,14 @@ class FieldDescriptorTest(unittest.TestCase):
instance = create_instance(Field())
self.assertIsNone(instance.attr)
def test_field_does_not_store_default_in_dict(self):
def test_field_does_not_store_default(self):
instance = create_instance(Field())
self.assertNotIn('attr', instance.__dict__)
self.assertFalse(hasattr(instance, '_attr'))
def test_field_assigment_and_retrival(self):
instance = create_instance(Field())
instance.attr = 1234
self.assertEqual(1234, instance.attr)
self.assertEqual(1234, instance.__dict__['attr'])
def test_field_can_be_reassigned(self):
instance = create_instance(Field())
@ -50,14 +49,14 @@ class FieldDescriptorTest(unittest.TestCase):
instance.attr = 1234
del instance.attr
self.assertEqual(None, instance.attr)
self.assertNotIn('attr', instance.__dict__)
self.assertFalse(hasattr(instance, '_attr'))
def test_field_can_be_set_to_none(self):
instance = create_instance(Field())
instance.attr = 1234
instance.attr = None
self.assertEqual(None, instance.attr)
self.assertNotIn('attr', instance.__dict__)
self.assertFalse(hasattr(instance, '_attr'))
def test_field_can_be_set_default(self):
default = object()
@ -65,7 +64,7 @@ class FieldDescriptorTest(unittest.TestCase):
instance.attr = 1234
instance.attr = default
self.assertEqual(default, instance.attr)
self.assertNotIn('attr', instance.__dict__)
self.assertFalse(hasattr(instance, '_attr'))
class FieldTest(unittest.TestCase):
@ -183,13 +182,11 @@ class CollectionTest(unittest.TestCase):
instance = create_instance(Collection(type=int, container=frozenset))
instance.attr = []
self.assertEqual(frozenset(), instance.attr)
self.assertNotIn('attr', instance.__dict__)
def test_collection_gets_stored_in_container(self):
instance = create_instance(Collection(type=int, container=frozenset))
instance.attr = [1, 2, 3]
self.assertEqual(frozenset([1, 2, 3]), instance.attr)
self.assertEqual(frozenset([1, 2, 3]), instance.__dict__['attr'])
def test_collection_with_wrong_type(self):
instance = create_instance(Collection(type=int, container=frozenset))

View File

@ -8,54 +8,70 @@ from mopidy.models import (
TlTrack, Track, model_json_decoder)
class GenericCopyTest(unittest.TestCase):
class CachingTest(unittest.TestCase):
def test_same_instance(self):
self.assertIs(Track(), Track())
def test_same_instance_with_values(self):
self.assertIs(Track(uri='test'), Track(uri='test'))
def test_different_instance_with_different_values(self):
self.assertIsNot(Track(uri='test1'), Track(uri='test2'))
def test_different_instance_with_replace(self):
t = Track(uri='test1')
self.assertIsNot(t, t.replace(uri='test2'))
class GenericReplaceTest(unittest.TestCase):
def compare(self, orig, other):
self.assertEqual(orig, other)
self.assertNotEqual(id(orig), id(other))
self.assertEqual(id(orig), id(other))
def test_copying_track(self):
def test_replace_track(self):
track = Track()
self.compare(track, track.copy())
self.compare(track, track.replace())
def test_copying_artist(self):
def test_replace_artist(self):
artist = Artist()
self.compare(artist, artist.copy())
self.compare(artist, artist.replace())
def test_copying_album(self):
def test_replace_album(self):
album = Album()
self.compare(album, album.copy())
self.compare(album, album.replace())
def test_copying_playlist(self):
def test_replace_playlist(self):
playlist = Playlist()
self.compare(playlist, playlist.copy())
self.compare(playlist, playlist.replace())
def test_copying_track_with_basic_values(self):
def test_replace_track_with_basic_values(self):
track = Track(name='foo', uri='bar')
copy = track.copy(name='baz')
self.assertEqual('baz', copy.name)
self.assertEqual('bar', copy.uri)
other = track.replace(name='baz')
self.assertEqual('baz', other.name)
self.assertEqual('bar', other.uri)
def test_copying_track_with_missing_values(self):
def test_replace_track_with_missing_values(self):
track = Track(uri='bar')
copy = track.copy(name='baz')
self.assertEqual('baz', copy.name)
self.assertEqual('bar', copy.uri)
other = track.replace(name='baz')
self.assertEqual('baz', other.name)
self.assertEqual('bar', other.uri)
def test_copying_track_with_private_internal_value(self):
def test_replace_track_with_private_internal_value(self):
artist1 = Artist(name='foo')
artist2 = Artist(name='bar')
track = Track(artists=[artist1])
copy = track.copy(artists=[artist2])
self.assertIn(artist2, copy.artists)
other = track.replace(artists=[artist2])
self.assertIn(artist2, other.artists)
def test_copying_track_with_invalid_key(self):
def test_replace_track_with_invalid_key(self):
with self.assertRaises(TypeError):
Track().copy(invalid_key=True)
Track().replace(invalid_key=True)
def test_copying_track_to_remove(self):
track = Track(name='foo').copy(name=None)
self.assertEqual(track.__dict__, Track().__dict__)
def test_replace_track_to_remove(self):
track = Track(name='foo').replace(name=None)
self.assertFalse(hasattr(track, '_name'))
class RefTest(unittest.TestCase):
@ -86,7 +102,7 @@ class RefTest(unittest.TestCase):
def test_repr_without_results(self):
self.assertEqual(
"Ref(name=u'foo', type=u'artist', uri=u'uri')",
"Ref(name=u'foo', type=u'artist', uri='uri')",
repr(Ref(uri='uri', name='foo', type='artist')))
def test_serialize_without_results(self):
@ -193,14 +209,14 @@ class ArtistTest(unittest.TestCase):
def test_invalid_kwarg_with_name_matching_method(self):
with self.assertRaises(TypeError):
Artist(copy='baz')
Artist(replace='baz')
with self.assertRaises(TypeError):
Artist(serialize='baz')
def test_repr(self):
self.assertEqual(
"Artist(name=u'name', uri=u'uri')",
"Artist(name=u'name', uri='uri')",
repr(Artist(uri='uri', name='name')))
def test_serialize(self):
@ -365,12 +381,12 @@ class AlbumTest(unittest.TestCase):
def test_repr_without_artists(self):
self.assertEqual(
"Album(name=u'name', uri=u'uri')",
"Album(name=u'name', uri='uri')",
repr(Album(uri='uri', name='name')))
def test_repr_with_artists(self):
self.assertEqual(
"Album(artists=[Artist(name=u'foo')], name=u'name', uri=u'uri')",
"Album(artists=[Artist(name=u'foo')], name=u'name', uri='uri')",
repr(Album(uri='uri', name='name', artists=[Artist(name='foo')])))
def test_serialize_without_artists(self):
@ -609,12 +625,12 @@ class TrackTest(unittest.TestCase):
def test_repr_without_artists(self):
self.assertEqual(
"Track(name=u'name', uri=u'uri')",
"Track(name=u'name', uri='uri')",
repr(Track(uri='uri', name='name')))
def test_repr_with_artists(self):
self.assertEqual(
"Track(artists=[Artist(name=u'foo')], name=u'name', uri=u'uri')",
"Track(artists=[Artist(name=u'foo')], name=u'name', uri='uri')",
repr(Track(uri='uri', name='name', artists=[Artist(name='foo')])))
def test_serialize_without_artists(self):
@ -800,9 +816,9 @@ class TrackTest(unittest.TestCase):
self.assertEqual(track1, track2)
self.assertEqual(hash(track1), hash(track2))
def test_copy_can_reset_to_default_value(self):
def test_replace_can_reset_to_default_value(self):
track1 = Track(name='name1')
track2 = Track(name='name1', album=Album()).copy(album=None)
track2 = Track(name='name1', album=Album()).replace(album=None)
self.assertEqual(track1, track2)
self.assertEqual(hash(track1), hash(track2))
@ -844,7 +860,7 @@ class TlTrackTest(unittest.TestCase):
def test_repr(self):
self.assertEqual(
"TlTrack(tlid=123, track=Track(uri=u'uri'))",
"TlTrack(tlid=123, track=Track(uri='uri'))",
repr(TlTrack(tlid=123, track=Track(uri='uri'))))
def test_serialize(self):
@ -927,7 +943,7 @@ class PlaylistTest(unittest.TestCase):
playlist = Playlist(
uri='an uri', name='a name', tracks=tracks,
last_modified=last_modified)
new_playlist = playlist.copy(uri='another uri')
new_playlist = playlist.replace(uri='another uri')
self.assertEqual(new_playlist.uri, 'another uri')
self.assertEqual(new_playlist.name, 'a name')
self.assertEqual(list(new_playlist.tracks), tracks)
@ -939,7 +955,7 @@ class PlaylistTest(unittest.TestCase):
playlist = Playlist(
uri='an uri', name='a name', tracks=tracks,
last_modified=last_modified)
new_playlist = playlist.copy(name='another name')
new_playlist = playlist.replace(name='another name')
self.assertEqual(new_playlist.uri, 'an uri')
self.assertEqual(new_playlist.name, 'another name')
self.assertEqual(list(new_playlist.tracks), tracks)
@ -952,7 +968,7 @@ class PlaylistTest(unittest.TestCase):
uri='an uri', name='a name', tracks=tracks,
last_modified=last_modified)
new_tracks = [Track(), Track()]
new_playlist = playlist.copy(tracks=new_tracks)
new_playlist = playlist.replace(tracks=new_tracks)
self.assertEqual(new_playlist.uri, 'an uri')
self.assertEqual(new_playlist.name, 'a name')
self.assertEqual(list(new_playlist.tracks), new_tracks)
@ -965,7 +981,7 @@ class PlaylistTest(unittest.TestCase):
playlist = Playlist(
uri='an uri', name='a name', tracks=tracks,
last_modified=last_modified)
new_playlist = playlist.copy(last_modified=new_last_modified)
new_playlist = playlist.replace(last_modified=new_last_modified)
self.assertEqual(new_playlist.uri, 'an uri')
self.assertEqual(new_playlist.name, 'a name')
self.assertEqual(list(new_playlist.tracks), tracks)
@ -977,12 +993,12 @@ class PlaylistTest(unittest.TestCase):
def test_repr_without_tracks(self):
self.assertEqual(
"Playlist(name=u'name', uri=u'uri')",
"Playlist(name=u'name', uri='uri')",
repr(Playlist(uri='uri', name='name')))
def test_repr_with_tracks(self):
self.assertEqual(
"Playlist(name=u'name', tracks=[Track(name=u'foo')], uri=u'uri')",
"Playlist(name=u'name', tracks=[Track(name=u'foo')], uri='uri')",
repr(Playlist(uri='uri', name='name', tracks=[Track(name='foo')])))
def test_serialize_without_tracks(self):
@ -1114,7 +1130,7 @@ class SearchResultTest(unittest.TestCase):
def test_repr_without_results(self):
self.assertEqual(
"SearchResult(uri=u'uri')",
"SearchResult(uri='uri')",
repr(SearchResult(uri='uri')))
def test_serialize_without_results(self):