Skip to content
This repository has been archived by the owner on Apr 11, 2022. It is now read-only.

Moved analytics configuration to the database. #551

Merged
merged 7 commits into from
Jun 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 33 additions & 63 deletions analytics.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,41 @@
from nose.tools import set_trace
import importlib
import contextlib
import datetime
from config import Configuration
from collections import defaultdict
from model import ExternalIntegration
from config import CannotLoadConfiguration

class Analytics(object):

__instance = None

if '.' in __module__:
# We are operating in an application that imports this product
# as a package (probably called 'core'). The module name of
# the analytics provider should be scoped to the name of the
# package, i.e. 'core.local_analytics_provider'.
package_name = __module__[:__module__.rfind('.')+1]
else:
# This application is not imported as a package, probably
# because we're running its unit tests.
package_name = ''

DEFAULT_PROVIDERS = [package_name + "local_analytics_provider"]

@classmethod
def instance(cls):
if not cls.__instance:
config = Configuration.instance
providers = cls.load_providers_from_config(config)
cls.initialize(providers, config)
return cls.__instance

@classmethod
def initialize(cls, providers, config):
if not providers:
cls.__instance = cls()
return cls.__instance
if isinstance(providers, basestring):
providers = [providers]
analytics_providers = []
for provider_string in providers:
provider_module = importlib.import_module(provider_string)
provider_class = getattr(provider_module, "Provider")
analytics_providers.append(provider_class.from_config(config))
cls.__instance = cls(analytics_providers)
return cls.__instance

def __init__(self, providers=[]):
self.providers = providers

@classmethod
def collect_event(cls, _db, license_pool, event_type, time=None, **kwargs):
def __init__(self, _db):
self.sitewide_providers = []
self.library_providers = defaultdict(list)
self.initialization_exceptions = {}

# Find a list of all the ExternalIntegrations set up with a
# goal of analytics.
integrations = _db.query(ExternalIntegration).filter(ExternalIntegration.goal==ExternalIntegration.ANALYTICS_GOAL)
# Turn each integration into an analytics provider.
for integration in integrations:
try:
provider_module = importlib.import_module(integration.protocol)
provider_class = getattr(provider_module, "Provider", None)
if provider_class:
if not integration.libraries:
provider = provider_class(integration)
self.sitewide_providers.append(provider)
else:
for library in integration.libraries:
provider = provider_class(integration, library)
self.library_providers[library.id].append(provider)
else:
self.initialization_exceptions[integration.id] = "Module %s does not have Provider defined." % integration.protocol
except (ImportError, CannotLoadConfiguration), e:
self.initialization_exceptions[integration.id] = e

def collect_event(self, library, license_pool, event_type, time=None, **kwargs):
if not time:
time = datetime.datetime.utcnow()
for provider in cls.instance().providers:
provider.collect_event(_db, license_pool, event_type, time, **kwargs)

@classmethod
def load_providers_from_config(cls, config):
policies = config.get(Configuration.POLICIES, {})
return policies.get(Configuration.ANALYTICS_POLICY, cls.DEFAULT_PROVIDERS)


@contextlib.contextmanager
def temp_analytics(providers, config):
"""A context manager to temporarily replace the analytics providers
used by a test.
"""
old_instance = Analytics._Analytics__instance
Analytics.initialize(providers, config)
yield
Analytics._Analytics__instance = old_instance

for provider in (self.sitewide_providers + self.library_providers[library.id]):
provider.collect_event(library, license_pool, event_type, time, **kwargs)
6 changes: 5 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,11 @@ class Configuration(object):
},
{
"key": GROUPED_MAX_AGE_POLICY,
"label": _("Cache time for grouped OPDS feeds")
"label": _("Cache time for grouped OPDS feeds"),
},
{
"key": BASE_URL_KEY,
"label": _("Base url of the application"),
},
]

Expand Down
10 changes: 4 additions & 6 deletions coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def __init__(self, _db, collection=None, input_identifiers=None,
self.collection_id = collection.id
self.input_identifiers = input_identifiers
self.replacement_policy = (
replacement_policy or self._default_replacement_policy
replacement_policy or self._default_replacement_policy(_db)
)

if not self.DATA_SOURCE_NAME:
Expand All @@ -457,8 +457,7 @@ def __init__(self, _db, collection=None, input_identifiers=None,
# if INPUT_IDENTIFIER_TYPES is not set properly.
self.input_identifier_types = self._input_identifier_types()

@property
def _default_replacement_policy(self):
def _default_replacement_policy(self, _db):
"""Unless told otherwise, assume that we are getting
this data from a reliable metadata source.
"""
Expand Down Expand Up @@ -743,13 +742,12 @@ def __init__(self, collection, **kwargs):
_db, collection, **kwargs
)

@property
def _default_replacement_policy(self):
def _default_replacement_policy(self, _db):
"""Unless told otherwise, assume that we are getting
this data from a reliable source of both metadata and circulation
information.
"""
return ReplacementPolicy.from_license_source()
return ReplacementPolicy.from_license_source(_db)

@classmethod
def all(cls, _db, **kwargs):
Expand Down
19 changes: 13 additions & 6 deletions local_analytics_provider.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
from flask.ext.babel import lazy_gettext as _
from model import Session, CirculationEvent

class LocalAnalyticsProvider(object):
@classmethod
def from_config(cls, config):
return cls()
NAME = _("Local Analytics")

DESCRIPTION = _("Store analytics events in the 'circulationevents' database table.")

def __init__(self, integration):
self.integration_id = integration.id

def collect_event(self, _db, license_pool, event_type, time,
def collect_event(self, library, license_pool, event_type, time,
old_value=None, new_value=None, **kwargs):
from model import CirculationEvent
_db = Session.object_session(library)

CirculationEvent.log(
_db, license_pool, event_type, old_value, new_value, start=time)

Provider = LocalAnalyticsProvider
Provider = LocalAnalyticsProvider
38 changes: 23 additions & 15 deletions metadata_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
Work,
)
from classifier import NO_VALUE, NO_NUMBER
from analytics import Analytics

class ReplacementPolicy(object):
"""How serious should we be about overwriting old metadata with
Expand All @@ -59,6 +60,7 @@ def __init__(
link_content=False,
mirror=None,
content_modifier=None,
analytics=None,
http_get=None,
even_if_not_apparently_updated=False,
presentation_calculation_policy=None
Expand All @@ -73,18 +75,20 @@ def __init__(
self.even_if_not_apparently_updated = even_if_not_apparently_updated
self.mirror = mirror
self.content_modifier = content_modifier
self.analytics = analytics
self.http_get = http_get
self.presentation_calculation_policy = (
presentation_calculation_policy or
PresentationCalculationPolicy()
)

@classmethod
def from_license_source(self, **args):
def from_license_source(self, _db, **args):
"""When gathering data from the license source, overwrite all old data
from this source with new data from the same source. Also
overwrite an old rights status with an updated status and update
the list of available formats.
the list of available formats. Log availability changes to the
configured analytics services.
"""
return ReplacementPolicy(
identifiers=True,
Expand All @@ -93,6 +97,7 @@ def from_license_source(self, **args):
links=True,
rights=True,
formats=True,
analytics=Analytics(_db),
**args
)

Expand Down Expand Up @@ -802,11 +807,14 @@ def primary_identifier(self, _db):
self.primary_identifier_obj = obj
return self.primary_identifier_obj

def license_pool(self, _db, collection):
def license_pool(self, _db, collection, analytics=None):
"""Find or create a LicensePool object for this CirculationData.

:param collection: The LicensePool object will be associated with
the given Collection.

:param analytics: If the LicensePool is newly created, the event
will be tracked with this.
"""
if not collection:
raise ValueError(
Expand All @@ -830,17 +838,15 @@ def license_pool(self, _db, collection):
license_pool.open_access = self.has_open_access_link
license_pool.availability_time = self.last_checked
# This is our first time seeing this LicensePool. Log its
# occurence as a separate event.
event = get_one_or_create(
_db, CirculationEvent,
type=CirculationEvent.DISTRIBUTOR_TITLE_ADD,
license_pool=license_pool,
create_method_kwargs=dict(
start=self.last_checked,
delta=1,
end=self.last_checked,
)
)
# occurrence as a separate analytics event.
if analytics:
for library in collection.libraries:
analytics.collect_event(
library, license_pool,
CirculationEvent.DISTRIBUTOR_TITLE_ADD,
self.last_checked,
old_value=0, new_value=1,
)
license_pool.last_checked = self.last_checked

return license_pool, is_new
Expand Down Expand Up @@ -900,7 +906,7 @@ def apply(self, _db, collection, replace=None):

pool = None
if collection:
pool, ignore = self.license_pool(_db, collection)
pool, ignore = self.license_pool(_db, collection, replace.analytics)

data_source = self.data_source(_db)
identifier = self.primary_identifier(_db)
Expand Down Expand Up @@ -978,11 +984,13 @@ def apply(self, _db, collection, replace=None):
if pool and self._availability_needs_update(pool):
# Update availability information. This may result in
# the issuance of additional circulation events.
analytics = Analytics(_db)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like this, but I'm not sure anything else would be better.

Copy link
Contributor

@leonardr leonardr Jun 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is a performance problem. We're looking at database queries and module imports every time we want to change a LicensePool. I think analytics should be passed into ReplacementPolicy just like mirror is. That should cut down on the number of times we have to instantiate a new Analytics object.

changed_availability = pool.update_availability(
new_licenses_owned=self.licenses_owned,
new_licenses_available=self.licenses_available,
new_licenses_reserved=self.licenses_reserved,
new_patrons_in_hold_queue=self.patrons_in_hold_queue,
analytics=replace.analytics,
as_of=self.last_checked
)

Expand Down
13 changes: 5 additions & 8 deletions mock_analytics_provider.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
class MockAnalyticsProvider(object):
"""A mock analytics provider that keeps track of how many times it's called."""

@classmethod
def from_config(cls, config):
return cls(config.get('option'))

def __init__(self, option=None):
self.option = option
def __init__(self, integration=None, library=None):
self.count = 0
self.event = None
if integration:
self.url = integration.url

def collect_event(self, _db, lp, event_type, time, **kwargs):
def collect_event(self, library, lp, event_type, time=None, **kwargs):
self.count = self.count + 1
self.event_type = event_type
self.time = time

Provider = MockAnalyticsProvider
Provider = MockAnalyticsProvider
12 changes: 7 additions & 5 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@
INT4RANGE,
)
from s3 import S3Uploader
from analytics import Analytics


DEBUG = False
Expand Down Expand Up @@ -6357,7 +6356,8 @@ def needs_update(self):

def update_availability(
self, new_licenses_owned, new_licenses_available,
new_licenses_reserved, new_patrons_in_hold_queue, as_of=None):
new_licenses_reserved, new_patrons_in_hold_queue,
analytics=None, as_of=None):
"""Update the LicensePool with new availability information.
Log the implied changes as CirculationEvents.
"""
Expand Down Expand Up @@ -6395,9 +6395,11 @@ def update_availability(
if not event_name:
continue

Analytics.collect_event(
_db, self, event_name, as_of,
old_value=old_value, new_value=new_value)
if analytics:
for library in self.collection.libraries:
analytics.collect_event(
library, self, event_name, as_of,
old_value=old_value, new_value=new_value)

# Update the license pool with the latest information.
any_data = False
Expand Down
Loading