From 422a4526989071242c6181b21a2325cbe1f97106 Mon Sep 17 00:00:00 2001
From: Mike Decker
Date: Wed, 20 Mar 2024 10:42:53 -0700
Subject: [PATCH] Remove algolia index items for intranet sites

---
 .../stanford_intranet.module                  | 35 +++++++++++++--
 stanford_profile_helper.module                | 45 ++++++++++++-------
 2 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/modules/stanford_intranet/stanford_intranet.module b/modules/stanford_intranet/stanford_intranet.module
index 3e629d3e..a70ed9c1 100644
--- a/modules/stanford_intranet/stanford_intranet.module
+++ b/modules/stanford_intranet/stanford_intranet.module
@@ -13,6 +13,7 @@ use Drupal\Core\StreamWrapper\StreamWrapperManager;
 use Drupal\file\FileInterface;
 use Drupal\node\NodeInterface;
 use Drupal\paragraphs\ParagraphInterface;
+use Drupal\search_api\IndexInterface;
 use Drupal\stanford_intranet\Plugin\Field\FieldType\EntityAccessFieldType;
 use Drupal\user\RoleInterface;
 use Symfony\Component\Finder\Finder;
@@ -117,7 +118,7 @@ function stanford_intranet_library_info_build() {
  */
 function stanford_intranet_entity_create_access(AccountInterface $account, array $context, $entity_bundle) {
   // Block access to uploading files on the intranet. Leave the door open for
-  // the super user account though.
+  // the user 1 account though.
   if (
     $context['entity_type_id'] == 'media' &&
     $entity_bundle == 'file' &&
@@ -134,10 +135,14 @@ function stanford_intranet_entity_create_access(AccountInterface $account, array
  * Implements hook_entity_access().
  */
 function stanford_intranet_entity_access(EntityInterface $entity, $operation, AccountInterface $account) {
+
   // Paragraphs inherit their access from the parents they live on, so we can
   // ignore them.
+  // Check for the role because doing `$account->isAuthenticated()` only checks
+  // for the uid > 0. This doesn't work for search_api functionality, so just
+  // check for the role instead.
   if (
-    $account->isAnonymous() &&
+    !in_array(RoleInterface::AUTHENTICATED_ID, $account->getRoles()) &&
     \Drupal::state()->get('stanford_intranet', FALSE) &&
     !($entity instanceof ParagraphInterface)
   ) {
@@ -236,8 +241,11 @@ function stanford_intranet_node_grants(AccountInterface $account, $op) {
 function stanford_intranet_user_role_insert(RoleInterface $role) {
   $state = \Drupal::state()->get('stanford_intranet.rids', []);
   $state = array_flip($state);
+  $roles = \Drupal::entityTypeManager()
+    ->getStorage('user_role')
+    ->loadMultiple();
 
-  foreach (array_keys(user_role_names()) as $role_id) {
+  foreach (array_keys($roles) as $role_id) {
     if (!in_array($role_id, $state)) {
       $state[] = $role_id;
     }
@@ -263,3 +271,24 @@ function stanford_intranet_form_node_form_alter(&$form, FormStateInterface $form
     $form[EntityAccessFieldType::FIELD_NAME]['#group'] = 'revision_information';
   }
 }
+
+/**
+ * Implements hook_search_api_algolia_objects_alter().
+ */
+function stanford_intranet_search_api_algolia_objects_alter(array &$objects, IndexInterface $index, array $items) {
+  if (!\Drupal::state()->get('stanford_intranet', FALSE)) {
+    return;
+  }
+  $node_storage = \Drupal::entityTypeManager()->getStorage('node');
+  $current_user = \Drupal::currentUser();
+
+  // Filters out items that the current user (search_api configured) does not
+  // have access to view.
+  $objects = array_filter($objects, function($item) use ($node_storage, $current_user) {
+    $nodes = $node_storage->loadByProperties(['uuid' => $item['objectID']]);
+    // reset() returns FALSE when no node matches the objectID. Such an object
+    // cannot be access checked, so drop it instead of calling access() on it.
+    $node = reset($nodes);
+    return $node instanceof NodeInterface && $node->access('view', $current_user);
+  });
+}
diff --git a/stanford_profile_helper.module b/stanford_profile_helper.module
index e6551253..31378008 100644
--- a/stanford_profile_helper.module
+++ b/stanford_profile_helper.module
@@ -1148,7 +1148,7 @@ function stanford_profile_helper_search_api_processor_info_alter(array &$process
  * Implements hook_entity_view_display_alter().
  */
 function stanford_profile_helper_entity_view_display_alter(EntityViewDisplayInterface $display, array $context) {
-  if ($context['view_mode'] == 'search_indexing' && $context['entity_type'] == 'node') {
+  if (str_contains($context['view_mode'], 'search_indexing') && $context['entity_type'] == 'node') {
     // The title is already in the template, it's not needed in the display.
     $display->removeComponent('title');
   }
@@ -1167,28 +1167,43 @@ function stanford_profile_helper_filter_format_access(EntityInterface $entity, $
 function stanford_profile_helper_search_api_algolia_objects_alter(array &$objects, IndexInterface $index, array $items) {
   /** @var \Drupal\config_pages\ConfigPagesLoaderServiceInterface $config_page_loader */
   $config_page_loader = \Drupal::service('config_pages.loader');
+
   // If the canonical url is set, use that to adjust the urls.
   $site_domain = $config_page_loader->getValue('stanford_basic_site_settings', 'su_site_url', 0, 'uri');
   $current_host = \Drupal::request()->getSchemeAndHttpHost();
 
+  // If there is no HTML, the item cannot be indexed, so remove it.
+  $objects = array_filter($objects, fn($item) => !empty($item['html']));
+
   foreach ($objects as &$item) {
     // Remove fields that aren't necessary.
-    unset($item['search_api_id'], $item['search_api_datasource'], $item['search_api_language']);
-
-    // Either the canonical url hasn't been set, or it matches the current
-    // request. It would match the current request when the event is happening
-    // in the UI. If cron is running, the current host won't match the canonical
-    // url.
-    if (!$site_domain || $site_domain == $current_host) {
-      continue;
-    }
+    unset($item['search_api_datasource'], $item['search_api_language'], $item['status']);
+
+    foreach ($item as $name => &$field) {
+      // Data that is being sent as the taxonomy term names should always be
+      // sent as an array of strings. When the node is only configured with one
+      // term in the field, it tries to send it as a string. So we force it to
+      // be an array.
+      $property_path = $index->getField($name)?->getPropertyPath() ?: '';
+      if (is_string($field) && str_contains($property_path, ':entity:name')) {
+        $field = [$field];
+      }
 
-    // Change the urls from the current host to the canonical url.
-    foreach ($item as &$field) {
-      if (!is_string($field) || !str_contains($field, $current_host)) {
-        continue;
+      // Only rewrite urls when a canonical url is set and it differs from the
+      // current request host. They match when the event happens in the UI,
+      // but when cron is running the current host won't match the canonical
+      // url.
+      if (
+        $site_domain &&
+        $site_domain != $current_host &&
+        is_string($field) &&
+        str_contains($field, $current_host)
+      ) {
+        // Change the urls from the current host to the canonical url.
+        $field = str_replace($current_host, $site_domain, $field);
       }
-      $field = str_replace($current_host, $site_domain, $field);
     }
+    unset($field);
   }
+  unset($item);
 }