From be990849ad043d72e76f454b54bf4134fc39d1d2 Mon Sep 17 00:00:00 2001 From: ivis-nakagawa Date: Tue, 27 Aug 2024 17:49:16 +0900 Subject: [PATCH 1/5] weko#44635 Fix bulk delete issue --- modules/weko-search-ui/tests/conftest.py | 9 +++++ modules/weko-search-ui/tests/test_utils.py | 36 +++++++++++++++++-- .../weko-search-ui/weko_search_ui/admin.py | 2 +- .../weko-search-ui/weko_search_ui/utils.py | 17 +++++++-- 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/modules/weko-search-ui/tests/conftest.py b/modules/weko-search-ui/tests/conftest.py index 4ff0fe224e..c69385c1f8 100644 --- a/modules/weko-search-ui/tests/conftest.py +++ b/modules/weko-search-ui/tests/conftest.py @@ -1016,9 +1016,18 @@ def indices(app, db): testIndexPrivate = Index( index_name="testIndexPrivate", public_state=False, id=55 ) + testIndexSix = Index( + index_name="testIndexSix", + browsing_role="Contributor", + public_state=True, + id=66, + position=1, + item_custom_sort={}, + ) db.session.add(testIndexThree) db.session.add(testIndexThreeChild) + db.session.add(testIndexSix) return { "index_dict": dict(testIndexThree), diff --git a/modules/weko-search-ui/tests/test_utils.py b/modules/weko-search-ui/tests/test_utils.py index e8ce42f03d..8e7f755ff9 100644 --- a/modules/weko-search-ui/tests/test_utils.py +++ b/modules/weko-search-ui/tests/test_utils.py @@ -3,11 +3,13 @@ import copy import json import os +import time import unittest -from datetime import datetime +from datetime import datetime, timedelta import uuid import pytest +from elasticsearch import helpers from flask import current_app, make_response, request from flask_babelex import Babel from flask_login import current_user @@ -176,7 +178,7 @@ def can(self): return True # def get_tree_items(index_tree_id): ERROR ~ AttributeError: '_AppCtxGlobals' object has no attribute 'identity' # .tox/c1/bin/pytest --cov=weko_search_ui tests/test_utils.py::test_get_tree_items -vv -s --cov-branch --cov-report=term 
--basetemp=/code/modules/weko-search-ui/.tox/c1/tmp -def test_get_tree_items(i18n_app, indices, users, mocker): +def test_get_tree_items(i18n_app, indices, users, mocker, es): i18n_app.config['WEKO_SEARCH_TYPE_INDEX'] = 'index' i18n_app.config['OAISERVER_ES_MAX_CLAUSE_COUNT'] = 1 i18n_app.config['WEKO_ADMIN_MANAGEMENT_OPTIONS'] = WEKO_ADMIN_MANAGEMENT_OPTIONS @@ -192,6 +194,8 @@ def to_dict(self): return self.data def __init__(self,data): self.data=data + def extra(self,size): + return self def execute(self): return self.MockExecute(self.data) def mock_search_factory(self, search,index_id=None): @@ -202,6 +206,34 @@ def mock_search_factory(self, search,index_id=None): # with patch("weko_search_ui.query.item_path_search_factory", return_value="{'abc': 123}"): assert get_tree_items(33) + def _generate_es_data(num, start_datetime=datetime.now()): + for i in range(num): + doc = { + "_index": i18n_app.config['INDEXER_DEFAULT_INDEX'], + "_type": "item-v1.0.0", + "_id": f"2d1a2520-9080-437f-a304-230adc8{i:05d}", + "_source": { + "_item_metadata": { + "title": [f"test_title_{i}"], + }, + "relation_version_is_last": True, + "path": ["66"], + "control_number": f"{i:05d}", + "_created": (start_datetime + timedelta(seconds=i)).isoformat(), + "publish_status": "0", + }, + } + yield doc + + generate_data_num = 20005 + helpers.bulk(es, _generate_es_data(generate_data_num)) + es.indices.refresh(index=i18n_app.config['INDEXER_DEFAULT_INDEX']) + i18n_app.config['RECORDS_REST_SORT_OPTIONS'] = {"test-weko":{"controlnumber":{"title":"ID","fields": ["control_number"],"default_order": "asc","order": 2}}} + + with i18n_app.test_request_context(query_string={"sort": "control_number", "q": "66"}): + with patch("flask_login.utils._get_user", return_value=users[3]["obj"]): + assert len(get_tree_items(66)) == generate_data_num + # def delete_records(index_tree_id, ignore_items): def test_delete_records(i18n_app, db_activity): diff --git a/modules/weko-search-ui/weko_search_ui/admin.py 
b/modules/weko-search-ui/weko_search_ui/admin.py index bc7fb60120..6cc06ee3f5 100644 --- a/modules/weko-search-ui/weko_search_ui/admin.py +++ b/modules/weko-search-ui/weko_search_ui/admin.py @@ -289,7 +289,7 @@ def index(self): recursive_tree = Indexes.get_recursive_tree(q) if current_tree is not None: - tree_items = get_tree_items(current_tree.id) + tree_items = get_tree_items(current_tree.id, 1) has_items = len(tree_items) > 0 if recursive_tree is not None: has_child_trees = len(recursive_tree) > 1 diff --git a/modules/weko-search-ui/weko_search_ui/utils.py b/modules/weko-search-ui/weko_search_ui/utils.py index f59bb12e35..a150602630 100644 --- a/modules/weko-search-ui/weko_search_ui/utils.py +++ b/modules/weko-search-ui/weko_search_ui/utils.py @@ -195,7 +195,7 @@ def __repr__(self): ) -def get_tree_items(index_tree_id): +def get_tree_items(index_tree_id, size=10000): """Get tree items.""" records_search = RecordsSearch() records_search = records_search.with_preference_param().params(version=False) @@ -203,9 +203,22 @@ def get_tree_items(index_tree_id): search_instance, _ = item_path_search_factory( None, records_search, index_id=index_tree_id ) + search_instance = search_instance.extra(size=size) search_result = search_instance.execute() rd = search_result.to_dict() - return rd.get("hits").get("hits") + result = rd.get("hits").get("hits") + + while len(rd['hits']['hits']) == 10000: + search_after = rd['hits']['hits'][-1]['sort'] + search_instance = search_instance.extra( + size=size, + search_after=search_after + ) + search_result = search_instance.execute() + rd = search_result.to_dict() + result.extend(rd.get("hits").get("hits")) + + return result def delete_records(index_tree_id, ignore_items): From 9361e584c0d982580f768c7cef7b98c445f2f7cb Mon Sep 17 00:00:00 2001 From: ivis-nakagawa Date: Thu, 5 Sep 2024 16:32:12 +0900 Subject: [PATCH 2/5] weko#44914 Fix to retrieve more than 10 items with DOI or in editing state --- modules/weko-index-tree/tests/conftest.py 
| 10 ++- modules/weko-index-tree/tests/test_utils.py | 47 ++++++++++---- .../weko-index-tree/weko_index_tree/utils.py | 13 ++-- modules/weko-search-ui/tests/test_utils.py | 21 +++++-- .../weko-search-ui/weko_search_ui/utils.py | 61 +++++++++++++------ 5 files changed, 113 insertions(+), 39 deletions(-) diff --git a/modules/weko-index-tree/tests/conftest.py b/modules/weko-index-tree/tests/conftest.py index 552de8b06d..57b6f79445 100644 --- a/modules/weko-index-tree/tests/conftest.py +++ b/modules/weko-index-tree/tests/conftest.py @@ -745,6 +745,13 @@ def indices(app, db): id=45, position=1 ) + testIndexSix = Index( + index_name="testIndexSix", + browsing_role="1,2,3,4,-98,-99", + public_state=True, + id=66, + position=4 + ) db.session.add(testIndexOne) @@ -753,7 +760,8 @@ def indices(app, db): db.session.add(testIndexThreeChild) db.session.add(testIndexMore) db.session.add(testIndexPrivate) - + db.session.add(testIndexSix) + return { 'index_dict': dict(testIndexThree), 'index_non_dict': testIndexThree, diff --git a/modules/weko-index-tree/tests/test_utils.py b/modules/weko-index-tree/tests/test_utils.py index 76da43b98c..063f244fb7 100644 --- a/modules/weko-index-tree/tests/test_utils.py +++ b/modules/weko-index-tree/tests/test_utils.py @@ -54,6 +54,7 @@ import redis from redis import sentinel +from elasticsearch import helpers from elasticsearch.exceptions import NotFoundError from elasticsearch_dsl.query import Bool, Exists, Q, QueryString from flask import Markup, current_app, session @@ -477,13 +478,47 @@ def test_check_doi_in_index(i18n_app, indices, db_records): #*** def get_record_in_es_of_index(index_id, recursively=True): +#*** def check_doi_in_index_and_child_index(index_id, recursively=True): +# .tox/c1/bin/pytest --cov=weko_index_tree tests/test_utils.py::test_get_record_in_es_of_index -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-index-tree/.tox/c1/tmp def test_get_record_in_es_of_index(i18n_app, indices, 
db_records, esindex): # Test 1 assert not get_record_in_es_of_index(44, recursively=False) + assert not check_doi_in_index_and_child_index(44, recursively=False) # Test 2 # assert get_record_in_es_of_index(33) + def _generate_es_data(num, start_datetime=datetime.now()): + for i in range(num): + doc = { + "_index": i18n_app.config['INDEXER_DEFAULT_INDEX'], + "_type": "item-v1.0.0", + "_id": f"2d1a2520-9080-437f-a304-230adc8{i:05d}", + "_source": { + "_item_metadata": { + "title": [f"test_title_{i}"], + }, + "relation_version_is_last": True, + "path": ["66"], + "control_number": f"{i:05d}", + "_created": (start_datetime + timedelta(seconds=i)).isoformat(), + "publish_status": "0", + }, + } + if i % 2 == 0: + doc["_source"]["identifierRegistration"] = { + "identifierType": "DOI", + "value": f"10.9999/test_doi_{i:05d}", + } + yield doc + + generate_data_num = 30002 + helpers.bulk(esindex.client, _generate_es_data(generate_data_num), refresh='true') + + # result over 10000 + assert len(get_record_in_es_of_index(66)) == generate_data_num + assert len(check_doi_in_index_and_child_index(66)) == int(generate_data_num / 2) + # def check_doi_in_list_record_es(index_id): # .tox/c1/bin/pytest --cov=weko_index_tree tests/test_utils.py::test_check_doi_in_list_record_es -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-index-tree/.tox/c1/tmp @@ -542,16 +577,6 @@ def test_check_index_permissions(app, db, users, test_indices, db_records): assert check_index_permissions(index_path_list=["1", "2"], is_check_doi=True)==True -# *** def check_doi_in_index_and_child_index(index_id, recursively=True): -# def test_check_doi_in_index_and_child_index(i18n_app, indices, esindex, db_records, records2): -def test_check_doi_in_index_and_child_index(i18n_app, users, indices, esindex): - # Test 1 - assert len(check_doi_in_index_and_child_index(33, recursively=True)) == 0 - - # Test 2 - # assert len(check_doi_in_index_and_child_index(33, recursively=True)) > 
0 - - #+++ def __get_redis_store(): def test___get_redis_store(i18n_app): assert __get_redis_store() @@ -640,7 +665,7 @@ def test_get_doi_items_in_index(app): # def get_editing_items_in_index(index_id, recursively=False): -# .tox/c1/bin/pytest --cov=weko_index_tree tests/test_utils.py::test_save_index_trees_to_redis -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-index-tree/.tox/c1/tmp +# .tox/c1/bin/pytest --cov=weko_index_tree tests/test_utils.py::test_get_editing_items_in_index -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-index-tree/.tox/c1/tmp def test_get_editing_items_in_index(app): _es_data = [ { diff --git a/modules/weko-index-tree/weko_index_tree/utils.py b/modules/weko-index-tree/weko_index_tree/utils.py index 700cb1afca..4f6a2f0333 100644 --- a/modules/weko-index-tree/weko_index_tree/utils.py +++ b/modules/weko-index-tree/weko_index_tree/utils.py @@ -592,7 +592,9 @@ def get_record_in_es_of_index(index_id, recursively=True): @param index_id: @return: """ + from weko_search_ui.utils import execute_search_with_pagination from .api import Indexes + if recursively: child_idx = Indexes.get_child_list_recursive(index_id) else: @@ -601,6 +603,7 @@ def get_record_in_es_of_index(index_id, recursively=True): query_string = "relation_version_is_last:true" search = RecordsSearch( index=current_app.config['SEARCH_UI_SEARCH_INDEX']) + search = search.sort({"control_number": {"order": "asc"}}) must_query = [ QueryString(query=query_string), Q("terms", path=child_idx), @@ -612,9 +615,7 @@ def get_record_in_es_of_index(index_id, recursively=True): search = search.query( Bool(filter=must_query) ) - records = search.execute().to_dict().get('hits', {}).get('hits', []) - - return records + return execute_search_with_pagination(search, max_result_size=-1) def check_doi_in_list_record_es(index_id): @@ -818,15 +819,18 @@ def check_doi_in_index_and_child_index(index_id, recursively=True): 
Args: index_id (list): Record list. """ + from weko_search_ui.utils import execute_search_with_pagination from .api import Indexes if recursively: child_idx = Indexes.get_child_list_recursive(index_id) else: child_idx = [index_id] + query_string = "relation_version_is_last:true AND publish_status: {}".format(PublishStatus.PUBLIC.value) search = RecordsSearch( index=current_app.config['SEARCH_UI_SEARCH_INDEX']) + search = search.sort({"control_number": {"order": "asc"}}) must_query = [ QueryString(query=query_string), Q("terms", path=child_idx), @@ -836,8 +840,7 @@ def check_doi_in_index_and_child_index(index_id, recursively=True): search = search.query( Bool(filter=must_query) ) - records = search.execute().to_dict().get('hits', {}).get('hits', []) - return records + return execute_search_with_pagination(search, max_result_size=-1) def __get_redis_store(): diff --git a/modules/weko-search-ui/tests/test_utils.py b/modules/weko-search-ui/tests/test_utils.py index 8e7f755ff9..ca4acec2f6 100644 --- a/modules/weko-search-ui/tests/test_utils.py +++ b/modules/weko-search-ui/tests/test_utils.py @@ -176,9 +176,12 @@ def __init__(self): def can(self): return True + + +# def execute_search_with_pagination(search_instance, get_all=False, size=None): # def get_tree_items(index_tree_id): ERROR ~ AttributeError: '_AppCtxGlobals' object has no attribute 'identity' # .tox/c1/bin/pytest --cov=weko_search_ui tests/test_utils.py::test_get_tree_items -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-search-ui/.tox/c1/tmp -def test_get_tree_items(i18n_app, indices, users, mocker, es): +def test_get_tree_items(i18n_app, indices, users, mocker, esindex): i18n_app.config['WEKO_SEARCH_TYPE_INDEX'] = 'index' i18n_app.config['OAISERVER_ES_MAX_CLAUSE_COUNT'] = 1 i18n_app.config['WEKO_ADMIN_MANAGEMENT_OPTIONS'] = WEKO_ADMIN_MANAGEMENT_OPTIONS @@ -209,7 +212,7 @@ def mock_search_factory(self, search,index_id=None): def _generate_es_data(num, start_datetime=datetime.now()): for 
def execute_search_with_pagination(
    search_instance,
    max_result_size=WEKO_SEARCH_MAX_RESULT,
    page_size=10000
):
    """Execute search with pagination.

    Fetches hits page by page, passing the ``sort`` values of the last hit
    of each page as ``search_after`` for the next request, until either the
    requested number of hits has been collected or a page comes back short.

    NOTE(review): ``search_after`` needs a sorted search; if the sort key is
    not unique, hits sharing the boundary value can presumably be skipped —
    confirm that callers sort on a unique field.

    @param search_instance: search instance (must define a sort when more
        than one page is needed)
    @param max_result_size: maximum number of records to get
        if < 0, get all records
        if 0, return an empty list without querying
    @param page_size: number of hits requested per round trip
    @return: list of hit dicts (``hits.hits`` of every page, concatenated)
    @raise ValueError: if ``page_size`` is not positive
    @raise KeyError: if another page is needed but the hits carry no
        ``sort`` values
    """
    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    remaining = max_result_size
    result = []
    extra = {}

    while remaining != 0:
        # A negative budget means "no limit": always request a full page.
        if remaining < 0:
            request_size = page_size
        else:
            request_size = min(remaining, page_size)
            remaining -= request_size

        extra['size'] = request_size
        search_instance = search_instance.extra(**extra)
        page = search_instance.execute().to_dict().get(
            'hits', {}).get('hits', [])
        result.extend(page)

        # A short page means the index has no further matches.
        if len(page) < request_size or remaining == 0:
            break

        search_after = page[-1].get('sort')
        if not search_after:
            # Same exception type the plain ['sort'] lookup would raise,
            # but with a message that points at the actual cause.
            raise KeyError(
                "sort: search must be sorted to paginate with search_after"
            )
        extra['search_after'] = search_after

    return result


def get_tree_items(index_tree_id, max_result_size=WEKO_SEARCH_MAX_RESULT):
    """Get tree items.

    Builds the item-path search for the given index on the configured
    ``SEARCH_UI_SEARCH_INDEX`` and returns its hits.

    @param index_tree_id: index id passed to ``item_path_search_factory``
    @param max_result_size: maximum number of records to get
        if < 0, get all records
    @return: list of hit dicts
    """
    records_search = RecordsSearch()
    records_search = records_search.with_preference_param().params(version=False)
    records_search._index[0] = current_app.config["SEARCH_UI_SEARCH_INDEX"]
    search_instance, _ = item_path_search_factory(
        None, records_search, index_id=index_tree_id
    )
    return execute_search_with_pagination(search_instance, max_result_size)
get_editing_items_in_index(0) + assert res == ["1"] + + with patch("weko_workflow.utils.bulk_check_an_item_is_locked", return_value=[]): res = get_editing_items_in_index(0) assert res == [] diff --git a/modules/weko-index-tree/weko_index_tree/utils.py b/modules/weko-index-tree/weko_index_tree/utils.py index 4f6a2f0333..0a0b81003d 100644 --- a/modules/weko-index-tree/weko_index_tree/utils.py +++ b/modules/weko-index-tree/weko_index_tree/utils.py @@ -1017,19 +1017,21 @@ def get_editing_items_in_index(index_id, recursively=False): @return: """ from weko_items_ui.utils import check_item_is_being_edit - from weko_workflow.utils import check_an_item_is_locked + from weko_workflow.utils import bulk_check_an_item_is_locked result = [] records = get_record_in_es_of_index(index_id, recursively) - for record in records: - item_id = record.get('_source', {}).get( - '_item_metadata', {}).get('control_number') - if check_item_is_being_edit( - PersistentIdentifier.get('recid', item_id)) or \ - check_an_item_is_locked(int(item_id)): + item_ids = [ + record.get('_source', {}).get('_item_metadata', {}).get('control_number') + for record in records + ] + for item_id in item_ids: + if check_item_is_being_edit(PersistentIdentifier.get('recid', item_id)): result.append(item_id) - return result + result.extend(bulk_check_an_item_is_locked(item_ids)) + + return sorted(list(set(result))) def save_index_trees_to_redis(tree, lang=None): """save inde_tree to redis for roles diff --git a/modules/weko-workflow/tests/test_utils.py b/modules/weko-workflow/tests/test_utils.py index 515ded0e5a..962dac6791 100644 --- a/modules/weko-workflow/tests/test_utils.py +++ b/modules/weko-workflow/tests/test_utils.py @@ -135,6 +135,8 @@ grant_access_rights_to_all_open_restricted_files, delete_lock_activity_cache, delete_user_lock_activity_cache, + check_an_item_is_locked, + bulk_check_an_item_is_locked, ) from weko_workflow.api import GetCommunity, UpdateItem, WorkActivity, WorkActivityHistory, WorkFlow from 
weko_workflow.models import Activity @@ -1008,11 +1010,48 @@ def test_get_cache_data(client): current_cache.set(key, value) result = get_cache_data(key) assert result == value + + # def check_an_item_is_locked(item_id=None): +# def bulk_check_an_item_is_locked(item_ids=[]): # def check(workers): -# .tox/c1/bin/pytest --cov=weko_workflow tests/test_utils.py::test_get_current_language -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-workflow/.tox/c1/tmp -def test_check_an_item_is_locked(): - pass +# .tox/c1/bin/pytest --cov=weko_workflow tests/test_utils.py::test_check_an_item_is_locked -vv -s --cov-branch --cov-report=term --basetemp=/code/modules/weko-workflow/.tox/c1/tmp +def test_check_an_item_is_locked(app): + with app.app_context(): + with patch("weko_workflow.utils.inspect") as mock_inspect: + mock_inspect_instance = mock_inspect.return_value + # inspect(timeout=_timeout).ping() + mock_inspect_instance.ping.return_value = True + # inspect(timeout=_timeout).active() + mock_inspect_instance.active.return_value = { + 'worker1': [ + {'name': 'weko_search_ui.tasks.import_item', 'args': [{'id': '1'}]}, + {'name': 'weko_search_ui.tasks.import_item', 'args': [{'id': '2'}]}, + ], + 'worker2': [ + {'name': 'weko_search_ui.tasks.import_item', 'args': [{'id': '3'}, {'id': '99'}]}, + ], + } + # inspect(timeout=_timeout).reserved() + mock_inspect_instance.reserved.return_value = { + 'worker3': [ + {'name': 'weko_search_ui.tasks.import_item', 'args': [{'id': '4'}]}, + {'name': 'weko_search_ui.tasks.test_task', 'args': [{'id': '5'}]}, + ], + } + + item_ids = list(range(1,5)) + result = [] + for i in item_ids: + if check_an_item_is_locked(str(i)): + result.append(str(i)) + + assert bulk_check_an_item_is_locked(item_ids) == result == ["1","2","3","4"] + + assert check_an_item_is_locked() == False + assert bulk_check_an_item_is_locked() == [] + + # def get_account_info(user_id): # .tox/c1/bin/pytest --cov=weko_workflow 
def bulk_check_an_item_is_locked(item_ids=None):
    """Check in bulk which items are locked by an import task.

    An item counts as locked when a ``weko_search_ui.tasks.import_item``
    task whose first argument carries that item's ``id`` is listed as
    active or reserved on any worker. Ids are compared as strings.

    :param item_ids: Item id list.

    :return list: Locked item id list (as strings, in the order the tasks
        are reported; an id appears once per matching task).
    """
    # Nothing to look up: skip the config read and the worker round trips.
    if not item_ids:
        return []

    _timeout = current_app.config.get("CELERY_GET_STATUS_TIMEOUT", 3.0)
    inspector = inspect(timeout=_timeout)
    if not inspector.ping():
        return []

    wanted = {str(item_id) for item_id in item_ids}
    result = []
    for state in ('active', 'reserved'):
        # NOTE(review): the inspect call presumably yields None when no
        # worker replies in time — treated as "no tasks"; confirm.
        workers = getattr(inspector, state)() or {}
        for tasks in workers.values():
            for task in tasks or []:
                if task.get('name') != 'weko_search_ui.tasks.import_item':
                    continue
                args = task.get('args')
                first_arg = args[0] if args else None
                # Entries whose first argument is not a dict cannot
                # identify an item, so they are skipped.
                if not isinstance(first_arg, dict):
                    continue
                locked_id = first_arg.get('id')
                if locked_id in wanted:
                    result.append(locked_id)

    return result
From 2af83a6ddc56066d40352bbafc1460b9c1e4ce5a Mon Sep 17 00:00:00 2001 From: ivis-nakagawa Date: Fri, 6 Sep 2024 14:23:19 +0900 Subject: [PATCH 4/5] weko#44981 Fix to retrieve more than 10000 items --- .../invenio_resourcesyncserver/query.py | 20 +- .../tests/conftest.py | 34 +- .../tests/data/item-v1.0.0.json | 1221 +++++++++++++++++ .../tests/test_query.py | 39 +- 4 files changed, 1296 insertions(+), 18 deletions(-) create mode 100644 modules/invenio-resourcesyncserver/tests/data/item-v1.0.0.json diff --git a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py index 557e4af144..f2d381b557 100644 --- a/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py +++ b/modules/invenio-resourcesyncserver/invenio_resourcesyncserver/query.py @@ -27,6 +27,7 @@ from invenio_search import RecordsSearch from weko_index_tree.api import Indexes from weko_schema_ui.models import PublishStatus +from weko_search_ui.utils import execute_search_with_pagination from .config import WEKO_ROOT_INDEX @@ -36,14 +37,13 @@ def get_items_by_index_tree(index_tree_id): records_search = RecordsSearch() records_search = records_search.with_preference_param().params( version=False) + records_search = records_search.sort({"control_number": {"order": "asc"}}) records_search._index[0] = current_app.config['SEARCH_UI_SEARCH_INDEX'] search_instance = item_path_search_factory( search=records_search, index_id=index_tree_id ) - search_result = search_instance.execute().to_dict() - - return search_result.get('hits').get('hits') + return execute_search_with_pagination(search_instance, -1) def get_item_changes_by_index(index_tree_id, date_from, date_until): @@ -58,9 +58,7 @@ def get_item_changes_by_index(index_tree_id, date_from, date_until): date_from=date_from, date_until=date_until ) - search_result = search_instance.execute().to_dict() - - return search_result.get('hits').get('hits') + return 
@pytest.fixture()
def esindex(app):
    """Provide the search client with a freshly created test item index.

    Deletes every ``test-*`` index, creates the item index from
    ``tests/data/item-v1.0.0.json`` and aliases it as ``test-weko``; the
    ``test-*`` indices are deleted again on teardown.

    :param app: application fixture supplying ``INDEXER_DEFAULT_INDEX``.
    :return: yields ``current_search_client``.
    """
    current_search_client.indices.delete(index="test-*")
    with open("tests/data/item-v1.0.0.json", "r", encoding="utf-8") as f:
        mapping = json.load(f)
    try:
        current_search_client.indices.create(
            app.config["INDEXER_DEFAULT_INDEX"], body=mapping
        )
        current_search_client.indices.put_alias(
            index=app.config["INDEXER_DEFAULT_INDEX"], name="test-weko"
        )
    except Exception:
        # Fall back to a fixed index name when the configured one cannot be
        # used. ``Exception`` (not a bare except) so that KeyboardInterrupt
        # and SystemExit still abort the test run.
        # NOTE(review): which error is expected here is not visible —
        # narrow further once confirmed.
        current_search_client.indices.create("test-weko-items", body=mapping)
        current_search_client.indices.put_alias(
            index="test-weko-items", name="test-weko"
        )

    try:
        yield current_search_client
    finally:
        current_search_client.indices.delete(index="test-*")
a/modules/invenio-resourcesyncserver/tests/data/item-v1.0.0.json b/modules/invenio-resourcesyncserver/tests/data/item-v1.0.0.json new file mode 100644 index 0000000000..5f1817ad97 --- /dev/null +++ b/modules/invenio-resourcesyncserver/tests/data/item-v1.0.0.json @@ -0,0 +1,1221 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 1, + "index.mapping.total_fields.limit": 50000, + "analysis": { + "tokenizer": { + "ja_tokenizer": { + "type": "kuromoji_tokenizer", + "mode": "search" + }, + "ngram_tokenizer": { + "type": "nGram", + "min_gram": 1, + "max_gram": 3, + "token_chars": [ + "letter", + "digit" + ] + } + }, + "char_filter": { + "weko_char_filter": { + "type": "mapping", + "mappings_path": "kui.txt" + } + }, + "analyzer": { + "default": { + "tokenizer": "ngram_tokenizer", + "filter": [ + "kuromoji_baseform", + "kuromoji_part_of_speech", + "cjk_width", + "stop", + "kuromoji_stemmer", + "lowercase" + ], + "char_filter": [ + "weko_char_filter" + ] + }, + "ngram_analyzer": { + "type": "custom", + "char_filter": [ + "weko_char_filter", + "html_strip" + ], + "tokenizer": "ngram_tokenizer", + "filter": [ + "cjk_width", + "lowercase" + ] + }, + "wk_analyzer": { + "type": "custom", + "char_filter": [ + "html_strip" + ], + "tokenizer": "standard", + "filter": [ + "standard", + "lowercase", + "stop", + "cjk_width" + ] + }, + "paths": { + "tokenizer": "path_hierarchy" + } + } + } + }, + "mappings": { + "item-v1.0.0": { + "properties": { + "path": { + "type": "keyword", + "index": true, + "fields": { + "tree": { + "type": "text", + "fielddata": true, + "analyzer": "paths" + } + } + }, + "item_type_id": { + "type": "keyword", + "index": true + }, + "itemtype": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + }, + "fielddata": true, + "copy_to": [ + "search_other" + ] + }, + "publish_status": { + "type": "keyword", + "index": true + }, + "publish_date": { + "type": "date", + "format": 
"yyyy-MM-dd||yyyy-MM||yyyy" + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + }, + "_oai": { + "type": "object", + "properties": { + "id": { + "type": "keyword", + "index": true + }, + "sets": { + "type": "keyword", + "index": true + }, + "updated": { + "type": "date" + } + } + }, + "control_number": { + "type": "keyword", + "index": true + }, + "title": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_title" + ] + }, + "feedback_mail_list": { + "type": "nested", + "properties": { + "author_id": { + "type": "keyword", + "index": true + }, + "email": { + "type": "keyword", + "index": true + } + } + }, + "alternative": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_title" + ] + }, + "creator": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "creatorName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "familyName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "givenName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "creatorAlternative": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "affiliation": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "affiliationName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + } + } + }, + "contributor": { + "type": "object", + "properties": { + "@attributes": { + "type": "object", + "properties": { + "contributorType": { + "type": "keyword" + } + } + }, + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "contributorName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "familyName": { + 
"type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "givenName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "contributorAlternative": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "affiliation": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "affiliationName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + } + } + }, + "accessRights": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "apc": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "rights": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "rightsHolder": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "rightsHolderName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + }, + "subject": { + "type": "object", + "properties": { + "value": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + }, + "subjectScheme": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "description": { + "type": "object", + "properties": { + "value": { + "type": "keyword", + "ignore_above": 256, + "copy_to": [ + "search_des" + ] + }, + "descriptionType": { + "type": "keyword" + } + } + }, + "publisher": { + "type": "text", + "copy_to": [ + "search_publisher" + ] + }, + "date": { + "type": "nested", + "properties": { + "dateType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + }, + "value": { + "type": "keyword" + } + } + }, + "language": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + }, + "version": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + 
"versionType": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifier": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "identifierRegistration": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "relation": { + "type": "object", + "properties": { + "relatedIdentifier": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "relatedTitle": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "relationType": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "index": true + }, + "item_links": { + "type": "keyword", + "index": true + }, + "item_title": { + "type": "keyword", + "index": true + } + } + } + } + }, + "temporal": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + }, + "text1": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text2": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text3": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text4": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text5": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text6": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text7": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text8": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + 
"text9": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text10": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text11": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text12": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text13": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text14": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text15": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text16": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text17": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text18": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text19": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text20": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text21": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text22": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text23": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text24": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text25": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text26": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text27": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text28": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text29": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text30": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "integer_range1":{ + 
"type": "integer_range" + }, + "integer_range2":{ + "type": "integer_range" + }, + "integer_range3":{ + "type": "integer_range" + }, + "integer_range4":{ + "type": "integer_range" + }, + "integer_range5":{ + "type": "integer_range" + }, + "float_range1":{ + "type": "float_range" + }, + "float_range2":{ + "type": "float_range" + }, + "float_range3":{ + "type": "float_range" + }, + "float_range4":{ + "type": "float_range" + }, + "float_range5":{ + "type": "float_range" + }, + "geo_point1":{ + "type": "geo_point" + }, + "date_range1":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range2":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range3":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range4":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range5":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + + "geo_shape1":{ + "type" : "geo_shape" + }, + "geoLocation": { + "type": "object", + "properties": { + "geoLocationPoint": { + "type": "object", + "properties": { + "pointLongitude": { + "type": "geo_point" + }, + "pointLatitude": { + "type": "geo_point" + } + } + }, + "geoLocationBox": { + "type": "object", + "properties": { + "westBoundLongitude": { + "type": "geo_point" + }, + "eastBoundLongitude": { + "type": "geo_point" + }, + "southBoundLatitude": { + "type": "geo_point" + }, + "northBoundLatitude": { + "type": "geo_point" + } + } + }, + "geoLocationPlace": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + } + } + }, + "fundingReference": { + "type": "object", + "properties": { + "funderIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "funderName": { + "type": "keyword", + 
"index": true, + "copy_to": [ + "search_other" + ] + }, + "awardNumber": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "awardTitle": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + }, + "sourceIdentifier": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "sourceTitle": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + }, + "copy_to": [ + "search_other" + ] + }, + "author_link": { + "type": "text", + "fields" : { + "raw" : { + "type" : "keyword", + "ignore_above" : 256 + } + } + }, + "volume": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "issue": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "numPages": { + "type": "text" + }, + "pageStart": { + "type": "text" + }, + "pageEnd": { + "type": "text" + }, + "dissertationNumber": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "degreeName": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + }, + "copy_to": [ + "search_other" + ] + }, + "dateGranted": { + "type": "keyword" + }, + "degreeGrantor": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "degreeGrantorName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other", + "dgName" + ] + } + } + }, + "conference": { + "type": "object", + "properties": { + "conferenceName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "conferenceSequence": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "conferencePlace": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "conferenceCountry": { + "type": "keyword", 
+ "index": true, + "copy_to": [ + "search_other" + ] + } + } + }, + "file": { + "type": "object", + "properties": { + "URI": { + "type": "nested", + "properties": { + "value": { + "type": "text" + }, + "objectType": { + "type": "keyword", + "index": true + } + } + }, + "mimeType": { + "type": "keyword", + "index": true + }, + "extent": { + "type": "keyword", + "index": true + }, + "date": { + "type": "nested", + "properties": { + "dateType": { + "type": "keyword", + "index": true + }, + "value": { + "type": "keyword" + } + } + }, + "version": { + "type": "text" + } + } + }, + "content": { + "type": "nested", + "properties": { + "file_id": { + "type": "keyword", + "index": true + }, + "groups": { + "type": "keyword", + "index": true + }, + "file_name": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "display_name": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "license_notation": { + "type": "text" + }, + "file": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true, + "fields": { + "ja": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true + } + } + }, + "attachment": { + "properties": { + "content": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true, + "fields": { + "ja": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true + } + } + } + } + } + } + }, + "weko_creator_id": { + "type": "text", + "fielddata": true, + "index": true + }, + "weko_id": { + "type": "text", + "fielddata": true, + "index": true + }, + "search_title": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_creator": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_contributor": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_other": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_identifier": { + 
"type": "text" + }, + "search_attr": { + "type": "text" + }, + "search_string": { + "type": "text" + }, + "search_publisher": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_des": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "dgName": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + } + }, + "dynamic_templates": [ + { + "weko_id": { + "match_mapping_type": "string", + "match_pattern": "regex", + "match": "^weko_id$", + "mapping": { + "type": "text", + "fielddata": true, + "index": false, + "copy_to": "weko_id" + } + } + }, + { + "string": { + "match_mapping_type": "string", + "mapping": { + "type": "text", + "index": false, + "copy_to": "search_string", + "fields": { + "raw": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + { + "date_string": { + "match_mapping_type": "date", + "mapping": { + "type": "text", + "index": false, + "copy_to": "search_string", + "fields": { + "raw": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + ] + } + } +} diff --git a/modules/invenio-resourcesyncserver/tests/test_query.py b/modules/invenio-resourcesyncserver/tests/test_query.py index c8e4a98462..20a423e029 100644 --- a/modules/invenio-resourcesyncserver/tests/test_query.py +++ b/modules/invenio-resourcesyncserver/tests/test_query.py @@ -4,6 +4,7 @@ import pytest import unittest import datetime +from elasticsearch import helpers from mock import patch, MagicMock, Mock from flask import current_app, make_response, request from flask_login import current_user @@ -18,19 +19,43 @@ # def get_items_by_index_tree(index_tree_id): -def test_get_items_by_index_tree(i18n_app, indices): +# def get_item_changes_by_index(index_tree_id, date_from, date_until): +# .tox/c1/bin/pytest --cov=invenio_resourcesyncserver tests/test_query.py::test_get_items_by_index_tree -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini 
--basetemp=/code/modules/invenio-resourcesyncserver/.tox/c1/tmp +def test_get_items_by_index_tree(i18n_app, indices, esindex): index_tree_id = 33 + date_from = (datetime.datetime.now() - datetime.timedelta(days=3)).isoformat() + date_until = datetime.datetime.now().isoformat() assert get_items_by_index_tree(index_tree_id) == [] + assert get_item_changes_by_index(index_tree_id, date_from, date_until) == [] + def _generate_es_data(num, start_datetime=datetime.datetime.now()): + for i in range(num): + doc = { + "_index": i18n_app.config['INDEXER_DEFAULT_INDEX'], + "_type": "item-v1.0.0", + "_id": f"2d1a2520-9080-437f-a304-230adc8{i:05d}", + "_source": { + "_item_metadata": { + "title": [f"test_title_{i}"], + }, + "relation_version_is_last": True, + "path": ["66"], + "control_number": f"{i:05d}", + "_created": (start_datetime + datetime.timedelta(seconds=i) - datetime.timedelta(days=2)).isoformat(), + "_updated": (start_datetime + datetime.timedelta(seconds=i) - datetime.timedelta(days=1)).isoformat(), + "publish_date": (datetime.datetime.now() - datetime.timedelta(days=1)).strftime('%Y-%m-%d'), + "publish_status": "0", + }, + } + yield doc -# def get_item_changes_by_index(index_tree_id, date_from, date_until): -def test_get_item_changes_by_index(i18n_app, indices, es): - index_tree_id = 33 - date_from = datetime.datetime.now() - datetime.timedelta(days=3) - date_until = datetime.datetime.now() + generate_data_num = 20002 + helpers.bulk(esindex, _generate_es_data(generate_data_num), refresh='true') - assert get_item_changes_by_index(index_tree_id, date_from, date_until) + # result over 10000 + assert len(get_items_by_index_tree(66)) == generate_data_num + assert len(get_item_changes_by_index(66, date_from, date_until)) == generate_data_num # def item_path_search_factory(search, index_id="0"): From 54301ba3178022b9821a2bde8d4c72a4803798fa Mon Sep 17 00:00:00 2001 From: ivis-nakagawa Date: Fri, 6 Sep 2024 14:24:47 +0900 Subject: [PATCH 5/5] weko#44981 Fix search query 
--- modules/weko-records/tests/conftest.py | 29 +- .../weko-records/tests/data/item-v1.0.0.json | 1221 +++++++++++++++++ modules/weko-records/tests/test_api.py | 37 +- modules/weko-records/weko_records/api.py | 9 +- 4 files changed, 1287 insertions(+), 9 deletions(-) create mode 100644 modules/weko-records/tests/data/item-v1.0.0.json diff --git a/modules/weko-records/tests/conftest.py b/modules/weko-records/tests/conftest.py index 75bb8c6898..0a3d08d9b7 100644 --- a/modules/weko-records/tests/conftest.py +++ b/modules/weko-records/tests/conftest.py @@ -49,7 +49,7 @@ from invenio_pidrelations import InvenioPIDRelations from invenio_pidstore import InvenioPIDStore from invenio_records import InvenioRecords -from invenio_search import InvenioSearch +from invenio_search import InvenioSearch, current_search_client from weko_admin.models import AdminSettings from weko_deposit import WekoDeposit @@ -100,9 +100,10 @@ def base_app(instance_path): THEME_SITEURL="https://localhost", WEKO_ITEMTYPE_EXCLUDED_KEYS=WEKO_ITEMTYPE_EXCLUDED_KEYS, INDEX_IMG='indextree/36466818-image.jpg', - SEARCH_UI_SEARCH_INDEX='tenant1', + SEARCH_UI_SEARCH_INDEX='test-weko', INDEXER_DEFAULT_DOCTYPE='item-v1.0.0', INDEXER_FILE_DOC_TYPE='content', + INDEXER_DEFAULT_INDEX="{}-weko-item-v1.0.0".format('test'), I18N_LANGUAGES=[("ja", "Japanese"), ("en", "English")], WEKO_PERMISSION_SUPER_ROLE_USER=WEKO_PERMISSION_SUPER_ROLE_USER, WEKO_PERMISSION_ROLE_COMMUNITY=WEKO_PERMISSION_ROLE_COMMUNITY, @@ -333,6 +334,30 @@ def db_index(app, db): db.session.commit() +@pytest.fixture() +def esindex(app): + current_search_client.indices.delete(index="test-*") + with open("tests/data/item-v1.0.0.json", "r") as f: + mapping = json.load(f) + try: + current_search_client.indices.create( + "test-weko-item-v1.0.0", body=mapping + ) + current_search_client.indices.put_alias( + index="test-weko-item-v1.0.0", name="test-weko" + ) + except: + current_search_client.indices.create("test-weko-items", body=mapping) + 
current_search_client.indices.put_alias( + index="test-weko-items", name="test-weko" + ) + + try: + yield current_search_client + finally: + current_search_client.indices.delete(index="test-*") + + @pytest.fixture() def item_type(app, db): _item_type_name = ItemTypeName(name='test') diff --git a/modules/weko-records/tests/data/item-v1.0.0.json b/modules/weko-records/tests/data/item-v1.0.0.json new file mode 100644 index 0000000000..5f1817ad97 --- /dev/null +++ b/modules/weko-records/tests/data/item-v1.0.0.json @@ -0,0 +1,1221 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 1, + "index.mapping.total_fields.limit": 50000, + "analysis": { + "tokenizer": { + "ja_tokenizer": { + "type": "kuromoji_tokenizer", + "mode": "search" + }, + "ngram_tokenizer": { + "type": "nGram", + "min_gram": 1, + "max_gram": 3, + "token_chars": [ + "letter", + "digit" + ] + } + }, + "char_filter": { + "weko_char_filter": { + "type": "mapping", + "mappings_path": "kui.txt" + } + }, + "analyzer": { + "default": { + "tokenizer": "ngram_tokenizer", + "filter": [ + "kuromoji_baseform", + "kuromoji_part_of_speech", + "cjk_width", + "stop", + "kuromoji_stemmer", + "lowercase" + ], + "char_filter": [ + "weko_char_filter" + ] + }, + "ngram_analyzer": { + "type": "custom", + "char_filter": [ + "weko_char_filter", + "html_strip" + ], + "tokenizer": "ngram_tokenizer", + "filter": [ + "cjk_width", + "lowercase" + ] + }, + "wk_analyzer": { + "type": "custom", + "char_filter": [ + "html_strip" + ], + "tokenizer": "standard", + "filter": [ + "standard", + "lowercase", + "stop", + "cjk_width" + ] + }, + "paths": { + "tokenizer": "path_hierarchy" + } + } + } + }, + "mappings": { + "item-v1.0.0": { + "properties": { + "path": { + "type": "keyword", + "index": true, + "fields": { + "tree": { + "type": "text", + "fielddata": true, + "analyzer": "paths" + } + } + }, + "item_type_id": { + "type": "keyword", + "index": true + }, + "itemtype": { + "type": "text", + "fields": { + "keyword": { + 
"type": "keyword", + "ignore_above": 256 + } + }, + "fielddata": true, + "copy_to": [ + "search_other" + ] + }, + "publish_status": { + "type": "keyword", + "index": true + }, + "publish_date": { + "type": "date", + "format": "yyyy-MM-dd||yyyy-MM||yyyy" + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + }, + "_oai": { + "type": "object", + "properties": { + "id": { + "type": "keyword", + "index": true + }, + "sets": { + "type": "keyword", + "index": true + }, + "updated": { + "type": "date" + } + } + }, + "control_number": { + "type": "keyword", + "index": true + }, + "title": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_title" + ] + }, + "feedback_mail_list": { + "type": "nested", + "properties": { + "author_id": { + "type": "keyword", + "index": true + }, + "email": { + "type": "keyword", + "index": true + } + } + }, + "alternative": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_title" + ] + }, + "creator": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "creatorName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "familyName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "givenName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "creatorAlternative": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_creator" + ] + }, + "affiliation": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "affiliationName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + } + } + }, + "contributor": { + "type": "object", + "properties": { + "@attributes": { + "type": "object", + "properties": { + "contributorType": { + "type": "keyword" + } + } + }, + 
"nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "contributorName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "familyName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "givenName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "contributorAlternative": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_contributor" + ] + }, + "affiliation": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "affiliationName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + } + } + }, + "accessRights": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "apc": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "rights": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "rightsHolder": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "rightsHolderName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + }, + "subject": { + "type": "object", + "properties": { + "value": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + }, + "subjectScheme": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "description": { + "type": "object", + "properties": { + "value": { + "type": "keyword", + "ignore_above": 256, + "copy_to": [ + "search_des" + ] + }, + "descriptionType": { + "type": "keyword" + } + } + }, + "publisher": { + "type": "text", + "copy_to": [ + "search_publisher" + ] + }, + "date": { + "type": "nested", + "properties": { + "dateType": { + "type": "keyword", + "index": true, + "copy_to": [ + 
"search_attr" + ] + }, + "value": { + "type": "keyword" + } + } + }, + "language": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + }, + "version": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "versionType": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifier": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "identifierRegistration": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "relation": { + "type": "object", + "properties": { + "relatedIdentifier": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "relatedTitle": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "relationType": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "index": true + }, + "item_links": { + "type": "keyword", + "index": true + }, + "item_title": { + "type": "keyword", + "index": true + } + } + } + } + }, + "temporal": { + "type": "keyword", + "copy_to": [ + "search_other" + ] + }, + "text1": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text2": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text3": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text4": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text5": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text6": { + "type": "text", + 
"fields": { + "raw": { + "type": "keyword" + } + } + }, + "text7": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text8": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text9": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text10": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text11": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text12": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text13": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text14": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text15": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text16": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text17": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text18": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text19": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text20": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text21": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text22": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text23": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text24": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text25": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text26": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text27": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text28": { + "type": "text", + "fields": { + "raw": { 
+ "type": "keyword" + } + } + }, + "text29": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "text30": { + "type": "text", + "fields": { + "raw": { + "type": "keyword" + } + } + }, + "integer_range1":{ + "type": "integer_range" + }, + "integer_range2":{ + "type": "integer_range" + }, + "integer_range3":{ + "type": "integer_range" + }, + "integer_range4":{ + "type": "integer_range" + }, + "integer_range5":{ + "type": "integer_range" + }, + "float_range1":{ + "type": "float_range" + }, + "float_range2":{ + "type": "float_range" + }, + "float_range3":{ + "type": "float_range" + }, + "float_range4":{ + "type": "float_range" + }, + "float_range5":{ + "type": "float_range" + }, + "geo_point1":{ + "type": "geo_point" + }, + "date_range1":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range2":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range3":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range4":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + "date_range5":{ + "type" : "date_range", + "format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM||yyyy||epoch_millis" + }, + + "geo_shape1":{ + "type" : "geo_shape" + }, + "geoLocation": { + "type": "object", + "properties": { + "geoLocationPoint": { + "type": "object", + "properties": { + "pointLongitude": { + "type": "geo_point" + }, + "pointLatitude": { + "type": "geo_point" + } + } + }, + "geoLocationBox": { + "type": "object", + "properties": { + "westBoundLongitude": { + "type": "geo_point" + }, + "eastBoundLongitude": { + "type": "geo_point" + }, + "southBoundLatitude": { + "type": "geo_point" + }, + "northBoundLatitude": { + "type": "geo_point" + } + } + }, + "geoLocationPlace": { + "type": "keyword", + "copy_to": [ + 
"search_other" + ] + } + } + }, + "fundingReference": { + "type": "object", + "properties": { + "funderIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "funderName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "awardNumber": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "awardTitle": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + }, + "sourceIdentifier": { + "type": "nested", + "properties": { + "value": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "identifierType": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_attr" + ] + } + } + }, + "sourceTitle": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + }, + "copy_to": [ + "search_other" + ] + }, + "author_link": { + "type": "text", + "fields" : { + "raw" : { + "type" : "keyword", + "ignore_above" : 256 + } + } + }, + "volume": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "issue": { + "type": "text", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "numPages": { + "type": "text" + }, + "pageStart": { + "type": "text" + }, + "pageEnd": { + "type": "text" + }, + "dissertationNumber": { + "type": "text", + "copy_to": [ + "search_other" + ] + }, + "degreeName": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + }, + "copy_to": [ + "search_other" + ] + }, + "dateGranted": { + "type": "keyword" + }, + "degreeGrantor": { + "type": "object", + "properties": { + "nameIdentifier": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_identifier" + ] + }, + "degreeGrantorName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other", + "dgName" + ] + } + } + }, + "conference": { + "type": "object", + "properties": { + "conferenceName": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + 
"conferenceSequence": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "conferencePlace": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + }, + "conferenceCountry": { + "type": "keyword", + "index": true, + "copy_to": [ + "search_other" + ] + } + } + }, + "file": { + "type": "object", + "properties": { + "URI": { + "type": "nested", + "properties": { + "value": { + "type": "text" + }, + "objectType": { + "type": "keyword", + "index": true + } + } + }, + "mimeType": { + "type": "keyword", + "index": true + }, + "extent": { + "type": "keyword", + "index": true + }, + "date": { + "type": "nested", + "properties": { + "dateType": { + "type": "keyword", + "index": true + }, + "value": { + "type": "keyword" + } + } + }, + "version": { + "type": "text" + } + } + }, + "content": { + "type": "nested", + "properties": { + "file_id": { + "type": "keyword", + "index": true + }, + "groups": { + "type": "keyword", + "index": true + }, + "file_name": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "display_name": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "license_notation": { + "type": "text" + }, + "file": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true, + "fields": { + "ja": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true + } + } + }, + "attachment": { + "properties": { + "content": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true, + "fields": { + "ja": { + "type": "text", + "term_vector": "with_positions_offsets", + "store": true + } + } + } + } + } + } + }, + "weko_creator_id": { + "type": "text", + "fielddata": true, + "index": true + }, + "weko_id": { + "type": "text", + "fielddata": true, + "index": true + }, + "search_title": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_creator": { + "type": "text", + "fields": { + "ja": 
{ + "type": "text" + } + } + }, + "search_contributor": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_other": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_identifier": { + "type": "text" + }, + "search_attr": { + "type": "text" + }, + "search_string": { + "type": "text" + }, + "search_publisher": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "search_des": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + }, + "dgName": { + "type": "text", + "fields": { + "ja": { + "type": "text" + } + } + } + }, + "dynamic_templates": [ + { + "weko_id": { + "match_mapping_type": "string", + "match_pattern": "regex", + "match": "^weko_id$", + "mapping": { + "type": "text", + "fielddata": true, + "index": false, + "copy_to": "weko_id" + } + } + }, + { + "string": { + "match_mapping_type": "string", + "mapping": { + "type": "text", + "index": false, + "copy_to": "search_string", + "fields": { + "raw": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + { + "date_string": { + "match_mapping_type": "date", + "mapping": { + "type": "text", + "index": false, + "copy_to": "search_string", + "fields": { + "raw": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + ] + } + } +} diff --git a/modules/weko-records/tests/test_api.py b/modules/weko-records/tests/test_api.py index f1949250c6..a608a8e426 100644 --- a/modules/weko-records/tests/test_api.py +++ b/modules/weko-records/tests/test_api.py @@ -24,6 +24,7 @@ # from tkinter import W import pytest import json +from elasticsearch import helpers from elasticsearch.exceptions import RequestError from invenio_records.api import Record from invenio_records.errors import MissingModelError @@ -41,7 +42,7 @@ from weko_records.models import ItemType, ItemTypeName, \ SiteLicenseInfo, SiteLicenseIpAddress from jsonschema.validators import Draft4Validator -from datetime import datetime +from 
datetime import datetime, timedelta # class RecordBase(dict): # .tox/c1/bin/pytest --cov=weko_records tests/test_api.py::test_recordbase -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp @@ -476,6 +477,40 @@ def all_2(): # def __update_item_type(cls, id_, schema, form, render): # def __update_metadata(cls, item_type_id, item_type_name, old_render, new_render): # def __get_records_by_item_type_name(cls, item_type_name): +# .tox/c1/bin/pytest --cov=weko_records tests/test_api.py::test__get_records_by_item_type_name -v -s -vv --cov-branch --cov-report=term --cov-config=tox.ini --basetemp=/code/modules/weko-records/.tox/c1/tmp +def test__get_records_by_item_type_name(app, esindex): + item_type_name = "test_item_type" + def _generate_es_data(num, start_datetime=datetime.now()): + for i in range(num): + doc = { + "_index": "test-weko-item-v1.0.0", + "_type": "item-v1.0.0", + "_id": f"2d1a2520-9080-437f-a304-230adc8{i:05d}", + "_source": { + "_item_metadata": { + "title": [f"test_title_{i}"], + }, + "relation_version_is_last": True, + "path": ["66"], + "control_number": f"{i:05d}", + "_created": (start_datetime + timedelta(seconds=i)).isoformat(), + "_updated": (start_datetime + timedelta(seconds=i)).isoformat(), + "publish_date": (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d'), + "publish_status": "0", + }, + } + if i % 2 == 0: + doc["_source"]["itemtype"] = item_type_name + else: + doc["_source"]["itemtype"] = "test_item_type2" + yield doc + + generate_data_num = 20002 + helpers.bulk(esindex, _generate_es_data(generate_data_num), refresh='true') + + # result over 10000 + assert len(ItemTypes._ItemTypes__get_records_by_item_type_name(item_type_name)) == int(generate_data_num/2) + # class ItemTypes(RecordBase): # def get_record(cls, id_, with_deleted=False): diff --git a/modules/weko-records/weko_records/api.py b/modules/weko-records/weko_records/api.py index d2fc187c59..a086171a6a 100644 --- 
a/modules/weko-records/weko_records/api.py +++ b/modules/weko-records/weko_records/api.py @@ -585,17 +585,14 @@ def __get_records_by_item_type_name(cls, item_type_name): :param item_type_name: Item Type Name. :return: Record list. """ - name = urllib.parse.quote_plus(item_type_name) - query_string = "itemtype:{}".format( - name) + from weko_search_ui.utils import execute_search_with_pagination result = [] try: search = RecordsSearch( index=current_app.config['SEARCH_UI_SEARCH_INDEX']) - search = search.query(QueryString(query=query_string)) + search = search.query('term', **{"itemtype.keyword": item_type_name}) search = search.sort('-publish_date', '-_updated') - search_result = search.execute().to_dict() - result = search_result.get('hits', {}).get('hits', []) + result = execute_search_with_pagination(search, -1) except NotFoundError as e: current_app.logger.debug("Indexes do not exist yet: ", str(e)) return result