Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additional data to OpenSearch index. #766

Merged
merged 36 commits into from May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4d08a19
Add data_abstract to opensearch index
ItIsJordan Jan 8, 2024
5c67dc8
Fix test_submissions_csv in dashboard_test.py
ItIsJordan Jan 8, 2024
37d1490
Add data abstract search test
ItIsJordan Jan 9, 2024
b2c1a2a
Revert "Fix test_submissions_csv in dashboard_test.py"
ItIsJordan Jan 10, 2024
4cf908a
Add table data description to opensearch index
ItIsJordan Jan 12, 2024
0f60eb1
Update search testing to include table description searching
ItIsJordan Jan 12, 2024
f6437b2
Revert "Add table data description to opensearch index"
ItIsJordan Jan 26, 2024
ea6e78f
Add additional resource descriptions to index
ItIsJordan Jan 26, 2024
040a4dd
Merge branch 'main' into add-additional-desc-to-osindex
ItIsJordan Feb 5, 2024
c6e4920
Update test for resource searching in OpenSearch API
ItIsJordan Feb 6, 2024
5cd2256
Update hard-coded/assumed testing variables
ItIsJordan Feb 7, 2024
7557c76
Update broken test in test_dashboard.py
ItIsJordan Feb 9, 2024
12214c7
Fix unrendered LaTeX in table description
ItIsJordan Feb 9, 2024
882d6a9
Clear table quantity text on records page
ItIsJordan Feb 9, 2024
6445bd9
Refactor data table routes
ItIsJordan Feb 14, 2024
4dc662a
Update table renderer js
ItIsJordan Feb 14, 2024
a9bd63b
Update records_test.py
ItIsJordan Feb 14, 2024
4afcb2f
Fix button query bug
ItIsJordan Feb 23, 2024
e11033b
Revert large file bugfixes to move to new branch (last 6 commits)
ItIsJordan Feb 27, 2024
a07723f
Merge branch 'main' into add-additional-desc-to-osindex
ItIsJordan Feb 27, 2024
40927b3
Fix get json table data url
ItIsJordan Mar 27, 2024
0483fc1
Add resource url generation function
ItIsJordan Apr 2, 2024
a60fad9
Update record_mapping to include resources/data_abstract
ItIsJordan Apr 2, 2024
f982f14
Add function to generate DataResource dictionary
ItIsJordan Apr 2, 2024
d897f87
Add full resource data to OpenSearch index
ItIsJordan Apr 2, 2024
b6596cc
Add full resource data to table output
ItIsJordan Apr 2, 2024
41b9b59
Update search_test.py
ItIsJordan Apr 2, 2024
bde6455
Update advanced search help
ItIsJordan Apr 2, 2024
efad6bc
Merge branch 'main' into add-additional-desc-to-osindex
ItIsJordan Apr 2, 2024
f2d7ef1
Change use of SITE_URL
ItIsJordan Apr 16, 2024
944638e
Add shorthand for resource description searching
ItIsJordan Apr 16, 2024
d26ca1e
Update search help text
ItIsJordan Apr 16, 2024
6824749
Fix broken test
ItIsJordan Apr 16, 2024
9b1a5cc
Add "resources" data to data table json output
ItIsJordan Apr 17, 2024
e24c6e6
Update site_url use
ItIsJordan Apr 17, 2024
2846603
Update search_help.html
ItIsJordan Apr 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions hepdata/ext/opensearch/config/record_mapping.py
Expand Up @@ -198,6 +198,22 @@
}
}
},
"resources": {
"properties": {
"description": {
"type": "text"
},
"type": {
"type": "text"
},
"url": {
"type": "text"
}
}
},
"data_abstract": {
"type": "text"
},
"parent_child_join": {
"type": "join",
"relations": {
Expand Down
47 changes: 45 additions & 2 deletions hepdata/ext/opensearch/document_enhancers.py
Expand Up @@ -34,6 +34,8 @@
from hepdata.ext.opensearch.config.record_mapping import mapping as os_mapping
from hepdata.modules.permissions.models import SubmissionParticipant
from hepdata.modules.submission.api import get_latest_hepsubmission
from hepdata.modules.submission.models import DataSubmission
from hepdata.utils.miscellaneous import get_resource_data

FORMATS = ['json', 'root', 'yaml', 'csv', 'yoda']

Expand Down Expand Up @@ -105,8 +107,8 @@ def add_analyses(doc):
if reference.file_type in current_app.config['ANALYSES_ENDPOINTS']:
doc["analyses"].append({'type': reference.file_type, 'analysis': reference.file_location})
elif reference.file_type == HISTFACTORY_FILE_TYPE:
SITE_URL = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
landing_page_url = f"{SITE_URL}/record/resource/{reference.id}?landing_page=true"
site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
landing_page_url = f"{site_url}/record/resource/{reference.id}?landing_page=true"
doc["analyses"].append({'type': reference.file_type, 'analysis': landing_page_url,
'filename': os.path.basename(reference.file_location)})

Expand All @@ -127,6 +129,44 @@ def add_data_keywords(doc):
doc['data_keywords'] = dict(agg_keywords)


def add_data_abstract(doc):
    """
    Attach the data abstract of the associated HEPSubmission to the
    search document.

    :param doc: the document dict being prepared for indexing; must
        contain 'recid'
    :return: None (mutates ``doc`` in place)
    """
    hep_submission = get_latest_hepsubmission(
        publication_recid=doc['recid'], overall_status='finished'
    )
    doc['data_abstract'] = hep_submission.data_abstract


def add_data_resources(doc):
    """
    Attach resource metadata for a data table to the search document.

    Looks up the DataSubmission by the document's DOI, then delegates to
    ``get_resource_data`` for the resource data retrieval.

    :param doc: the document dict being prepared for indexing; must
        contain 'doi'
    :return: None (mutates ``doc`` in place)
    """
    data_submission = DataSubmission.query.filter_by(doi=doc["doi"]).one()
    doc['resources'] = get_resource_data(data_submission)


def add_submission_resources(doc):
    """
    Attach resource metadata for a publication to the search document.

    Fetches the latest finished HEPSubmission for the record, then
    delegates to ``get_resource_data`` for the resource data retrieval.

    :param doc: the document dict being prepared for indexing; must
        contain 'recid'
    :return: None (mutates ``doc`` in place)
    """
    hep_submission = get_latest_hepsubmission(
        publication_recid=doc['recid'], overall_status='finished'
    )
    doc['resources'] = get_resource_data(hep_submission)


def process_cmenergies(keywords):
cmenergies = []
if keywords['cmenergies']:
Expand Down Expand Up @@ -182,13 +222,16 @@ def enhance_data_document(doc):
add_data_table_urls(doc)
add_parent_publication(doc)
add_data_keywords(doc)
add_data_resources(doc)


def enhance_publication_document(doc):
add_id(doc)
add_doc_type(doc, CFG_PUB_TYPE)
add_data_submission_urls(doc)
add_data_abstract(doc)
add_shortened_authors(doc)
process_last_updates(doc)
add_analyses(doc)
add_parent_child_info(doc)
add_submission_resources(doc)
3 changes: 2 additions & 1 deletion hepdata/ext/opensearch/query_builder.py
Expand Up @@ -51,7 +51,8 @@ def parse_query(query_string):
"cmenergies": "data_keywords.cmenergies",
"phrases": "data_keywords.phrases",
"reactions": "data_keywords.reactions",
"analysis": "analyses.type"
"analysis": "analyses.type",
"resources": "resources.description" # Add shorthand for resource description
}
}

Expand Down
4 changes: 3 additions & 1 deletion hepdata/modules/records/api.py
Expand Up @@ -65,7 +65,7 @@
RelatedTable
)
from hepdata.utils.file_extractor import extract
from hepdata.utils.miscellaneous import sanitize_html
from hepdata.utils.miscellaneous import sanitize_html, get_resource_data
from hepdata.utils.users import get_user_from_id
from bs4 import BeautifulSoup
from hepdata_converter_ws_client import Error
Expand Down Expand Up @@ -993,6 +993,8 @@ def process_data_tables(ctx, data_record_query, first_data_id,
"id": submission_record.id, "processed_name": processed_name,
"name": submission_record.name,
"location": submission_record.location_in_publication,
# Generate resource metadata
"resources": get_resource_data(submission_record),
"doi": submission_record.doi,
"description": sanitize_html(
truncate_string(submission_record.description, 20),
Expand Down
1 change: 1 addition & 0 deletions hepdata/modules/records/utils/data_processing_utils.py
Expand Up @@ -261,6 +261,7 @@ def generate_table_headers(table_contents):
"table_license": table_contents["table_license"],
"related_tables" : table_contents["related_tables"],
"related_to_this" : table_contents["related_to_this"],
"resources" : table_contents["resources"],
"review": table_contents["review"],
"associated_files": table_contents["associated_files"],
"keywords": {},
Expand Down
6 changes: 4 additions & 2 deletions hepdata/modules/records/views.py
Expand Up @@ -65,6 +65,7 @@
update_action_for_submission_participant
from hepdata.modules.stats.views import increment
from hepdata.modules.permissions.models import SubmissionParticipant
from hepdata.utils.miscellaneous import get_resource_data

logging.basicConfig()
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -303,9 +304,9 @@ def get_table_data(data_recid, version):
return jsonify(generate_table_data(table_contents))


@blueprint.route('/data/<int:recid>/<int:data_recid>/<int:version>/', defaults={'load_all': 1})
@blueprint.route('/data/<int:recid>/<int:data_recid>/<int:version>/')
@blueprint.route('/data/<int:recid>/<int:data_recid>/<int:version>/<int:load_all>')
def get_table_details(recid, data_recid, version, load_all):
def get_table_details(recid, data_recid, version, load_all=1):
"""
Get the table details of a given datasubmission.

Expand Down Expand Up @@ -336,6 +337,7 @@ def get_table_details(recid, data_recid, version, load_all):
table_contents["table_license"] = generate_license_data_by_id(data_record.file_license)
table_contents["related_tables"] = get_table_data_list(datasub_record, "related")
table_contents["related_to_this"] = get_table_data_list(datasub_record, "related_to_this")
table_contents["resources"] = get_resource_data(datasub_record)
table_contents["doi"] = datasub_record.doi
table_contents["location"] = datasub_record.location_in_publication
table_contents["size"] = size_check["size"]
Expand Down
Expand Up @@ -21,7 +21,7 @@ <h4 class="modal-title">Advanced Search Tips</h4>
target="_new">Elasticsearch documentation</a>.</p>

<div class="well well-small">
<h4>Search on title or abstract</h4>
<h4>Search on title, abstract, or record abstract</h4>
<ul>
<li>Find all data with <em>collisions</em> in the <strong>title</strong>
<br/>
Expand All @@ -41,6 +41,16 @@ <h4>Search on title or abstract</h4>
</li>
</ul>
</li>
<br>

<li>Find all data with <em>"CERN-LHC"</em> in the <strong>data abstract</strong>
<li><i>"Comment" in the submission.yaml file</i></li>
<ul>
<li>
<a href='/search?q=data_abstract:CERN-LHC' target="_new">data_abstract:CERN-LHC</a>
</li>
</ul>
</li>

</ul>
</div>
Expand Down Expand Up @@ -104,6 +114,43 @@ <h4>Search by keywords</h4>

<div class="clearfix"></div>

<div class="well well-small">
<h4>Searching resources by field</h4>
<ul>
<li>Text-based description searching:
<ul>
<li>
<a href='/search?q=resources.description:"Created with hepdata_lib"' target="_new">resources:"Created with hepdata_lib"</a>
</li>
</ul>
</li>
<br>

<li>Resource-type searching:
<ul>
<li>
<a href='/search?q=resources.type:png' target="_new">resources.type:png</a>
</li>
<li>Examples: png, html, github, zenodo etc.</li>
</ul>
</li>
<br>

<li>Searching for specific URLs:
<ul>
<li>
<a href='/search?q=resources.url:atlas.web.cern.ch' target="_new">resources.url:atlas.web.cern.ch</a>
</li>
</ul>
</li>
<li>
<span class="text-muted">Quotes force a full match.</span>
</li>
</ul>
</div>

<div class="clearfix"></div>

<div class="well well-small">
<h4>Other useful searches</h4>
<ul>
Expand Down
45 changes: 45 additions & 0 deletions hepdata/utils/miscellaneous.py
Expand Up @@ -23,6 +23,7 @@
import re

import bleach
from flask import current_app


def splitter(data, predicate):
Expand Down Expand Up @@ -70,3 +71,47 @@ def sanitize_html(value, tags=None, attributes=None, strip=False):
)

return cleaned

def generate_resource_url(resource):
    """
    Generate a URL for a resource from its file_location/ID.

    If the resource's file_location is already an external URL (it starts
    with "http://" or "https://"), it is returned unchanged. Otherwise a
    HEPData landing-page URL is built from SITE_URL and the resource ID.

    :param resource: DataResource object for generation
    :return: The generated URL string
    """
    location = resource.file_location
    # Strict scheme check: the looser startswith("http") test would also
    # treat local paths such as "http_files/..." as external URLs.
    if location.startswith(("http://", "https://")):
        return location

    # Not an external URL: build a hepdata.net URL using the resource ID.
    site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
    return f"{site_url}/record/resource/{resource.id}?landing_page=true"


def get_resource_data(submission):
    """
    Build a list of description/type/url dictionaries for a submission's
    resource objects, for addition to the OpenSearch index.

    Accepts either a DataSubmission or a HEPSubmission, both of which
    carry a ``resources`` collection.

    :param submission: HEPSubmission/DataSubmission object
    :return: list of dicts, one per resource
    """
    return [
        {
            "description": resource.file_description,
            "type": resource.file_type,
            "url": generate_resource_url(resource),
        }
        for resource in submission.resources
    ]
7 changes: 6 additions & 1 deletion tests/conftest.py
Expand Up @@ -156,7 +156,12 @@ def get_identifiers():
{"hepdata_id": "ins1245023", "inspire_id": '1245023',
"title": "High-statistics study of $K^0_S$ pair production in two-photon collisions",
"data_tables": 40,
"arxiv": "arXiv:1307.7457"}
"arxiv": "arXiv:1307.7457"},
{"hepdata_id": "ins2751932", "inspire_id": '2751932',
"title": "Search for pair production of higgsinos in events with two Higgs bosons and missing "
"transverse momentum in $\\sqrt{s}=13$ TeV $pp$ collisions at the ATLAS experiment",
"data_tables": 66,
"arxiv": "arXiv:2401.14922"}
]

@pytest.fixture()
Expand Down
5 changes: 3 additions & 2 deletions tests/dashboard_test.py
Expand Up @@ -476,11 +476,12 @@ def test_submissions_csv(app, admin_idx, load_default_data, identifiers):
site_url = app.config.get('SITE_URL', 'https://www.hepdata.net')
csv_data = get_submissions_csv(user, include_imported=True)
csv_lines = csv_data.splitlines()
assert len(csv_lines) == 3
assert len(csv_lines) == 4
assert csv_lines[0] == 'hepdata_id,version,url,inspire_id,arxiv_id,title,collaboration,creation_date,last_updated,status,uploaders,reviewers'
today = datetime.datetime.utcnow().date().isoformat()
assert csv_lines[1] == f'16,1,{site_url}/record/16,1245023,arXiv:1307.7457,High-statistics study of $K^0_S$ pair production in two-photon collisions,Belle,{today},2013-12-17,finished,,'
assert csv_lines[2] == f'1,1,{site_url}/record/1,1283842,arXiv:1403.1294,Measurement of the forward-backward asymmetry in the distribution of leptons in $t\\bar{{t}}$ events in the lepton+jets channel,D0,{today},2014-08-11,finished,,'
assert csv_lines[3] == f'57,1,{site_url}/record/57,2751932,arXiv:2401.14922,Search for pair production of higgsinos in events with two Higgs bosons and missing transverse momentum in $\sqrt{{s}}=13$ TeV $pp$ collisions at the ATLAS experiment,ATLAS,{today},{today},finished,,'

# Get data without imported records - should be empty (headers only)
csv_data = get_submissions_csv(user, include_imported=False)
Expand Down Expand Up @@ -524,5 +525,5 @@ def test_submissions_csv(app, admin_idx, load_default_data, identifiers):
# Get CSV again - should be uploader and reviewers in line 2 now
csv_data = get_submissions_csv(user, include_imported=True)
csv_lines = csv_data.splitlines()
assert len(csv_lines) == 3
assert len(csv_lines) == 4
assert csv_lines[2] == f'1,1,{site_url}/record/1,1283842,arXiv:1403.1294,Measurement of the forward-backward asymmetry in the distribution of leptons in $t\\bar{{t}}$ events in the lepton+jets channel,D0,{today},2014-08-11,finished,test@test.com (Una Uploader),test2@test.com (Rowan Reviewer) | test@hepdata.net'
8 changes: 4 additions & 4 deletions tests/doi_minter_test.py
Expand Up @@ -307,7 +307,7 @@ def test_generate_dois_for_submission(mock_data_cite_provider, identifiers):
mock_data_cite_provider.reset_mock()
record_information = create_record({})
recid = record_information['recid']
assert recid == 106
assert recid == 173
hep_submission = get_or_create_hepsubmission(recid)
generate_dois_for_submission(recid, recid)
mock_data_cite_provider.assert_not_called()
Expand All @@ -334,9 +334,9 @@ def test_generate_dois_for_submission(mock_data_cite_provider, identifiers):
# Generate DOIs again - should work and call `create` for record, v1, table
generate_dois_for_submission(recid, recid)
mock_data_cite_provider.create.assert_has_calls([
call('10.17182/hepdata.106'),
call('10.17182/hepdata.106.v1'),
call('10.17182/hepdata.106.v1/t1')
call('10.17182/hepdata.173'),
call('10.17182/hepdata.173.v1'),
call('10.17182/hepdata.173.v1/t1')
])
# Should have twice as many get calls as register calls (because get is called by create)
assert mock_data_cite_provider.get.call_count == 6
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/test_dashboard.py
Expand Up @@ -206,7 +206,7 @@ def test_dashboard(live_server, logged_in_browser):
)
assert(response.status_code == 200)
decoded_lines = response.content.decode('utf-8').splitlines()
assert len(decoded_lines) == 4
assert len(decoded_lines) == 5
csv_reader = csv.reader(decoded_lines)
for row in csv_reader:
assert len(row) == 12
Expand Down