Merge remote-tracking branch 'origin/main' into flatten-mappings-subobjects-false
felixbarny committed Feb 20, 2024
2 parents 6943024 + 5920c91 commit 7fed28a
Showing 31 changed files with 358 additions and 76 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/104711.yaml
@@ -0,0 +1,5 @@
pr: 104711
summary: "Fixing NPE when requesting [_none_] for `stored_fields`"
area: Search
type: bug
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/105633.yaml
@@ -0,0 +1,6 @@
pr: 105633
summary: "[Connector API] Bugfix: support list type in filtering advanced snippet value"
area: Application
type: bug
issues: []
9 changes: 9 additions & 0 deletions docs/reference/how-to/size-your-shards.asciidoc
@@ -221,6 +221,15 @@ GET _cat/shards?v=true
----
// TEST[setup:my_index]

[discrete]
[[shard-count-per-node-recommendation]]
==== Add enough nodes to stay within the cluster shard limits

The <<cluster-shard-limit,cluster shard limits>> prevent creation of more than
1000 non-frozen shards per node, and 3000 frozen shards per dedicated frozen
node. Make sure you have enough nodes of each type in your cluster to handle
the number of shards you need.
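
The arithmetic behind this recommendation is a simple ceiling division. The sketch below is an editor's illustration only, not part of the reference documentation or the {es} codebase; it assumes the default limits quoted above, and the class and method names are invented for the example.

[source,java]
----
// Illustrative only: minimum number of data nodes needed to stay within the
// default cluster shard limits (1000 non-frozen shards per non-frozen data
// node, 3000 frozen shards per dedicated frozen node).
public class MinimumNodeCount {

    static long minNodes(long shardCount, long shardsPerNodeLimit) {
        // Ceiling division, e.g. 2500 non-frozen shards at 1000 per node -> 3 nodes.
        return (shardCount + shardsPerNodeLimit - 1) / shardsPerNodeLimit;
    }

    public static void main(String[] args) {
        System.out.println(minNodes(2500, 1000)); // 3 non-frozen data nodes
        System.out.println(minNodes(7000, 3000)); // 3 dedicated frozen nodes
    }
}
----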

[discrete]
[[field-count-recommendation]]
==== Allow enough heap for field mappers and overheads
64 changes: 38 additions & 26 deletions docs/reference/modules/cluster/misc.asciidoc
@@ -24,35 +24,46 @@ API can make the cluster read-write again.

[discrete]
[[cluster-shard-limit]]
-==== Cluster shard limit
+==== Cluster shard limits

-There is a soft limit on the number of shards in a cluster, based on the number
-of nodes in the cluster. This is intended to prevent operations which may
-unintentionally destabilize the cluster.
+There is a limit on the number of shards in a cluster, based on the number of
+nodes in the cluster. This is intended to prevent a runaway process from
+creating too many shards which can harm performance and in extreme cases may
+destabilize your cluster.

-IMPORTANT: This limit is intended as a safety net, not a sizing recommendation. The
-exact number of shards your cluster can safely support depends on your hardware
-configuration and workload, but should remain well below this limit in almost
-all cases, as the default limit is set quite high.
+[IMPORTANT]
+====
-If an operation, such as creating a new index, restoring a snapshot of an index,
-or opening a closed index would lead to the number of shards in the cluster
-going over this limit, the operation will fail with an error indicating the
-shard limit.
+These limits are intended as a safety net to protect against runaway shard
+creation and are not a sizing recommendation. The exact number of shards your
+cluster can safely support depends on your hardware configuration and workload,
+and may be smaller than the default limits.
-If the cluster is already over the limit, due to changes in node membership or
-setting changes, all operations that create or open indices will fail until
-either the limit is increased as described below, or some indices are
-<<indices-open-close,closed>> or <<indices-delete-index,deleted>> to bring the
-number of shards below the limit.
+We do not recommend increasing these limits beyond the defaults. Clusters with
+more shards may appear to run well in normal operation, but may take a very
+long time to recover from temporary disruptions such as a network partition or
+an unexpected node restart, and may encounter problems when performing
+maintenance activities such as a rolling restart or upgrade.
-The cluster shard limit defaults to 1,000 shards per non-frozen data node for
+====

+If an operation, such as creating a new index, restoring a snapshot of an
+index, or opening a closed index would lead to the number of shards in the
+cluster going over this limit, the operation will fail with an error indicating
+the shard limit. To resolve this, either scale out your cluster by adding
+nodes, or <<indices-delete-index,delete some indices>> to bring the number of
+shards below the limit.

+If a cluster is already over the limit, perhaps due to changes in node
+membership or setting changes, all operations that create or open indices will
+fail.

+The cluster shard limit defaults to 1000 shards per non-frozen data node for
normal (non-frozen) indices and 3000 shards per frozen data node for frozen
-indices.
-Both primary and replica shards of all open indices count toward the limit,
-including unassigned shards.
-For example, an open index with 5 primary shards and 2 replicas counts as 15 shards.
-Closed indices do not contribute to the shard count.
+indices. Both primary and replica shards of all open indices count toward the
+limit, including unassigned shards. For example, an open index with 5 primary
+shards and 2 replicas counts as 15 shards. Closed indices do not contribute to
+the shard count.

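As an editor's aside (not part of the upstream documentation), the way an index contributes to this limit can be written as a one-line calculation. The worked numbers below are the 5-primary, 2-replica example from the paragraph above; the class and method names are invented.

[source,java]
----
// Illustrative only: how an index contributes to the cluster shard limit.
// Open indices count primaries plus every replica copy (assigned or not);
// closed indices contribute nothing.
public class ShardsTowardLimit {

    static int shardsTowardLimit(int primaries, int replicas, boolean closed) {
        return closed ? 0 : primaries * (1 + replicas);
    }

    public static void main(String[] args) {
        System.out.println(shardsTowardLimit(5, 2, false)); // 15, as in the example above
        System.out.println(shardsTowardLimit(5, 2, true));  // 0, closed indices are not counted
    }
}
----
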
You can dynamically adjust the cluster shard limit with the following setting:

@@ -99,12 +110,13 @@ For example, a cluster with a `cluster.max_shards_per_node.frozen` setting of
`100` and three frozen data nodes has a frozen shard limit of 300. If the
cluster already contains 296 shards, {es} rejects any request that adds five or
more frozen shards to the cluster.
+--

-NOTE: These setting do not limit shards for individual nodes. To limit the
-number of shards for each node, use the
+NOTE: These limits only apply to actions which create shards and do not limit
+the number of shards assigned to each node. To limit the number of shards
+assigned to each node, use the
<<cluster-total-shards-per-node,`cluster.routing.allocation.total_shards_per_node`>>
setting.
---

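Returning to the frozen-tier example above (a `cluster.max_shards_per_node.frozen` of `100` and three frozen data nodes), the admission check works out as follows. This is an editor's sketch of the arithmetic only, not the actual {es} implementation; the class and method names are invented.

[source,java]
----
// Illustrative only: the frozen shard limit is max_shards_per_node.frozen
// multiplied by the number of frozen data nodes; requests that would push the
// cluster past it are rejected.
public class FrozenShardLimitCheck {

    static boolean wouldExceedLimit(int currentShards, int shardsToAdd, int maxShardsPerNode, int frozenDataNodes) {
        int limit = maxShardsPerNode * frozenDataNodes; // 100 * 3 = 300 in the example above
        return currentShards + shardsToAdd > limit;
    }

    public static void main(String[] args) {
        System.out.println(wouldExceedLimit(296, 5, 100, 3)); // true  -> request rejected
        System.out.println(wouldExceedLimit(296, 4, 100, 3)); // false -> request allowed
    }
}
----
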
[discrete]
[[user-defined-data]]
@@ -412,8 +412,7 @@ private SubFieldInfo buildPrefixInfo(MapperBuilderContext context, FieldType fie
* or a multi-field). This way search will continue to work on old indices and new indices
* will use the expected full name.
*/
-String fullName;
-fullName = indexCreatedVersion.before(IndexVersions.V_7_2_1) ? name() : context.buildFullName(name());
+String fullName = indexCreatedVersion.before(IndexVersions.V_7_2_1) ? name() : context.buildFullName(name());
// Copy the index options of the main field to allow phrase queries on
// the prefix field.
FieldType pft = new FieldType(fieldType);
@@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.inference;

import org.elasticsearch.core.Nullable;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
* Model settings that are interesting for semantic_text inference fields. This class is used to serialize common
* ServiceSettings methods when building inference for semantic_text fields.
*
* @param taskType task type
* @param inferenceId inference id
* @param dimensions number of dimensions. May be null if not applicable
* @param similarity similarity used by the service. May be null if not applicable
*/
public record SemanticTextModelSettings(
TaskType taskType,
String inferenceId,
@Nullable Integer dimensions,
@Nullable SimilarityMeasure similarity
) {

public static final String NAME = "model_settings";
private static final ParseField TASK_TYPE_FIELD = new ParseField("task_type");
private static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id");
private static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
private static final ParseField SIMILARITY_FIELD = new ParseField("similarity");

public SemanticTextModelSettings(TaskType taskType, String inferenceId, Integer dimensions, SimilarityMeasure similarity) {
Objects.requireNonNull(taskType, "task type must not be null");
Objects.requireNonNull(inferenceId, "inferenceId must not be null");
this.taskType = taskType;
this.inferenceId = inferenceId;
this.dimensions = dimensions;
this.similarity = similarity;
}

public SemanticTextModelSettings(Model model) {
this(
model.getTaskType(),
model.getInferenceEntityId(),
model.getServiceSettings().dimensions(),
model.getServiceSettings().similarity()
);
}

public static SemanticTextModelSettings parse(XContentParser parser) throws IOException {
return PARSER.apply(parser, null);
}

private static final ConstructingObjectParser<SemanticTextModelSettings, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
TaskType taskType = TaskType.fromString((String) args[0]);
String inferenceId = (String) args[1];
Integer dimensions = (Integer) args[2];
SimilarityMeasure similarity = args[3] == null ? null : SimilarityMeasure.fromString((String) args[3]);
return new SemanticTextModelSettings(taskType, inferenceId, dimensions, similarity);
});
static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), TASK_TYPE_FIELD);
PARSER.declareString(ConstructingObjectParser.constructorArg(), INFERENCE_ID_FIELD);
PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), DIMENSIONS_FIELD);
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), SIMILARITY_FIELD);
}

public Map<String, Object> asMap() {
Map<String, Object> attrsMap = new HashMap<>();
attrsMap.put(TASK_TYPE_FIELD.getPreferredName(), taskType.toString());
attrsMap.put(INFERENCE_ID_FIELD.getPreferredName(), inferenceId);
if (dimensions != null) {
attrsMap.put(DIMENSIONS_FIELD.getPreferredName(), dimensions);
}
if (similarity != null) {
attrsMap.put(SIMILARITY_FIELD.getPreferredName(), similarity);
}
return Map.of(NAME, attrsMap);
}
}
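
The snippet below is an editor's usage sketch for the record above, not part of this commit. It exercises only the constructor, the argument validation, and `asMap()` shown in the diff; `TaskType.SPARSE_EMBEDDING`, `TaskType.TEXT_EMBEDDING` and `SimilarityMeasure.COSINE` are assumed to exist as enum constants, and the inference IDs and dimension count are invented example values.

/*
 * Editor's usage sketch, not part of this commit: exercises the constructor,
 * the null checks, and asMap() of the record above. TaskType.SPARSE_EMBEDDING,
 * TaskType.TEXT_EMBEDDING and SimilarityMeasure.COSINE are assumed enum
 * constants; the inference IDs and the dimension count are invented.
 */
package org.elasticsearch.inference;

import java.util.Map;

public class SemanticTextModelSettingsExample {

    public static void main(String[] args) {
        // Sparse embeddings have no dimension count or similarity, so both stay null.
        SemanticTextModelSettings sparse = new SemanticTextModelSettings(
            TaskType.SPARSE_EMBEDDING,
            "my-elser-endpoint",
            null,
            null
        );

        // Dense text embeddings carry both values.
        SemanticTextModelSettings dense = new SemanticTextModelSettings(
            TaskType.TEXT_EMBEDDING,
            "my-e5-endpoint",
            384,
            SimilarityMeasure.COSINE
        );

        // asMap() nests the non-null attributes under the "model_settings" key.
        Map<String, Object> denseMap = dense.asMap();
        System.out.println(denseMap);
        System.out.println(sparse.asMap());
    }
}
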
@@ -17,4 +17,23 @@ public interface ServiceSettings extends ToXContentObject, VersionedNamedWriteab
* Returns a {@link ToXContentObject} that only writes the exposed fields. Any hidden fields are not written.
*/
ToXContentObject getFilteredXContentObject();

/**
* Similarity used in the service. Will be null if not applicable.
*
* @return similarity
*/
default SimilarityMeasure similarity() {
return null;
}

/**
* Number of dimensions the service works with. Will be null if not applicable.
*
* @return number of dimensions
*/
default Integer dimensions() {
return null;
}

}
@@ -1,11 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
-* 2.0; you may not use this file except in compliance with the Elastic License
-* 2.0.
+* 2.0 and the Server Side Public License, v 1; you may not use this file except
+* in compliance with, at your election, the Elastic License 2.0 or the Server
+* Side Public License, v 1.
*/

-package org.elasticsearch.xpack.inference.common;
+package org.elasticsearch.inference;

import java.util.Locale;

@@ -1337,7 +1337,10 @@ private SearchSourceBuilder parseXContent(
SearchSourceBuilder.STORED_FIELDS_FIELD.getPreferredName(),
parser
);
-searchUsage.trackSectionUsage(STORED_FIELDS_FIELD.getPreferredName());
+if (storedFieldsContext.fetchFields() == false
+|| (storedFieldsContext.fieldNames() != null && storedFieldsContext.fieldNames().size() > 0)) {
+searchUsage.trackSectionUsage(STORED_FIELDS_FIELD.getPreferredName());
+}
} else if (SORT_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
sort(parser.text());
} else if (PROFILE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
@@ -1493,7 +1496,8 @@ private SearchSourceBuilder parseXContent(
} else if (token == XContentParser.Token.START_ARRAY) {
if (STORED_FIELDS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
storedFieldsContext = StoredFieldsContext.fromXContent(STORED_FIELDS_FIELD.getPreferredName(), parser);
-if (storedFieldsContext.fieldNames().size() > 0 || storedFieldsContext.fetchFields() == false) {
+if (storedFieldsContext.fetchFields() == false
+|| (storedFieldsContext.fieldNames() != null && storedFieldsContext.fieldNames().size() > 0)) {
searchUsage.trackSectionUsage(STORED_FIELDS_FIELD.getPreferredName());
}
} else if (DOCVALUE_FIELDS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
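
As an editor's note, the two guards added above share the same shape. Restated as a hypothetical standalone helper (not code from this commit): a `stored_fields` value of `_none_` disables stored-field fetching, leaving the field-name list null, which is the `NullPointerException` the changelog entry for PR 104711 refers to; the guard still counts `_none_` as usage of the section.

// Editor's illustration (not part of this commit): the guard above restated as
// a hypothetical standalone helper. With "stored_fields": "_none_", fetchFields
// is false and fieldNames is null, so an unconditional fieldNames().size()
// call could throw a NullPointerException.
import java.util.List;

class StoredFieldsUsageGuard {

    static boolean shouldTrackStoredFieldsUsage(boolean fetchFields, List<String> fieldNames) {
        return fetchFields == false || (fieldNames != null && fieldNames.isEmpty() == false);
    }

    public static void main(String[] args) {
        System.out.println(shouldTrackStoredFieldsUsage(false, null));            // true: "_none_" still counts as using the section
        System.out.println(shouldTrackStoredFieldsUsage(true, List.of("field"))); // true: explicit stored fields
        System.out.println(shouldTrackStoredFieldsUsage(true, List.of()));        // false: nothing to track
    }
}
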
@@ -1767,7 +1767,9 @@ public void testWaitOnRefreshFailsIfCheckpointNotIndexed() {
final IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index"));
final IndexShard indexShard = indexService.getShard(0);
SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true);
-searchRequest.setWaitForCheckpointsTimeout(TimeValue.timeValueMillis(randomIntBetween(10, 100)));
+// Increased timeout to avoid cancelling the search task prior to its completion,
+// as we expect to raise an Exception. Timeout itself is tested on the following `testWaitOnRefreshTimeout` test.
+searchRequest.setWaitForCheckpointsTimeout(TimeValue.timeValueMillis(randomIntBetween(200, 300)));
searchRequest.setWaitForCheckpoints(Collections.singletonMap("index", new long[] { 1 }));

final DocWriteResponse response = prepareIndex("index").setSource("id", "1").get();
@@ -65,6 +65,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import java.util.function.ToLongFunction;
@@ -592,6 +593,28 @@ public void testNegativeTrackTotalHits() throws IOException {
}
}

public void testStoredFieldsUsage() throws IOException {
Set<String> storedFieldRestVariations = Set.of(
"{\"stored_fields\" : [\"_none_\"]}",
"{\"stored_fields\" : \"_none_\"}",
"{\"stored_fields\" : [\"field\"]}",
"{\"stored_fields\" : \"field\"}"
);
for (String storedFieldRest : storedFieldRestVariations) {
SearchUsageHolder searchUsageHolder = new UsageService().getSearchUsageHolder();
try (XContentParser parser = createParser(JsonXContent.jsonXContent, storedFieldRest)) {
new SearchSourceBuilder().parseXContent(parser, true, searchUsageHolder, nf -> false);
SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats();
Map<String, Long> sectionsUsage = searchUsageStats.getSectionsUsage();
assertEquals(
"Failed to correctly parse and record usage of '" + storedFieldRest + "'",
1L,
sectionsUsage.get("stored_fields").longValue()
);
}
}
}

public void testEmptySectionsAreNotTracked() throws IOException {
SearchUsageHolder searchUsageHolder = new UsageService().getSearchUsageHolder();

@@ -23,7 +23,10 @@ setup:
advanced_snippet:
created_at: "2023-05-25T12:30:00.000Z"
updated_at: "2023-05-25T12:30:00.000Z"
-value: {}
+value:
+  - tables:
+      - some_table
+    query: 'SELECT id, st_geohash(coordinates) FROM my_db.some_table;'
rules:
- created_at: "2023-05-25T12:30:00.000Z"
field: _
@@ -41,7 +44,13 @@ setup:
advanced_snippet:
created_at: "2023-05-25T12:30:00.000Z"
updated_at: "2023-05-25T12:30:00.000Z"
-value: {}
+value:
+  - tables:
+      - some_table
+    query: 'SELECT id, st_geohash(coordinates) FROM my_db.some_table;'
+  - tables:
+      - another_table
+    query: 'SELECT id, st_geohash(coordinates) FROM my_db.another_table;'
rules:
- created_at: "2023-05-25T12:30:00.000Z"
field: _