Skip to content

Commit 6630fe2

Browse files
strogiyotec and Almas Abdrazak authored
* JAVA-5988 * JAVA-5988 address PR comments * JAVA-5988 add a unit test to AggregatesTest * JAVA-5988 embedding fluent API with builders * JAVA-5988 introduce AbstractVectorSearchQuery * JAVA-5988 fix checkstyle * JAVA05988 use VectorSearchQuery as param * JAVA-5988 add more unit tests * JAVA-5988 add autoembedding to Scala --------- Co-authored-by: Almas Abdrazak <abdrazak.almas@mongodb.com>
1 parent 6fd2c95 commit 6630fe2

10 files changed

Lines changed: 695 additions & 19 deletions

File tree

driver-core/src/main/com/mongodb/client/model/Aggregates.java

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@
2222
import com.mongodb.client.model.fill.FillOptions;
2323
import com.mongodb.client.model.fill.FillOutputField;
2424
import com.mongodb.client.model.geojson.Point;
25+
import com.mongodb.internal.client.model.search.AbstractVectorSearchQuery;
2526
import com.mongodb.client.model.search.FieldSearchPath;
2627
import com.mongodb.client.model.search.SearchCollector;
2728
import com.mongodb.client.model.search.SearchOperator;
2829
import com.mongodb.client.model.search.SearchOptions;
30+
import com.mongodb.client.model.search.TextVectorSearchQuery;
2931
import com.mongodb.client.model.search.VectorSearchOptions;
32+
import com.mongodb.client.model.search.VectorSearchQuery;
3033
import com.mongodb.lang.Nullable;
3134
import org.bson.BsonArray;
3235
import org.bson.BsonBoolean;
@@ -38,6 +41,8 @@
3841
import org.bson.BsonValue;
3942
import org.bson.Document;
4043
import org.bson.BinaryVector;
44+
import org.bson.annotations.Beta;
45+
import org.bson.annotations.Reason;
4146
import org.bson.codecs.configuration.CodecRegistry;
4247
import org.bson.conversions.Bson;
4348

@@ -65,6 +70,9 @@
6570
@SuppressWarnings("overloads")
6671
public final class Aggregates {
6772

73+
private Aggregates() {
    // Not instantiable: this class only exposes static factory methods for pipeline stages.
}
75+
6876
/**
6977
* Creates an $addFields pipeline stage
7078
*
@@ -967,6 +975,41 @@ public static Bson vectorSearch(
967975
return new VectorSearchBson(path, queryVector, index, limit, options);
968976
}
969977

978+
/**
979+
* Creates a {@code $vectorSearch} pipeline stage supported by MongoDB Atlas with automated embedding.
980+
* You may use the {@code $meta: "vectorSearchScore"} expression, e.g., via {@link Projections#metaVectorSearchScore(String)},
981+
* to extract the relevance score assigned to each found document.
982+
* <p>
983+
* This overload is used for auto-embedding in Atlas. The server will automatically generate embeddings
984+
* for the query using the model specified in the index definition or via {@link TextVectorSearchQuery#model(String)}.
985+
* </p>
986+
*
987+
* @param path The field to be searched.
988+
* @param query The query specification, typically created via {@link VectorSearchQuery#textQuery(String)}.
989+
* @param index The name of the index to use.
990+
* @param limit The limit on the number of documents produced by the pipeline stage.
991+
* @param options Optional {@code $vectorSearch} pipeline stage fields.
992+
* @return The {@code $vectorSearch} pipeline stage.
993+
*
994+
* @mongodb.atlas.manual atlas-vector-search/vector-search-stage/ $vectorSearch
995+
* @mongodb.atlas.manual atlas-search/scoring/ Scoring
996+
* @mongodb.server.release 6.0.11
997+
* @since 5.7.0
998+
*/
999+
@Beta(Reason.SERVER)
1000+
public static Bson vectorSearch(
1001+
final FieldSearchPath path,
1002+
final VectorSearchQuery query,
1003+
final String index,
1004+
final long limit,
1005+
final VectorSearchOptions options) {
1006+
notNull("path", path);
1007+
notNull("query", query);
1008+
notNull("index", index);
1009+
notNull("options", options);
1010+
return new VectorSearchQueryBson(path, query, index, limit, options);
1011+
}
1012+
9701013
/**
9711014
* Creates a {@code $vectorSearch} pipeline stage supported by MongoDB Atlas.
9721015
* You may use the {@code $meta: "vectorSearchScore"} expression, e.g., via {@link Projections#metaVectorSearchScore(String)},
@@ -2155,6 +2198,60 @@ public String toString() {
21552198
}
21562199
}
21572200

2201+
2202+
/**
2203+
* Same as {@link Aggregates.VectorSearchBson} but uses a query expression instead of a query vector.
2204+
*/
2205+
private static class VectorSearchQueryBson implements Bson {
2206+
private final FieldSearchPath path;
2207+
private final VectorSearchQuery query;
2208+
private final String index;
2209+
private final long limit;
2210+
private final VectorSearchOptions options;
2211+
2212+
/**
2213+
* Given model name must be compatible with the one in the index definition.
2214+
*/
2215+
private final String embeddingModelName;
2216+
2217+
VectorSearchQueryBson(final FieldSearchPath path, final VectorSearchQuery query,
2218+
final String index, final long limit,
2219+
final VectorSearchOptions options) {
2220+
this.path = path;
2221+
this.query = query;
2222+
this.index = index;
2223+
this.limit = limit;
2224+
this.options = options;
2225+
// when null then model name from the index definition will be used by the server
2226+
this.embeddingModelName = ((AbstractVectorSearchQuery) query).getModel();
2227+
}
2228+
2229+
@Override
2230+
public <TDocument> BsonDocument toBsonDocument(final Class<TDocument> documentClass, final CodecRegistry codecRegistry) {
2231+
Document specificationDoc = new Document("path", path.toValue())
2232+
.append("query", query)
2233+
.append("index", index)
2234+
.append("limit", limit);
2235+
if (embeddingModelName != null) {
2236+
specificationDoc.append("model", embeddingModelName);
2237+
}
2238+
specificationDoc.putAll(options.toBsonDocument(documentClass, codecRegistry));
2239+
return new Document("$vectorSearch", specificationDoc).toBsonDocument(documentClass, codecRegistry);
2240+
}
2241+
2242+
@Override
2243+
public String toString() {
2244+
return "Stage{name=$vectorSearch"
2245+
+ ", path=" + path
2246+
+ ", query=" + query
2247+
+ ", index=" + index
2248+
+ ", limit=" + limit
2249+
+ ", model=" + embeddingModelName
2250+
+ ", options=" + options
2251+
+ '}';
2252+
}
2253+
}
2254+
21582255
private static class VectorSearchBson implements Bson {
21592256
private final FieldSearchPath path;
21602257
private final Object queryVector;
@@ -2193,7 +2290,4 @@ public String toString() {
21932290
+ '}';
21942291
}
21952292
}
2196-
2197-
private Aggregates() {
2198-
}
21992293
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.mongodb.client.model.search;
17+
18+
import com.mongodb.annotations.Beta;
19+
import com.mongodb.annotations.Reason;
20+
import com.mongodb.annotations.Sealed;
21+
22+
/**
23+
* A text-based vector search query for MongoDB Atlas auto-embedding.
24+
* <p>
25+
* This interface extends {@link VectorSearchQuery} and provides methods for configuring
26+
* text-based queries that will be automatically embedded by the server.
27+
* </p>
28+
*
29+
* @see VectorSearchQuery#textQuery(String)
30+
* @mongodb.atlas.manual atlas-vector-search/vector-search-stage/ $vectorSearch
31+
* @since 5.7.0
32+
*/
33+
@Sealed
34+
@Beta(Reason.SERVER)
35+
public interface TextVectorSearchQuery extends VectorSearchQuery {
36+
/**
37+
* Specifies the embedding model to use for generating embeddings from the query text.
38+
* <p>
39+
* If not specified, the model configured in the vector search index definition will be used.
40+
* The specified model must be compatible with the model used in the index definition.
41+
* </p>
42+
*
43+
* @param modelName The name of the embedding model to use (e.g., "voyage-4-large").
44+
* @return A new {@link TextVectorSearchQuery} with the specified model.
45+
*/
46+
TextVectorSearchQuery model(String modelName);
47+
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.mongodb.client.model.search;
17+
18+
import com.mongodb.internal.client.model.search.AbstractVectorSearchQuery;
19+
import com.mongodb.lang.Nullable;
20+
import org.bson.BsonDocument;
21+
import org.bson.Document;
22+
import org.bson.codecs.configuration.CodecRegistry;
23+
24+
import static com.mongodb.assertions.Assertions.notNull;
25+
26+
/**
27+
* Package-private implementation of {@link TextVectorSearchQuery}.
28+
*/
29+
final class TextVectorSearchQueryImpl extends AbstractVectorSearchQuery implements TextVectorSearchQuery {
30+
private final String text;
31+
@Nullable
32+
private final String model;
33+
34+
TextVectorSearchQueryImpl(final String text, @Nullable final String model) {
35+
this.text = notNull("text", text);
36+
this.model = model;
37+
}
38+
39+
@Override
40+
public TextVectorSearchQuery model(final String modelName) {
41+
return new TextVectorSearchQueryImpl(text, notNull("modelName", modelName));
42+
}
43+
44+
@Override
45+
@Nullable
46+
public String getModel() {
47+
return model;
48+
}
49+
50+
@Override
51+
public <TDocument> BsonDocument toBsonDocument(final Class<TDocument> documentClass, final CodecRegistry codecRegistry) {
52+
return new Document("text", text).toBsonDocument(documentClass, codecRegistry);
53+
}
54+
55+
@Override
56+
public String toString() {
57+
return "TextVectorSearchQuery{"
58+
+ "text='" + text + '\''
59+
+ ", model=" + (model != null ? "'" + model + '\'' : "null")
60+
+ '}';
61+
}
62+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.mongodb.client.model.search;
17+
18+
import com.mongodb.annotations.Beta;
19+
import com.mongodb.annotations.Reason;
20+
import com.mongodb.annotations.Sealed;
21+
import org.bson.conversions.Bson;
22+
23+
import static com.mongodb.assertions.Assertions.notNull;
24+
25+
/**
26+
* A query specification for MongoDB Atlas vector search with automated embedding.
27+
* <p>
28+
* This interface provides factory methods for creating type-safe query objects that can be used
29+
* with the {@code $vectorSearch} aggregation pipeline stage for auto-embedding functionality.
30+
* </p>
31+
*
32+
* @mongodb.atlas.manual atlas-vector-search/vector-search-stage/ $vectorSearch
33+
* @since 5.7.0
34+
*/
35+
@Sealed
36+
@Beta(Reason.SERVER)
37+
public interface VectorSearchQuery extends Bson {
38+
/**
39+
* Creates a text-based vector search query that will be automatically embedded by the server.
40+
* <p>
41+
* The server will generate embeddings for the provided text using the model specified in the
42+
* vector search index definition, or an explicitly specified model via {@link TextVectorSearchQuery#model(String)}.
43+
* </p>
44+
*
45+
* @param text The text to be embedded and searched.
46+
* @return A {@link TextVectorSearchQuery} that can be further configured.
47+
*/
48+
static TextVectorSearchQuery textQuery(final String text) {
49+
return new TextVectorSearchQueryImpl(notNull("text", text), null);
50+
}
51+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.mongodb.internal.client.model.search;
18+
19+
import com.mongodb.client.model.search.VectorSearchQuery;
import com.mongodb.lang.Nullable;
20+
21+
public abstract class AbstractVectorSearchQuery implements VectorSearchQuery {
22+
23+
public abstract String getModel();
24+
25+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright 2008-present MongoDB, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
/**
18+
* This package contains internal functionality that may change at any time.
19+
*/
20+
21+
@Internal
22+
@NonNullApi
23+
package com.mongodb.internal.client.model.search;
24+
25+
import com.mongodb.annotations.Internal;
26+
import com.mongodb.lang.NonNullApi;

0 commit comments

Comments
 (0)