Files
doc-exports/docs/css/umn/css_01_0129.html
zhengxiu 93d856d5c5 css umn 25.6.0 version
Reviewed-by: Pruthi, Vineet <vineet.pruthi@t-systems.com>
Co-authored-by: zhengxiu <zhengxiu@huawei.com>
Co-committed-by: zhengxiu <zhengxiu@huawei.com>
2025-11-25 11:34:43 +00:00

122 lines
5.1 KiB
HTML

<a name="EN-US_TOPIC_0000001965496793"></a><a name="EN-US_TOPIC_0000001965496793"></a>
<h1 class="topictitle1">Sample Python Code for Vector Search</h1>
<div id="body0000001261749932"><p id="EN-US_TOPIC_0000001965496793__en-us_topic_0000001261749932_p143381112172510">Elasticsearch provides standard REST APIs and clients developed using Java and Python.</p>
<p id="EN-US_TOPIC_0000001965496793__en-us_topic_0000001261749932_p1675193372415">This section provides a sample of Python code for creating vector indexes, and importing and querying vector data. It shows how to use the client to implement vector search.</p>
<div class="section" id="EN-US_TOPIC_0000001965496793__en-us_topic_0000001261749932_section1864223419264"><h4 class="sectiontitle">Prerequisites</h4><p id="EN-US_TOPIC_0000001965496793__en-us_topic_0000001261749932_p1299195810328">The Elasticsearch Python client package has been installed on the client. If it is not installed, run the following command to install it:</p>
<pre class="screen" id="EN-US_TOPIC_0000001965496793__en-us_topic_0000001261749932_screen1932333123212"># Set the actual cluster version. 7.6 is used in this example.
pip install elasticsearch==<em id="EN-US_TOPIC_0000001965496793__i798118663717">7.6</em></pre>
</div>
<div class="section" id="EN-US_TOPIC_0000001965496793__en-us_topic_0000001261749932_section10885342132619"><h4 class="sectiontitle">Sample Code</h4><pre class="screen" id="EN-US_TOPIC_0000001965496793__screen842883191417">from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Create the Elasticsearch client.
def get_client(hosts: list, user: str = None, password: str = None):
    """Build an Elasticsearch client for the given hosts.

    Args:
        hosts: Node URLs passed straight to the Elasticsearch constructor.
        user: Optional user name for authenticated clusters.
        password: Optional password for authenticated clusters.

    Returns:
        An Elasticsearch client. Credentials are only applied when both
        user and password are non-empty.
    """
    # Without a complete credential pair, fall back to an anonymous client.
    if not (user and password):
        return Elasticsearch(hosts)
    # NOTE(review): certificate verification is switched off and the related
    # SSL warning is silenced here; confirm this is acceptable for your cluster.
    return Elasticsearch(
        hosts,
        http_auth=(user, password),
        verify_certs=False,
        ssl_show_warn=False,
    )
# Create a vector index.
def create(client: Elasticsearch, index: str):
    """Create an index with a vector field and print the response.

    Args:
        client: Elasticsearch client used to issue the request.
        index: Name of the index to create.
    """
    index_settings = {
        "vector": "true",  # Enable the vector feature.
        "number_of_shards": 1,  # Set the number of index shards as needed.
        "number_of_replicas": 0,  # Set the number of index replicas as needed.
    }
    vector_field = {
        "type": "vector",
        "dimension": 2,
        "indexing": True,
        "algorithm": "GRAPH",
        "metric": "euclidean",
    }
    request_body = {
        "settings": {"index": index_settings},
        "mappings": {
            "properties": {
                "my_vector": vector_field,
                # Other fields can be added if necessary.
            }
        },
    }
    response = client.indices.create(index=index, body=request_body)
    print("create index result: ", response)
# Write data.
def write(client: Elasticsearch, index: str, vecs: list, bulk_size=500):
    """Bulk-index vectors in batches, then refresh the index.

    Args:
        client: Elasticsearch client used to issue the requests.
        index: Name of the target index.
        vecs: Vectors to index; each one is stored in the "my_vector" field.
        bulk_size: Number of vectors sent per bulk request.
    """
    for start in range(0, len(vecs), bulk_size):
        actions = [
            {
                "_index": index,
                "my_vector": vec,
                # Other fields can be added if necessary.
            }
            for vec in vecs[start:start + bulk_size]
        ]
        # helpers.bulk raises BulkIndexError on the first failed document by
        # default, which would make the error branch below unreachable.
        # raise_on_error=False returns the failures so they can be reported.
        success, errors = helpers.bulk(
            client,
            actions,
            request_timeout=3600,
            raise_on_error=False,
        )
        if errors:
            print("write bulk failed with errors: ", errors)  # Handle the error as needed.
        else:
            print("write bulk {} docs success".format(success))
    # Refresh once after all batches have been sent.
    client.indices.refresh(index=index, request_timeout=3600)
# Query a vector index.
def search(client: Elasticsearch, index: str, query: list, size: int):
    """Run a vector query against the "my_vector" field and print the response.

    Args:
        client: Elasticsearch client used to issue the request.
        index: Name of the index to query.
        query: Query vector.
        size: Used both as the response size and as the query's "topk".
    """
    # Query statement. Select an appropriate query method.
    vector_clause = {
        "vector": query,
        "topk": size,
    }
    query_body = {
        "size": size,
        "query": {"vector": {"my_vector": vector_clause}},
    }
    response = client.search(index=index, body=query_body)
    print("search index result: ", response)
# Delete an index.
def delete(client: Elasticsearch, index: str):
    """Delete the named index and print the response.

    Args:
        client: Elasticsearch client used to issue the request.
        index: Name of the index to delete.
    """
    print("delete index result: ", client.indices.delete(index=index))
if __name__ == '__main__':
    # Name of the index used by this walkthrough.
    index_name = "my_index"
    # Sample vectors to index and the vector to search for.
    data = [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]
    query_vector = [1.0, 1.0]

    # Client for a non-security cluster:
    es_client = get_client(hosts=['http://xx.xx.xx.xx:9200'])
    # Client for a security-mode cluster that uses HTTPS:
    # es_client = get_client(hosts=['https://xx.xx.xx.xx:9200', 'https://xx.xx.xx.xx:9200'], user='xxxxx', password='xxxxx')
    # Client for a security-mode cluster with HTTPS disabled:
    # es_client = get_client(hosts=['http://xx.xx.xx.xx:9200', 'http://xx.xx.xx.xx:9200'], user='xxxxx', password='xxxxx')

    # Walk through the full lifecycle: create, write, query, delete.
    create(es_client, index_name)
    write(es_client, index_name, data)
    search(es_client, index_name, query_vector, 3)
    delete(es_client, index_name)
</pre>
</div>
</div>
<div>
<div class="familylinks">
<div class="parentlink"><strong>Parent topic:</strong> <a href="css_01_0117.html">Configuring Vector Search for Elasticsearch Clusters</a></div>
</div>
</div>