本文最后编辑于较早之前,其中的内容可能需要更新。
                
                
                    
                
                 
# Milvus 向量嵌入

Milvus 是一个向量数据库,可用于 AI 场景下的文本检索。本文使用官方文档中的例子,示例语言为 Python。
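## 安装依赖

```bash
pip3 install pymilvus FlagEmbedding
```

提示:在较新版本的 pymilvus 中,`model` 子模块可能需要通过 `pip3 install "pymilvus[model]"` 单独安装,请以官方文档为准。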
 
## 定义向量生成模型

此处使用默认的轻量级模型,也可通过 `milvus_model.hybrid.BGEM3EmbeddingFunction` 加载指定模型。代码中的 `model` 来自 pymilvus(`from pymilvus import model`)。

```python
ef = model.DefaultEmbeddingFunction()
```
 
## 准备嵌入数据

```python
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

embeddings = ef.encode_documents(docs)

query_embedding = ef.encode_documents(["where was Turing born?"])

entities = [{"id": index_id, "vector": embeddings[index_id], "source": docs[index_id]}
            for index_id in range(len(docs))]
```
 
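## 初始化 client

初始化 client 也很简单(`MilvusClient` 通过 `from pymilvus import MilvusClient` 导入),将 `HOST:PORT` 替换为实际的 Milvus 服务地址即可:

```python
client = MilvusClient(
    uri="http://HOST:PORT"
)
```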
 
## 创建集合

需要先定义 schema,然后直接创建集合即可(`DataType` 通过 `from pymilvus import DataType` 导入)。

```python
schema = MilvusClient.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=768)
schema.add_field(field_name="source", datatype=DataType.VARCHAR, max_length=500)

client.create_collection(
    collection_name="customized_setup",
    schema=schema,
)
```
 
## 创建索引

```python
index_params = MilvusClient.prepare_index_params()

index_params.add_index(
    field_name="vector",
    metric_type="COSINE",
    index_type="IVF_FLAT",
    index_name="vector_index",
    nlist=1024
)

client.create_index(
    collection_name="customized_setup",
    index_params=index_params
)
```
 
## 查看索引信息

```python
res = client.describe_index(
    collection_name="customized_setup",
    index_name="vector_index"
)
print(res)
```
 
## 向集合插入数据

```python
res = client.insert(
    "customized_setup",
    entities
)

print(res)
```
 
## 查看集合数据

```python
client.load_collection("customized_setup")
client.load_collection("customized_setup")

res = client.get(
    collection_name="customized_setup",
    ids=[0, 1, 2],
    output_fields=["source", "vector"]
)
for i in res:
    print(i)
```
 
## 搜索数据

```python
res = client.search(
    collection_name="customized_setup",
    data=query_embedding,
    limit=1,
    output_fields=["source"]
)

for result in res:
    print(result)
```
 
## 删除数据

```python
res = client.delete(
    collection_name='customized_setup',
    ids=[0, 1, 2],
)

print("Entities deleted from partitionA: ", res['delete_count'])
```