Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DR-629 index refactor #2567

Draft
wants to merge 80 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 77 commits
Commits
Show all changes
80 commits
Select commit Hold shift + click to select a range
49d555c
framework
daveatweaviate Sep 9, 2024
610d790
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 11, 2024
cf3c4e7
index slide
daveatweaviate Sep 11, 2024
27b58fa
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 11, 2024
5bc5e84
asynch
daveatweaviate Sep 11, 2024
5bcd486
files
daveatweaviate Sep 11, 2024
7de7526
sub pages
daveatweaviate Sep 11, 2024
0b21c93
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 12, 2024
0ab0e7f
vector indexes
daveatweaviate Sep 12, 2024
333623c
py code
daveatweaviate Sep 12, 2024
5caa2d6
python linter
daveatweaviate Sep 12, 2024
74b4b4f
ts code
daveatweaviate Sep 13, 2024
110ecaa
ts code
daveatweaviate Sep 13, 2024
059ad73
ts v3
daveatweaviate Sep 13, 2024
fe0b9c2
ts2
daveatweaviate Sep 13, 2024
efc6c2c
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 13, 2024
448bfe0
ts2
daveatweaviate Sep 13, 2024
3b9cfff
staging
daveatweaviate Sep 13, 2024
d2bef9c
py4
daveatweaviate Sep 13, 2024
b631228
staging and py3
daveatweaviate Sep 13, 2024
f0f6234
ts3
daveatweaviate Sep 14, 2024
70ad7de
ts3
daveatweaviate Sep 14, 2024
4db98ff
dynamic
daveatweaviate Sep 14, 2024
64fcbf3
py4
daveatweaviate Sep 14, 2024
ecac70e
dynamic py3
daveatweaviate Sep 14, 2024
6e4a16b
ts3
daveatweaviate Sep 14, 2024
da9647f
ts2
daveatweaviate Sep 14, 2024
758baea
inverted indexes
daveatweaviate Sep 14, 2024
de36ec8
fix range index copy
daveatweaviate Sep 14, 2024
c87c5fb
indexing copy
daveatweaviate Sep 14, 2024
f50d1df
empty table
daveatweaviate Sep 14, 2024
959e650
py4
daveatweaviate Sep 14, 2024
3d1fac7
py3
daveatweaviate Sep 14, 2024
5357a78
ts3
daveatweaviate Sep 14, 2024
7b47612
ts code
daveatweaviate Sep 16, 2024
55c4f5f
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 16, 2024
7ec1ac3
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 16, 2024
b5140ee
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 16, 2024
250e2fb
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 16, 2024
2348252
pre spilt up
daveatweaviate Sep 16, 2024
df126cb
vector overview
daveatweaviate Sep 16, 2024
e9ae768
split pages
daveatweaviate Sep 16, 2024
5d65a82
hnsw start
daveatweaviate Sep 17, 2024
cb11072
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 17, 2024
25e31c8
vector updates
daveatweaviate Sep 17, 2024
20cc13c
start inverted
daveatweaviate Sep 17, 2024
ad31e23
py3
daveatweaviate Sep 17, 2024
8de50d3
py4
daveatweaviate Sep 17, 2024
0d44601
ts3
daveatweaviate Sep 17, 2024
593b449
ts2
daveatweaviate Sep 17, 2024
e1c6cd6
dynamic
daveatweaviate Sep 18, 2024
0ab9af9
dynamic
daveatweaviate Sep 18, 2024
fa071f0
dynamic
daveatweaviate Sep 18, 2024
a213785
ts2 async delete timing
daveatweaviate Sep 18, 2024
74ff3db
ts3 async delete timing
daveatweaviate Sep 18, 2024
82c8e31
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 18, 2024
5824950
dynamic
daveatweaviate Sep 19, 2024
178ff2f
reorder sections
daveatweaviate Sep 19, 2024
cdf5022
move around pages, update links
daveatweaviate Sep 19, 2024
1ec1b36
move around pages, update links
daveatweaviate Sep 19, 2024
e484050
toc
daveatweaviate Sep 19, 2024
bcea5b6
tons of links
daveatweaviate Sep 19, 2024
e9bd2b2
more links
daveatweaviate Sep 20, 2024
a7bdb86
more links
daveatweaviate Sep 20, 2024
9c6418b
links
daveatweaviate Sep 20, 2024
7640b58
tsc updates
daveatweaviate Sep 20, 2024
10054b3
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 20, 2024
238098a
dynamic
daveatweaviate Sep 22, 2024
9fb689b
dynamic indexes
daveatweaviate Sep 23, 2024
516cd52
dynamic indexes
daveatweaviate Sep 23, 2024
ca8d984
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 23, 2024
3243c83
vector overview
daveatweaviate Sep 23, 2024
5cd9a03
vector overview
daveatweaviate Sep 23, 2024
7bbca97
inverted
daveatweaviate Sep 23, 2024
6cadbbe
inverted
daveatweaviate Sep 23, 2024
bfc0a02
inverted
daveatweaviate Sep 23, 2024
1600c5f
hnsw
daveatweaviate Sep 23, 2024
40c1a1b
Merge branch 'main' of github.com:weaviate/weaviate-io into DR-629-in…
daveatweaviate Sep 25, 2024
3dcd799
review feedback
daveatweaviate Sep 25, 2024
c565e72
review feedback
daveatweaviate Sep 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions _includes/code/howto/indexes/indexes-dynamic-v2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// TODO: Configure as part of the test harness
// TODO: Needs tests

// Imports
import weaviate, { WeaviateClient } from 'weaviate-ts-client';

// Create client connection
// Returns a v2 client pointed at the Weaviate instance on localhost:8080 over HTTP.
function getClient(): WeaviateClient {
  return weaviate.client({
    scheme: 'http',
    host: 'localhost:8080',
  });
}

// Delete pre-existing collections
// Removes `className` from the schema, but only when the schema reports that it exists.
async function deleteClass(client: WeaviateClient, className: string){
  const alreadyPresent = await client.schema.exists(className);
  if (!alreadyPresent) {
    return;
  }

  const deleter = client.schema.classDeleter().withClassName(className);
  await deleter.do();
}

////////////////////
// ENABLE DYNAMIC //
////////////////////

// START EnableDynamic
// Creates the class `className` with its vector index type set to 'dynamic'.
async function createDynamicCollection(client: WeaviateClient, className: string){
  const classDefinition = {
    class: className,
    // Add property definitions
    vectorizer: 'text2vec-openai',
    vectorIndexType: 'dynamic',
  };

  // Add the class to the schema
  const creator = client.schema.classCreator().withClass(classDefinition);
  await creator.do();
}
// END EnableDynamic

///////////////////////
// CONFIGURE DYNAMIC //
///////////////////////

// START ConfigDynamic
// Creates the class `className` with a 'dynamic' vector index and explicit
// settings for both underlying index types.
//
// The settings are sent as a raw class definition, so they use the schema's
// camelCase keys and real numeric / boolean values, nested under
// `vectorIndexConfig.hnsw` and `vectorIndexConfig.flat`.
async function configureDynamicCollection(client: WeaviateClient, className: string){
  const setIndexType = {
    class: className,
    // Add property definitions
    vectorizer: 'text2vec-openai',
    vectorIndexType: 'dynamic',
    vectorIndexConfig: {
      distance: 'cosine',
      hnsw: {
        efConstruction: 256,    // Dynamic list size during construction
        maxConnections: 128,    // Maximum number of connections per node
        ef: -1,                 // Dynamic list size during search; -1 enables dynamic Ef
        dynamicEfFactor: 15,    // Multiplier for dynamic Ef
        dynamicEfMin: 200,      // Minimum threshold for dynamic Ef
        dynamicEfMax: 1000,     // Maximum threshold for dynamic Ef
        pq: { enabled: true },  // Quantizer configuration
      },
      flat: {
        vectorCacheMaxObjects: 100000,
        bq: { enabled: true },
      },
    },
  };

  // Add the class to the schema
  await client.schema.classCreator().withClass(setIndexType).do();
}
// END ConfigDynamic

/////////////////////////////
/// AVOID TOP LEVEL AWAIT ///
/////////////////////////////

// Main
// Runs one of the examples against a fresh "ConfigCollection" class.
// Every step is awaited so that a failed request rejects main() itself
// instead of becoming a detached, unhandled promise.
async function main(){
  const className = "ConfigCollection";

  // getClient() is synchronous in the v2 client, so there is nothing to await
  const client = getClient();

  // Run enable dynamic collection code
  await deleteClass(client, className)
  await createDynamicCollection(client, className);

  // // Run configure dynamic collection code
  // await deleteClass(client, className)
  // await configureDynamicCollection(client, className);
}

main()
101 changes: 101 additions & 0 deletions _includes/code/howto/indexes/indexes-dynamic-v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# TODO: Configure as part of the test harness

# Name of the class that every example in this script creates and inspects
class_name = "ConfigCollection"

########################
### CLIENT CONNECTION ##
########################

import os
import weaviate

# The Cohere key is read from the environment; os.getenv yields None when it is unset
cohere_key = os.getenv("COHERE_API_KEY")
client = weaviate.Client(
    additional_headers={"X-Cohere-Api-Key": cohere_key},
    url="http://localhost:8080",
)

######################
### ENABLE DYNAMIC ###
######################

# Delete data from prior runs: drop the class only when the schema already has it
class_already_present = client.schema.exists(class_name)
if class_already_present:
    client.schema.delete_class(class_name)

# START EnableDynamic
# Minimal class definition: only the name and the dynamic index type are set
class_obj = {}
class_obj["class"] = class_name
class_obj["vectorIndexType"] = "dynamic"

client.schema.create_class(class_obj)
# END EnableDynamic

# Fetch the whole schema and the stored definition of this class
class_response = client.schema.get()
schema_response = client.schema.get(class_name)

# The new class must be listed in the schema
classes = [entry["class"] for entry in class_response["classes"]]
assert class_name in classes, "Class missing"

# The stored definition must name this class and use the dynamic index type
correct_index = (
    schema_response["class"] == class_name
    and schema_response["vectorIndexType"] == "dynamic"
)
assert correct_index, "Wrong index type"

#########################
### CONFIGURE DYNAMIC ###
#########################

# Delete data from prior runs: drop the class only when the schema already has it
class_already_present = client.schema.exists(class_name)
if class_already_present:
    client.schema.delete_class(class_name)

# START ConfigDynamic
# Class definition with explicit settings for both index types that the
# dynamic index can use. This is a raw schema dictionary, so every key uses
# the schema's camelCase spelling and the distance metric key is "distance".
class_obj = {
    "description": "Dynamic configuration example",
    "class": class_name,
    "vectorIndexType": "dynamic",
    "vectorIndexConfig": {
        "hnsw": {
            "cleanupIntervalSeconds": 300,
            "distance": "cosine",
            "dynamicEfMin": 100,
            "dynamicEfMax": 600,
            "dynamicEfFactor": 8,
            "ef": -1,
            "efConstruction": 128,
            "flatSearchCutoff": 40000,
            "maxConnections": 32,
            "skip": False,
            "vectorCacheMaxObjects": 1000000000000,
            "pq": {"enabled": True},
        },
        "flat": {
            "distance": "cosine",
            "vectorCacheMaxObjects": 100000,
            "bq": {"enabled": True},
        },
    },
}

client.schema.create_class(class_obj)
# END ConfigDynamic

# Fetch the whole schema and the stored definition of this class
class_response = client.schema.get()
schema_response = client.schema.get(class_name)

# The new class must be listed in the schema
classes = [entry["class"] for entry in class_response["classes"]]
assert class_name in classes, "Class missing"

# The stored definition must name this class and use the dynamic index type
correct_index = (
    schema_response["class"] == class_name
    and schema_response["vectorIndexType"] == "dynamic"
)
assert correct_index, "Wrong index type"
94 changes: 94 additions & 0 deletions _includes/code/howto/indexes/indexes-dynamic-v3.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// TODO: Configure as part of the test harness
// TODO: Needs tests

// Imports
import weaviate, { WeaviateClient,vectorizer, configure } from 'weaviate-client';

// Delete pre-existing collections
// Deletes `collectionName`, but only when the server reports that it exists.
async function deleteCollection(client: WeaviateClient, collectionName: string){
  const alreadyPresent = await client.collections.exists(collectionName);
  if (!alreadyPresent) {
    return;
  }

  await client.collections.delete(collectionName)
}

// Create client connection
// Resolves to a client obtained from weaviate.connectToLocal().
async function getClient(): Promise<WeaviateClient> {
  return weaviate.connectToLocal();
}

////////////////////
// ENABLE DYNAMIC //
////////////////////

// START EnableDynamic
// Add this import line
// import { vectorizer, dataType, configure } from 'weaviate-client';

// Creates `collectionName` with an OpenAI vectorizer whose vector index is
// the dynamic index type, using that index type's default settings.
async function createDynamicCollection(client: WeaviateClient, collectionName: string){
  const dynamicIndex = configure.vectorIndex.dynamic();

  await client.collections.create({
    name: collectionName,
    vectorizers: vectorizer.text2VecOpenAI({ vectorIndexConfig: dynamicIndex }),
  })
}
// END EnableDynamic


///////////////////////
// CONFIGURE DYNAMIC //
///////////////////////

// START ConfigDynamic
// Add this import line
// import { vectorizer, dataType, configure } from 'weaviate-client';

// Creates `collectionName` with an OpenAI vectorizer whose dynamic vector
// index carries explicit settings for both its HNSW and its flat index.
async function configureDynamicCollection(client: WeaviateClient, collectionName: string){
  // One quantizer per underlying index type
  const hnswCompression = configure.vectorIndex.quantizer.pq();
  const flatCompression = configure.vectorIndex.quantizer.bq();

  const dynamicIndex = configure.vectorIndex.dynamic({
    distanceMetric: 'cosine',
    hnsw: {
      distanceMetric: 'cosine',
      efConstruction: 256,  // Dynamic list size during construction
      maxConnections: 128,  // Maximum number of connections per node
      ef: -1,               // Dynamic list size during search; -1 enables dynamic Ef
      dynamicEfFactor: 15,  // Multiplier for dynamic Ef
      dynamicEfMin: 200,    // Minimum threshold for dynamic Ef
      dynamicEfMax: 1000,   // Maximum threshold for dynamic Ef
      quantizer: hnswCompression,  // Compression
    },
    flat: {
      distanceMetric: 'cosine',
      vectorCacheMaxObjects: 1000000,
      quantizer: flatCompression,
    },
  });

  await client.collections.create({
    name: collectionName,
    vectorizers: vectorizer.text2VecOpenAI({ vectorIndexConfig: dynamicIndex }),
  })
}
// END ConfigDynamic

/////////////////////////////
/// AVOID TOP LEVEL AWAIT ///
/////////////////////////////

// Main
// Runs one of the examples against a fresh "ConfigCollection" collection and
// always closes the client afterwards, whether or not the example succeeded.
async function main(){
  const collectionName = "ConfigCollection";

  const client = await getClient();

  try {
    // // Run enable dynamic collection code
    // await deleteCollection(client, collectionName)
    // await createDynamicCollection(client, collectionName);

    // Run configure dynamic collection code
    await deleteCollection(client, collectionName)
    await configureDynamicCollection(client, collectionName);
  } finally {
    // Release the connection even when a step above throws
    await client.close();
  }
}

main()
92 changes: 92 additions & 0 deletions _includes/code/howto/indexes/indexes-dynamic-v4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# TODO: Configure as part of the test harness

# Name of the collection that every example in this script creates and inspects
collection_name = "ConfigCollection"

########################
### CLIENT CONNECTION ##
########################

import os
import weaviate

# Indexing os.environ raises KeyError when COHERE_API_KEY is not set
cohere_api_key = os.environ["COHERE_API_KEY"]
client = weaviate.connect_to_local(
    headers={"X-Cohere-Api-Key": cohere_api_key},
)

######################
### ENABLE DYNAMIC ###
######################

# Delete data from prior runs: drop the collection only when it already exists
collection_already_present = client.collections.exists(collection_name)
if collection_already_present:
    client.collections.delete(collection_name)

# START EnableDynamic
from weaviate.classes.config import Configure, Property, DataType, VectorDistances

# Create the collection with a dynamic vector index (default settings)
# and two text properties, "title" and "body"
client.collections.create(
    name=collection_name,
    description="Configuration example",
    vector_index_config=Configure.VectorIndex.dynamic(),
    # Configure vectorizer, properties
    properties=[
        Property(name=property_name, data_type=DataType.TEXT)
        for property_name in ("title", "body")
    ],
)
# END EnableDynamic

# Fetch the collection handle, the list of all collections, and the stored config
collection = client.collections.get(collection_name)
collections_response = client.collections.list_all()
schema_response = collection.config.get()

# The new collection must be listed
known_collections = collections_response.keys()
assert collection_name in known_collections, "Collection missing"

# The stored config must report the dynamic index type
index_type_label = str(schema_response.vector_index_type)
assert index_type_label == "VectorIndexType.DYNAMIC", "Wrong index type"

#########################
### CONFIGURE DYNAMIC ###
#########################

# Delete data from prior runs: drop the collection only when it already exists
collection_already_present = client.collections.exists(collection_name)
if collection_already_present:
    client.collections.delete(collection_name)

# START ConfigDynamic
from weaviate.classes.config import Configure, VectorDistances

# Settings for the HNSW index inside the dynamic index
hnsw_settings = Configure.VectorIndex.hnsw(
    # Any hnsw configuration parameters
    dynamic_ef_factor=15,  # Multiplier for dynamic Ef
    dynamic_ef_min=200,  # Minimum threshold for dynamic Ef
    dynamic_ef_max=1000,  # Maximum threshold for dynamic Ef
)

# Settings for the flat index inside the dynamic index
flat_settings = Configure.VectorIndex.flat(
    # Any flat index configuration parameters
    vector_cache_max_objects=100000,
    quantizer=Configure.VectorIndex.Quantizer.bq(),
)

client.collections.create(
    name=collection_name,
    vector_index_config=Configure.VectorIndex.dynamic(
        distance_metric=VectorDistances.COSINE,
        threshold=20000,
        hnsw=hnsw_settings,
        flat=flat_settings,
    ),
    # Configure vectorizer, properties
)
# END ConfigDynamic

# Fetch the collection handle, the list of all collections, and the stored config
collection = client.collections.get(collection_name)
collections_response = client.collections.list_all()
schema_response = collection.config.get()

# The new collection must be listed
known_collections = collections_response.keys()
assert collection_name in known_collections, "Collection missing"

# The stored config must report the dynamic index type
index_type_label = str(schema_response.vector_index_type)
assert index_type_label == "VectorIndexType.DYNAMIC", "Wrong index type"

################
### CLEAN UP ###
################

# Close the client connection
client.close()
Loading