Skip to content

Commit

Permalink
Merge pull request #28 from fairDataSociety/issue27
Browse files: browse the repository at this point in the history
feat: #27, run fave without vectorizer
  • Loading branch information
asabya authored Sep 20, 2023
2 parents e01c4a0 + 1459881 commit 1e5d268
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 50 deletions.
36 changes: 24 additions & 12 deletions pkg/document/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/fairDataSociety/FaVe/pkg/vectorizer/rest"
"os"
"strings"
"sync"

h "github.com/fairDataSociety/FaVe/pkg/hnsw"
"github.com/fairDataSociety/FaVe/pkg/hnsw/distancer"
"github.com/fairDataSociety/FaVe/pkg/vectorizer"
"github.com/fairDataSociety/FaVe/pkg/vectorizer/rest"
"github.com/fairdatasociety/fairOS-dfs/pkg/collection"
"github.com/fairdatasociety/fairOS-dfs/pkg/dfs"
"github.com/fairdatasociety/fairOS-dfs/pkg/logging"
Expand Down Expand Up @@ -80,17 +80,14 @@ func New(config Config, api *dfs.API) (*Client, error) {
// client.vectorizer = lkup
//}

if config.VectorizerUrl == "" {
logger.Errorf("VECTORIZER_URL environment variable is not set")
}

// leveldb lookuper
lkup, err := rest.NewVectorizer(config.VectorizerUrl)
if err != nil {
logger.Errorf("new vectorizer failed :%s\n", err.Error())
return nil, err
if config.VectorizerUrl != "" {
lkup, err := rest.NewVectorizer(config.VectorizerUrl)
if err != nil {
logger.Errorf("new vectorizer failed :%s\n", err.Error())
return nil, err
}
client.lookup = lkup
}
client.lookup = lkup
documentCache, err := lru.New(1000)
if err == nil {
client.documentCache = documentCache
Expand Down Expand Up @@ -420,9 +417,18 @@ func (c *Client) AddDocuments(collection string, propertiesToIndex []string, doc
}
c.documentCache.Add(fmt.Sprintf("%s/%s/%d", c.pod, namespacedCollection, indexId), vector)

indexId++
} else if slice, ok := doc.Properties["vector"].([]float32); ok {
err = index.Add(indexId, slice)
if err != nil {
c.logger.Errorf("index.Add failed :%s\n", err.Error())
continue
}
c.documentCache.Add(fmt.Sprintf("%s/%s/%d", c.pod, namespacedCollection, indexId), slice)

indexId++
} else {
return fmt.Errorf("vector is not []float32")
return fmt.Errorf("vector format is now supported")
}
} else {
// vectorize the properties
Expand All @@ -436,6 +442,9 @@ func (c *Client) AddDocuments(collection string, propertiesToIndex []string, doc
}

if vectorData != "" {
if c.lookup == nil {
return fmt.Errorf("vectorizer is not initialized")
}
vector, err := c.lookup.Corpi([]string{vectorData})
if err != nil {
c.logger.Errorf("corpi failed :%s\n", err.Error())
Expand Down Expand Up @@ -544,6 +553,9 @@ func (c *Client) GetNearDocuments(collection, text string, distance float32, lim
return nil, nil, err
}
}
if c.lookup == nil {
return nil, nil, fmt.Errorf("vectorizer is not initialized")
}
vector, err := c.lookup.Corpi([]string{text})
if err != nil {
return nil, nil, err
Expand Down
84 changes: 46 additions & 38 deletions pkg/document/document_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,30 @@ func TestFave(t *testing.T) {
dfsApi := dfs.NewMockDfsAPI(mockClient, users, logger)
defer dfsApi.Close()

_, err := dfsApi.CreateUserV2(username, password, ""+
"", "")
if err != nil {
t.Fatal(err)
}

cfg := Config{
Verbose: false,
VectorizerUrl: "http://localhost:9876",
}
client, err := New(cfg, dfsApi)
if err != nil {
t.Fatal(err)
}
err = client.Login(username, password)
if err != nil {
t.Fatal(err)
}

err = client.OpenPod("Fave")
if err != nil {
t.Fatal(err)
}

t.Run("test-vectorizer-in-fave", func(t *testing.T) {
_, err := dfsApi.CreateUserV2(username, password, ""+
"", "")
if err != nil {
t.Fatal(err)
}

cfg := Config{
Verbose: false,
VectorizerUrl: "http://localhost:9876",
}
client, err := New(cfg, dfsApi)
if err != nil {
t.Fatal(err)
}
err = client.Login(username, password)
if err != nil {
t.Fatal(err)
}

err = client.OpenPod("Fave")
if err != nil {
t.Fatal(err)
}
file, err := os.Open("./wiki-15.csv")
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -244,6 +242,29 @@ func TestFave(t *testing.T) {
})

t.Run("test-vectorizer-out-of-fave", func(t *testing.T) {
_, err := dfsApi.CreateUserV2(username+"v", password, ""+
"", "")
if err != nil {
t.Fatal(err)
}

cfg := Config{
Verbose: false,
}
client, err := New(cfg, dfsApi)
if err != nil {
t.Fatal(err)
}
err = client.Login(username+"v", password)
if err != nil {
t.Fatal(err)
}

err = client.OpenPod("Fave")
if err != nil {
t.Fatal(err)
}

vctrzr, err := rest.NewVectorizer("http://localhost:9876")
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -416,7 +437,7 @@ func TestFave(t *testing.T) {
t.Fatal(err)
}

err = client2.Login(username, password)
err = client2.Login(username+"v", password)
if err != nil {
t.Fatal(err)
}
Expand All @@ -426,19 +447,6 @@ func TestFave(t *testing.T) {
t.Fatal(err)
}

docs, dist, err = client2.GetNearDocuments(col.Name, "Bat", 1, 1)
if err != nil {
t.Fatal(err)
}
for i, doc := range docs {
props := map[string]interface{}{}
err := json.Unmarshal(doc, &props)
if err != nil {
t.Fatal(err)
}
fmt.Println("Found:", props["title"], dist[i])
}

colls, err := client2.GetCollections()
if err != nil {
t.Fatal(err)
Expand Down

0 comments on commit 1e5d268

Please sign in to comment.