Skip to content

Commit 507032b

Browse files
author
Sam Hokin
committed
Vector to abstract dumper/loader; other tweaks.
1 parent efed26e commit 507032b

20 files changed

+824
-188
lines changed

chatbot/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data/

chatbot/build.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ dependencies {
3232
implementation group: 'commons-lang', name: 'commons-lang', version: '2.6'
3333
// https://mvnrepository.com/artifact/commons-cli/commons-cli
3434
implementation group: 'commons-cli', name: 'commons-cli', version: '1.5.0'
35+
// https://mvnrepository.com/artifact/org.apache.commons/commons-text
36+
implementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0'
3537

3638
// https://mvnrepository.com/artifact/org.slf4j/slf4j-simple
3739
implementation group: 'org.slf4j', name: 'slf4j-simple', version: '2.0.7'
@@ -44,6 +46,9 @@ dependencies {
4446
// these are for JSON-B serialization
4547
implementation group: 'javax.json.bind', name: 'javax.json.bind-api', version: '1.0'
4648

49+
// https://mvnrepository.com/artifact/org.json/json
50+
implementation group: 'org.json', name: 'json', version: '20230227'
51+
4752
// https://mvnrepository.com/artifact/org.eclipse/yasson
4853
implementation group: 'org.eclipse', name: 'yasson', version: '1.0.11'
4954

chatbot/scripts/curl-question.sh

Lines changed: 0 additions & 1 deletion
This file was deleted.

chatbot/scripts/datadumper.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
2+
# usage: DataDumper
3+
# -f,--filter filter the query on metadata given by --key and
4+
# --value
5+
# -i,--index <arg> Pinecone index name
6+
# -id,--id <arg> id of a vector to fetch
7+
# -k,--key <arg> key of the metadata for query filter or update
8+
# -t,--term <arg> search term for query
9+
# -topk,--topk <arg> Pinecone Top K value: maximum number of vectors to
10+
# retrieve [5]
11+
# -v,--value <arg> value of the metadata for query filter or update
12+
13+
java -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.DataDumper "$1" "$2" "$3" "$4" "$5" "$6"

chatbot/scripts/dataloader.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/sh
2+
# usage: DataLoader
3+
# -f,--file <arg> Name of file containing vector data and metadata to be
4+
# loaded into Pinecone.
5+
# -i,--index <arg> Pinecone index name
6+
7+
java -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.DataLoader $1 $2 $3 $4

chatbot/scripts/jaxrs-question.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
curl "http://localhost:8080/legumebot?question=Which+pathogens+are+most+serious+for+soybean+crops&top_k=5&temperature=0.0&frequency_penalty=0.0&presence_penalty=0.0&show_dois"
2+
echo ""
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
QUERY=$1
2-
3-
java -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.openai.OpenAi "$QUERY"
1+
java -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.openai.OpenAi "$1"

chatbot/scripts/pubag-upsert-file.sh renamed to chatbot/scripts/pubag-embeddings-upserter.sh

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,4 @@
66
# -p,--page <arg> page number for search [1]
77
# -t,--term <arg> search term for abstract and title search
88

9-
INDEX_NAME=$1
10-
FILE=$2
11-
12-
java -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.PubAgEmbeddingsUpserter -a $PUBAG_API_KEY -i $INDEX_NAME -f $FILE
9+
java -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.PubAgEmbeddingsUpserter -a $PUBAG_API_KEY "$1" "$2" "$3" "$4" "$5" "$6" "$7"
Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
# usage: PubMedEmbeddingsUpserter
22
# -a,--apikey <arg> PubMed API key
3+
# -d,--doi <arg> DOI for abstract search
34
# -f,--file <arg> file containing Abstract.toString() data
45
# -i,--index <arg> Pinecone index name
56
# -l,--list <arg> comma-separated list of PMIDs
67
# -r,--retmax <arg> value of retmax for abstract search
78
# -t,--term <arg> search term for abstract search
89

9-
INDEX_NAME=$1
10-
FILE=$2
11-
12-
java -Djavax.xml.accessExternalDTD=https -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.PubMedEmbeddingsUpserter -a $PUBMED_API_KEY -i $INDEX_NAME -f $FILE
10+
java -Djavax.xml.accessExternalDTD=https -cp "build/install/ncgr-chatbot/lib/*" org.ncgr.chatbot.PubMedEmbeddingsUpserter -a $PUBMED_API_KEY "$1" "$2" "$3" "$4" "$5" "$6"

chatbot/scripts/pubmed-upsert-ids.sh

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)