Skip to content

Merge branch 'main' into CAI-823-create-manage-workflow #7

Merge branch 'main' into CAI-823-create-manage-workflow

Merge branch 'main' into CAI-823-create-manage-workflow #7

name: Manage knowledge base

Check failure on line 1 in .github/workflows/manage_knowledge_base.yaml

View workflow run for this annotation

GitHub Actions / .github/workflows/manage_knowledge_base.yaml

Invalid workflow file

(Line: 168, Col: 9): Unexpected value 'with'
run-name: Manage knowledge base in ${{ inputs.environment }}

# Manually triggered workflow: every parameter below is supplied by the
# operator through the "Run workflow" form.
on:
  workflow_dispatch:
    inputs:
      # Target environment; also selects the runner and the GitHub
      # environment used by the jobs.
      environment:
        description: 'The environment used as target'
        type: choice
        required: true
        default: dev
        options:
          - dev
          - uat
          - prod
      urls_to_add:
        description: 'Websites to add or update (list of urls, comma-separated)'
        required: false
        type: string
      urls_to_remove:
        description: 'Websites to remove from the knowledge base (list of urls, comma-separated)'
        required: false
        type: string
      # 'none' skips the index management job entirely.
      vector_index_mode:
        description: 'Vector index mode: update, create or none to skip indexing (default: update)'
        required: true
        type: choice
        default: update
        options:
          - update
          - create
          - none
      vector_index_name:
        description: 'Vector Index Name'
        required: true
        type: choice
        default: cittadino-index
        options:
          - cittadino-index
      depth:
        description: 'Maximum recursion depth for crawling links (optional)'
        required: false
        type: string
      # Kept as a quoted string so the value is forwarded verbatim.
      request_timeout_ms:
        description: 'Request timeout in milliseconds (optional, default: 20000)'
        required: false
        type: string
        default: '20000'
      run_parser:
        description: 'Whether to run the parser (optional, default: true)'
        required: false
        type: boolean
        default: true
      run_extractor:
        description: 'Whether to run the extractor (optional, default: true)'
        required: false
        type: boolean
        default: true
      valid_domain_variants:
        description: 'JSON array of allowed subdomain tokens (optional, e.g. ["sub1","sub2"])'
        required: false
        type: string
# Token permissions granted to every job in this workflow.
permissions:
  # Read-only access to the repository contents.
  contents: read
  # Lets jobs request an OIDC token
  # (presumably for the AWS role assumption steps - confirm).
  id-token: write
jobs:
  # Job 1: clean, parse and extract the structured data for the given URLs.
  manage-structured-data:
    name: Add knowledge base (manual on ${{ inputs.environment }})
    runs-on: codebuild-${{ inputs.environment }}-github-runner-${{ github.run_id }}-${{ github.run_attempt }}
    environment: ${{ inputs.environment }}
    # One run per workflow/environment at a time; a running job is never
    # cancelled by a newer one.
    concurrency:
      group: ${{ github.workflow }}-${{ inputs.environment }}
      cancel-in-progress: false
    steps:
      - name: Checkout
        uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab

      # Joins both URL lists into ALL_URLS (exported through GITHUB_ENV) so
      # the cleaner processes added and removed URLs alike.
      - name: Combine URLs into a single string
        id: combine_urls
        # Free-form inputs are passed through the environment instead of
        # being interpolated into the script, to prevent script injection.
        env:
          ADD_URLS: ${{ inputs.urls_to_add }}
          REMOVE_URLS: ${{ inputs.urls_to_remove }}
        run: |
          if [ -n "$ADD_URLS" ] && [ -n "$REMOVE_URLS" ]; then
            COMBINED="$ADD_URLS,$REMOVE_URLS"
          elif [ -n "$ADD_URLS" ]; then
            COMBINED="$ADD_URLS"
          else
            COMBINED="$REMOVE_URLS"
          fi
          echo "ALL_URLS=$COMBINED" >> "$GITHUB_ENV"

      - name: Run Cleaner for each URL
        uses: ./.github/actions/structured-data-cleaner
        with:
          deploy_iam_role: ${{ secrets.DEPLOY_IAM_ROLE }}
          environment: ${{ inputs.environment }}
          remove_from_extractor_folder: ${{ inputs.run_extractor }}
          remove_from_parser_folder: ${{ inputs.run_parser }}
          urls: ${{ env.ALL_URLS }}
          vector_index_name: ${{ inputs.vector_index_name }}

      - name: Run Parser for each URL
        if: ${{ inputs.run_parser }}
        uses: ./.github/actions/parser
        with:
          deploy_iam_role: ${{ secrets.DEPLOY_IAM_ROLE }}
          depth: ${{ inputs.depth }}
          environment: ${{ inputs.environment }}
          request_timeout_ms: ${{ inputs.request_timeout_ms }}
          urls: ${{ inputs.urls_to_add }}
          valid_domain_variants: ${{ inputs.valid_domain_variants }}
          vector_index_name: ${{ inputs.vector_index_name }}

      - name: Run Extractor for each URL
        if: ${{ inputs.run_extractor }}
        uses: ./.github/actions/extractor
        with:
          chb_aws_google_api_key: ${{ secrets.CHB_AWS_GOOGLE_API_KEY }}
          deploy_iam_role: ${{ secrets.DEPLOY_IAM_ROLE }}
          environment: ${{ inputs.environment }}
          similarity_threshold: '0.8'
          urls: ${{ inputs.urls_to_add }}
          vector_index_name: ${{ inputs.vector_index_name }}

  # Job 2: create or update the vector index once the structured data is
  # ready. Skipped entirely when vector_index_mode is 'none'.
  manage-index:
    name: Manage knowledge base index (manual on ${{ inputs.environment }})
    runs-on: codebuild-${{ inputs.environment }}-github-runner-${{ github.run_id }}-${{ github.run_attempt }}
    environment: ${{ inputs.environment }}
    needs: manage-structured-data
    if: ${{ inputs.vector_index_mode != 'none' }}
    concurrency:
      group: ${{ github.workflow }}-${{ inputs.environment }}
      cancel-in-progress: false
    steps:
      - name: Checkout
        uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
        with:
          role-to-assume: ${{ secrets.IAM_ROLE_CHATBOT_REINDEX }}
          aws-region: eu-south-1

      - name: Create index
        if: ${{ inputs.vector_index_mode == 'create' }}
        uses: ./.github/actions/chatbot
        with:
          api: 'false'
          chatbot_lambda_name: ${{ vars.CHATBOT_LAMBDA_NAME }}
          clean_redis: 'false'
          dynamic: 'false'
          index_id: ${{ inputs.vector_index_name }}
          static: 'false'
          structured: 'true'

      # `with:` is only valid on steps that have `uses:`; combined with
      # `run:` it invalidates the whole workflow file. Until the dedicated
      # action exists, the parameters are handed to the script through the
      # environment.
      - name: Update index
        if: ${{ inputs.vector_index_mode == 'update' }}
        shell: bash
        env:
          CHATBOT_LAMBDA_NAME: ${{ vars.CHATBOT_LAMBDA_NAME }}
          URLS_TO_UPDATE: ${{ inputs.urls_to_add }}
          URLS_TO_REMOVE: ${{ inputs.urls_to_remove }}
          VECTOR_INDEX_NAME: ${{ inputs.vector_index_name }}
        # TODO add the action
        run: |
          echo "Updating vector index ${VECTOR_INDEX_NAME} with new data. This may take a while..."
          # Call the update script with the appropriate parameters