# Merge branch 'main' into CAI-823-create-manage-workflow #7
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually dispatched workflow that adds, refreshes or removes websites in the
# knowledge base of the selected environment and optionally rebuilds or
# updates the vector index.
name: Manage knowledge base
run-name: Manage knowledge base in ${{ inputs.environment }}

on:
  workflow_dispatch:
    inputs:
      # Target environment; also used to pick the runner and the GitHub
      # environment of each job.
      environment:
        description: 'The environment used as target'
        type: choice
        required: true
        default: dev
        options:
          - dev
          - uat
          - prod
      # Free-text, comma-separated URL lists. Both are optional.
      urls_to_add:
        description: 'Websites to add or update (list of urls, comma-separated)'
        required: false
        type: string
      urls_to_remove:
        description: 'Websites to remove from the knowledge base (list of urls, comma-separated)'
        required: false
        type: string
      # "none" skips the index job entirely.
      vector_index_mode:
        description: 'Vector index mode: update, create or none (default: update)'
        required: true
        type: choice
        default: update
        options:
          - update
          - create
          - none
      vector_index_name:
        description: 'Vector Index Name'
        required: true
        type: choice
        default: cittadino-index
        options:
          - cittadino-index
      # Crawler tuning, forwarded to the parser action.
      depth:
        description: 'Maximum recursion depth for crawling links (optional)'
        required: false
        type: string
      request_timeout_ms:
        description: 'Request timeout in milliseconds (optional, default: 20000)'
        required: false
        type: string
        default: '20000'
      # Stage toggles: each one gates its stage and is also forwarded to the
      # cleaner as the matching "remove_from_*_folder" flag.
      run_parser:
        description: 'Whether to run the parser (optional, default: true)'
        required: false
        type: boolean
        default: true
      run_extractor:
        description: 'Whether to run the extractor (optional, default: true)'
        required: false
        type: boolean
        default: true
      valid_domain_variants:
        description: 'JSON array of allowed subdomain tokens (optional, e.g. ["sub1","sub2"])'
        required: false
        type: string
# Permissions of the GITHUB_TOKEN for every job in this workflow.
permissions:
  # Read-only repository access (the jobs check out the repository).
  contents: read
  # Lets jobs request an OIDC token; presumably required by the AWS role
  # assumption performed in the jobs - confirm before tightening.
  id-token: write
jobs:
  # Job 1: clean previously stored data for every affected URL, then
  # (optionally) parse and extract the URLs that must be added or refreshed.
  #
  # NOTE(review): both jobs use the same job-level concurrency group. Jobs of
  # overlapping runs on the same environment may therefore interleave, and a
  # newly queued job can replace one that is still pending in the group.
  # Consider a workflow-level `concurrency` if whole runs must be serialized.
  manage-structured-data:
    name: Add knowledge base (manual on ${{ inputs.environment }})
    runs-on: codebuild-${{ inputs.environment }}-github-runner-${{ github.run_id }}-${{ github.run_attempt }}
    environment: ${{ inputs.environment }}
    concurrency:
      group: ${{ github.workflow }}-${{ inputs.environment }}
      cancel-in-progress: false
    steps:
      - name: Checkout
        uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab

      # Builds ALL_URLS = urls_to_add + urls_to_remove (comma-joined) and
      # exports it to the following steps through GITHUB_ENV.
      - name: Combine URLs into a single string
        id: combine_urls
        # The inputs are free text typed by the caller: hand them to the
        # script as environment variables instead of interpolating them into
        # the script body, so their content is treated as data, not as shell.
        env:
          ADD_URLS: ${{ inputs.urls_to_add }}
          REMOVE_URLS: ${{ inputs.urls_to_remove }}
        run: |
          if [ -n "$ADD_URLS" ] && [ -n "$REMOVE_URLS" ]; then
            COMBINED="$ADD_URLS,$REMOVE_URLS"
          elif [ -n "$ADD_URLS" ]; then
            COMBINED="$ADD_URLS"
          else
            COMBINED="$REMOVE_URLS"
          fi
          echo "ALL_URLS=$COMBINED" >> "$GITHUB_ENV"

      # The cleaner receives both the URLs to add and the URLs to remove.
      - name: Run Cleaner for each URL
        uses: ./.github/actions/structured-data-cleaner
        with:
          deploy_iam_role: ${{ secrets.DEPLOY_IAM_ROLE }}
          environment: ${{ inputs.environment }}
          remove_from_extractor_folder: ${{ inputs.run_extractor }}
          remove_from_parser_folder: ${{ inputs.run_parser }}
          urls: ${{ env.ALL_URLS }}
          vector_index_name: ${{ inputs.vector_index_name }}

      # Parser and extractor only receive the URLs to add or update.
      - name: Run Parser for each URL
        if: ${{ inputs.run_parser }}
        uses: ./.github/actions/parser
        with:
          deploy_iam_role: ${{ secrets.DEPLOY_IAM_ROLE }}
          depth: ${{ inputs.depth }}
          environment: ${{ inputs.environment }}
          request_timeout_ms: ${{ inputs.request_timeout_ms }}
          urls: ${{ inputs.urls_to_add }}
          valid_domain_variants: ${{ inputs.valid_domain_variants }}
          vector_index_name: ${{ inputs.vector_index_name }}

      - name: Run Extractor for each URL
        if: ${{ inputs.run_extractor }}
        uses: ./.github/actions/extractor
        with:
          chb_aws_google_api_key: ${{ secrets.CHB_AWS_GOOGLE_API_KEY }}
          deploy_iam_role: ${{ secrets.DEPLOY_IAM_ROLE }}
          environment: ${{ inputs.environment }}
          similarity_threshold: '0.8'
          urls: ${{ inputs.urls_to_add }}
          vector_index_name: ${{ inputs.vector_index_name }}

  # Job 2: create or update the vector index once job 1 has finished.
  # Skipped entirely when vector_index_mode is "none".
  manage-index:
    name: Manage knowledge base index (manual on ${{ inputs.environment }})
    runs-on: codebuild-${{ inputs.environment }}-github-runner-${{ github.run_id }}-${{ github.run_attempt }}
    environment: ${{ inputs.environment }}
    needs: manage-structured-data
    if: ${{ inputs.vector_index_mode != 'none' }}
    concurrency:
      group: ${{ github.workflow }}-${{ inputs.environment }}
      cancel-in-progress: false
    steps:
      - name: Checkout
        uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502
        with:
          role-to-assume: ${{ secrets.IAM_ROLE_CHATBOT_REINDEX }}
          aws-region: eu-south-1

      # Mode "create": run the local chatbot action with only the
      # "structured" flag enabled.
      - name: Create index
        if: ${{ inputs.vector_index_mode == 'create' }}
        uses: ./.github/actions/chatbot
        with:
          api: 'false'
          chatbot_lambda_name: ${{ vars.CHATBOT_LAMBDA_NAME }}
          clean_redis: 'false'
          dynamic: 'false'
          index_id: ${{ inputs.vector_index_name }}
          static: 'false'
          structured: 'true'

      # Mode "update": placeholder until a dedicated action exists.
      # `with:` is only accepted on `uses:` steps; a `run:` step gets its
      # parameters through `env:`, which also keeps the free-text URL inputs
      # out of the script body.
      # TODO: replace this step with the update action.
      - name: Update index
        if: ${{ inputs.vector_index_mode == 'update' }}
        shell: bash
        env:
          CHATBOT_LAMBDA_NAME: ${{ vars.CHATBOT_LAMBDA_NAME }}
          URLS_TO_UPDATE: ${{ inputs.urls_to_add }}
          URLS_TO_REMOVE: ${{ inputs.urls_to_remove }}
          VECTOR_INDEX_NAME: ${{ inputs.vector_index_name }}
        run: |
          echo "Updating vector index $VECTOR_INDEX_NAME with new data. This may take a while..."
          # Call the update script with the appropriate parameters