|
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
| 3 | + |
# Base image: the openEuler Python image, pinned to tag 3.11.13-oe2403lts.
FROM openeuler/python:3.11.13-oe2403lts

# Export a UTF-8 locale setting to every later build step and to the container.
ENV LANG="C.UTF-8"

# Build-time switch consumed by the Python dependency step; defaults to "cpu".
ARG ARCH=cpu
| 9 | + |
# Refresh the OS packages and install the system-level dependencies
# (one package per line, sorted, so later diffs stay small).
# The yum cache is removed in the same layer so it never reaches the image.
RUN yum update -y && \
    yum install -y \
        cairo \
        cmake \
        curl \
        ffmpeg \
        g++ \
        gcc \
        java-1.8.0-openjdk \
        jemalloc-devel \
        libpq-devel \
        make \
        mariadb-connector-c-devel-3.3.8 \
        mesa-libGL \
        poppler-utils \
        shadow \
        tesseract \
        wget && \
    yum clean all && \
    rm -rf /var/cache/yum
| 25 | + |
# Install LibreOffice from the upstream RPM bundle.
# - The release number is defined once; the extracted directory carries an
#   extra build suffix (e.g. 24.8.6.2), so it is matched with a glob instead
#   of being hard-coded a second time.
# - wget/tar run without per-file progress output to keep the build log readable.
# - yum metadata fetched while resolving the RPMs' dependencies is cleaned up
#   in this same layer, together with the downloaded bundle.
# NOTE: the bundle URL is x86_64-only.
RUN LIBREOFFICE_VERSION=24.8.6 && \
    LIBREOFFICE_TARBALL="LibreOffice_${LIBREOFFICE_VERSION}_Linux_x86-64_rpm.tar.gz" && \
    LIBREOFFICE_URL="https://mirrors.bfsu.edu.cn/libreoffice/libreoffice/stable/${LIBREOFFICE_VERSION}/rpm/x86_64/${LIBREOFFICE_TARBALL}" && \
    wget -nv "$LIBREOFFICE_URL" -O "/tmp/${LIBREOFFICE_TARBALL}" && \
    tar -xf "/tmp/${LIBREOFFICE_TARBALL}" -C /tmp && \
    yum install -y /tmp/LibreOffice_"${LIBREOFFICE_VERSION}"*_Linux_x86-64_rpm/RPMS/*.rpm && \
    yum clean all && \
    rm -rf /var/cache/yum /tmp/LibreOffice*
| 31 | + |
# Create the unprivileged "user" account (bash login shell, home directory
# under /home/user) and make sure that home directory is owned by it.
RUN useradd --create-home --shell /bin/bash user \
    && mkdir -p /home/user \
    && chown -R user /home/user/

# Copy the comps tree from the build context into the user's home directory.
COPY comps /home/user/comps
| 37 | + |
# Shorthand for the uv-based installer command used below.
ARG uvpip='uv pip install --system --no-cache-dir'

# Install the Python dependencies for the selected ARCH build argument.
# - "${ARCH}" is quoted so an empty value compares cleanly instead of
#   producing a `[` syntax error.
# - The two cpu-branch installs are chained with `&&`: with `;` a failed torch
#   install was hidden by the exit status of the following command and the
#   build carried on with a broken environment.
RUN pip install --no-cache-dir --upgrade pip setuptools uv && \
    if [ "${ARCH}" = "cpu" ]; then \
      $uvpip torch torchvision --index-url https://download.pytorch.org/whl/cpu && \
      $uvpip -r /home/user/comps/dataprep/src/requirements-cpu.txt; \
    else \
      $uvpip -r /home/user/comps/dataprep/src/requirements-gpu.txt; \
    fi
| 46 | + |
# Append /home/user to the Python module search path.
ENV PYTHONPATH="${PYTHONPATH}:/home/user"

# Create the upload directory and /data, and hand both over to "user".
RUN mkdir -p /home/user/comps/dataprep/src/uploaded_files /data && \
    chown -R user /home/user/comps/dataprep/src/uploaded_files /data
| 51 | + |
# Everything from here on runs as the unprivileged account.
USER user

# Air-gapped support: pre-download all needed NLTK data into the image.
ENV NLTK_DATA=/home/user/nltk_data
RUN mkdir -p "$NLTK_DATA" && \
    python -m nltk.downloader -d "$NLTK_DATA" \
        punkt_tab \
        averaged_perceptron_tagger_eng \
        stopwords

# Air-gapped support: set the model cache directory.
ENV HF_HUB_CACHE=/data
| 58 | + |
# Start in the dataprep source directory so the script names below resolve.
WORKDIR /home/user/comps/dataprep/src

# Pick the service script at container start: the multimodal variant when
# MULTIMODAL_DATAPREP is exactly "true", the standard one otherwise.
# `exec` replaces the wrapper shell with the python process, so python receives
# signals (e.g. SIGTERM from `docker stop`) directly instead of relying on the
# shell to pass them on.
ENTRYPOINT ["sh", "-c", "exec python $( [ \"$MULTIMODAL_DATAPREP\" = \"true\" ] && echo 'opea_dataprep_multimodal_microservice.py' || echo 'opea_dataprep_microservice.py')"]
0 commit comments