Skip to content

Commit dc578d8

Browse files
author
莘权 马
committed
feat: Editor + read pdf/docx...
1 parent fa06a67 commit dc578d8

File tree

15 files changed

+447
-30
lines changed

15 files changed

+447
-30
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,4 @@ cov.xml
189189
*-structure.json
190190
*.dot
191191
.python-version
192+
tests/data/requirements/*.jpg

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ FROM nikolaik/python-nodejs:python3.9-nodejs20-slim
33

44
# Install Debian software needed by MetaGPT and clean up in one RUN command to reduce image size
55
RUN apt update &&\
6-
apt install -y libgomp1 git chromium fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 --no-install-recommends &&\
6+
apt install -y libgomp1 git chromium fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 --no-install-recommends file &&\
77
apt clean && rm -rf /var/lib/apt/lists/*
88

99
# Install Mermaid CLI globally

metagpt/actions/di/execute_nb_code.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def parse_outputs(self, outputs: list[str], keep_len: int = 5000) -> Tuple[bool,
191191
output_text = remove_log_and_warning_lines(output_text)
192192
# The useful information of the exception is at the end,
193193
# the useful information of normal output is at the beginning.
194-
if '<!DOCTYPE html>' not in output_text:
194+
if "<!DOCTYPE html>" not in output_text:
195195
output_text = output_text[:keep_len] if is_success else output_text[-keep_len:]
196196

197197
parsed_output.append(output_text)
@@ -286,11 +286,7 @@ async def run(self, code: str, language: Literal["python", "markdown"] = "python
286286
def remove_log_and_warning_lines(input_str: str) -> str:
    """Drop log/warning noise lines from notebook output.

    A line is discarded when, compared case-insensitively, it contains any of
    the markers ``[warning]``, ``warning:``, ``[cv]`` or ``[info]``. The
    remaining lines are re-joined with newlines and the result is stripped of
    leading and trailing whitespace.

    Args:
        input_str: Raw output text, lines separated by ``"\\n"``.

    Returns:
        The filtered text.
    """
    noise_markers = ("[warning]", "warning:", "[cv]", "[info]")
    kept_lines = []
    for line in input_str.split("\n"):
        # Lower-case once per line instead of once per marker.
        lowered = line.lower()
        if not any(marker in lowered for marker in noise_markers):
            kept_lines.append(line)
    return "\n".join(kept_lines).strip()
296292

metagpt/rag/schema.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""RAG schemas."""
2-
2+
from enum import Enum
33
from pathlib import Path
4-
from typing import Any, ClassVar, Literal, Optional, Union
4+
from typing import Any, ClassVar, List, Literal, Optional, Union
55

66
from chromadb.api.types import CollectionMetadata
77
from llama_index.core.embeddings import BaseEmbedding
@@ -12,6 +12,7 @@
1212

1313
from metagpt.config2 import config
1414
from metagpt.configs.embedding_config import EmbeddingType
15+
from metagpt.logs import logger
1516
from metagpt.rag.interface import RAGObject
1617

1718

@@ -44,7 +45,13 @@ class FAISSRetrieverConfig(IndexRetrieverConfig):
4445
@model_validator(mode="after")
4546
def check_dimensions(self):
4647
if self.dimensions == 0:
47-
self.dimensions = self._embedding_type_to_dimensions.get(config.embedding.api_type, 1536)
48+
self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get(
49+
config.embedding.api_type, 1536
50+
)
51+
if not config.embedding.dimensions and config.embedding.api_type not in self._embedding_type_to_dimensions:
52+
logger.warning(
53+
f"You didn't set dimensions in config when using {config.embedding.api_type}, default to 1536"
54+
)
4855

4956
return self
5057

@@ -207,3 +214,51 @@ def get_obj_metadata(obj: RAGObject) -> dict:
207214
)
208215

209216
return metadata.model_dump()
217+
218+
219+
class OmniParseType(str, Enum):
    """Kinds of input that can be named as an OmniParse ``parse_type``.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    PDF = "PDF"
    DOCUMENT = "DOCUMENT"
224+
225+
226+
class ParseResultType(str, Enum):
    """Output formats a parser can be asked to produce.

    Members are also ``str`` instances, so they compare equal to their raw
    string values.
    """

    TXT = "text"
    MD = "markdown"
    JSON = "json"
232+
233+
234+
class OmniParseOptions(BaseModel):
    """Settings that control a request to the OmniParse service.

    Defaults: markdown output, ``DOCUMENT`` parse type, a maximum timeout of
    120 and 5 workers. ``num_workers`` must be strictly between 0 and 10.
    """

    result_type: ParseResultType = Field(
        default=ParseResultType.MD,
        description="OmniParse result_type",
    )
    parse_type: OmniParseType = Field(
        default=OmniParseType.DOCUMENT,
        description="OmniParse parse_type",
    )
    max_timeout: Optional[int] = Field(
        default=120,
        description="Maximum timeout for OmniParse service requests",
    )
    # Exclusive bounds: accepted values are 1 through 9.
    num_workers: int = Field(
        default=5,
        gt=0,
        lt=10,
        description="Number of concurrent requests for multiple files",
    )
246+
247+
248+
class OminParseImage(BaseModel):
    """One image entry of an OmniParse result.

    NOTE(review): the class name looks like a misspelling of "OmniParseImage";
    it is kept because other modules may import it under this name.
    """

    # Image payload as a string; every field below defaults to an empty value.
    image: str = Field(
        default="",
        description="image str bytes",
    )
    image_name: str = Field(
        default="",
        description="image name",
    )
    image_info: Optional[dict] = Field(
        default={},
        description="image info",
    )
252+
253+
254+
class OmniParsedResult(BaseModel):
    """Parsed document content: markdown, plain text, images and metadata.

    When no non-empty ``markdown`` value is supplied, ``markdown`` falls back
    to the supplied ``text`` (or to an empty string when ``text`` is missing
    too).
    """

    markdown: str = Field(default="", description="markdown text")
    text: str = Field(default="", description="plain text")
    images: Optional[List[OminParseImage]] = Field(default=[], description="images")
    metadata: Optional[dict] = Field(default={}, description="metadata")

    @model_validator(mode="before")
    @classmethod
    def set_markdown(cls, values: Any) -> Any:
        """Fill an empty ``markdown`` from ``text`` before field validation.

        Args:
            values: Raw input handed to the model. Only ``dict`` input is
                adjusted; anything else is passed through untouched.

        Returns:
            The input to validate. When a fallback is applied a new dict is
            returned so the caller's dict is not modified.
        """
        if isinstance(values, dict) and not values.get("markdown"):
            # Fall back to "" rather than None: `markdown` is a required-type
            # `str` field, so None would make an otherwise empty result fail
            # validation.
            values = {**values, "markdown": values.get("text") or ""}
        return values

metagpt/strategy/task_type.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
FEATURE_ENGINEERING_PROMPT,
99
IMAGE2WEBPAGE_PROMPT,
1010
MODEL_EVALUATE_PROMPT,
11-
MODEL_TRAIN_PROMPT, WEB_SCRAPING_PROMPT,
11+
MODEL_TRAIN_PROMPT,
12+
WEB_SCRAPING_PROMPT,
1213
)
1314

1415

metagpt/tools/libs/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
deployer,
1818
git,
1919
)
20-
from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description
20+
from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description, get_env_default
2121

2222
_ = (
2323
data_preprocess,
@@ -32,6 +32,7 @@
3232
deployer,
3333
git,
3434
get_env,
35+
get_env_default,
3536
get_env_description,
3637
set_get_env_entry,
3738
default_get_env,

metagpt/tools/libs/editor.py

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
1+
import base64
12
import os
23
import shutil
34
import subprocess
5+
from pathlib import Path
6+
from typing import List, Optional, Union
47

58
from pydantic import BaseModel
69

710
from metagpt.const import DEFAULT_WORKSPACE_ROOT
11+
from metagpt.logs import logger
812
from metagpt.tools.tool_registry import register_tool
13+
from metagpt.utils import read_docx
14+
from metagpt.utils.common import aread_bin, awrite_bin, run_coroutine_sync
15+
from metagpt.utils.repo_to_markdown import is_text_file
916
from metagpt.utils.report import EditorReporter
1017

1118

@@ -40,12 +47,26 @@ def write(self, path: str, content: str):
4047

4148
def read(self, path: str) -> FileBlock:
    """Read the whole content of a file. Using absolute paths as the argument for specifying the file location."""
    # Dispatch on the detected content type: plain text, PDF, or Word formats.
    # Any other type yields an empty block and is not reported.
    is_text, mime_type = run_coroutine_sync(is_text_file, path)
    if is_text:
        lines = self._read_text(path)
    elif mime_type == "application/pdf":
        lines = self._read_pdf(path)
    elif mime_type in {
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-word.document.macroEnabled.12",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
        "application/vnd.ms-word.template.macroEnabled.12",
    }:
        lines = self._read_docx(path)
    else:
        return FileBlock(file_path=str(path), block_content="")
    self.resource.report(str(path), "path")

    if not is_text:
        # The PDF/DOCX readers return text chunks (and image links), not
        # newline-terminated lines. Split them into real lines so that joining
        # below does not fuse chunks and place a "NNN|" prefix mid-line.
        lines = [f"{row}\n" for chunk in lines for row in (chunk or "").splitlines()]

    # Prefix every line with a 1-based, zero-padded line number.
    lines_with_num = [f"{i + 1:03}|{line}" for i, line in enumerate(lines)]
    result = FileBlock(
        file_path=str(path),
        block_content="".join(lines_with_num),
    )
    return result
@@ -196,3 +217,63 @@ def _lint_file(cls, file_path: str) -> (bool, str):
196217
lint_passed = result.returncode == 0
197218
lint_message = result.stdout
198219
return lint_passed, lint_message
220+
221+
@staticmethod
def _read_text(path: Union[str, Path]) -> List[str]:
    """Return the lines of the text file at ``path``, line endings included.

    The file is opened in text mode with the platform default encoding.
    """
    with open(str(path), "r") as handle:
        return handle.readlines()
226+
227+
@staticmethod
def _read_pdf(path: Union[str, Path]) -> List[str]:
    """Extract the text of a PDF file.

    OmniParse is tried first; when it returns nothing usable, the file is
    loaded with llama_index's ``PDFReader`` and the text of each loaded
    document is returned.
    """
    parsed = run_coroutine_sync(Editor._omniparse_read_file, path)
    if not parsed:
        # Imported lazily so the reader is only loaded when the fallback runs.
        from llama_index.readers.file import PDFReader

        documents = PDFReader().load_data(file=Path(path))
        return [document.text for document in documents]
    return parsed
238+
239+
@staticmethod
def _read_docx(path: Union[str, Path]) -> List[str]:
    """Extract the text of a Word document.

    OmniParse is tried first; when it returns nothing usable, the file is read
    with ``read_docx``.
    """
    parsed = run_coroutine_sync(Editor._omniparse_read_file, path)
    return parsed if parsed else read_docx(str(path))
245+
246+
@staticmethod
async def _omniparse_read_file(path: Union[str, Path]) -> Optional[List[str]]:
    """Parse a file with the OmniParse service, if one is configured.

    The service settings (``base_url``, ``api_key``, ``timeout``) are looked
    up through ``get_env_default`` under the ``OmniParse`` app name. Images
    returned by the service are decoded from base64 and written to a
    ``<file name with dots replaced by underscores>_images`` directory next
    to ``path``.

    Args:
        path: Location of the file to parse.

    Returns:
        ``None`` when no ``base_url`` is configured, when the request fails,
        or when the service yields neither text nor usable images. Otherwise
        a list holding the parsed text (when non-empty) followed by one
        markdown image link per saved image.
    """
    from metagpt.tools.libs import get_env_default
    from metagpt.utils.omniparse_client import OmniParseClient

    base_url = await get_env_default(key="base_url", app_name="OmniParse", default_value="")
    if not base_url:
        return None
    api_key = await get_env_default(key="api_key", app_name="OmniParse", default_value="")
    raw_timeout = await get_env_default(key="timeout", app_name="OmniParse", default_value="120")
    try:
        timeout = int(raw_timeout)
    except (TypeError, ValueError):
        timeout = 120
    if timeout <= 0:
        # Zero or negative values are not meaningful timeouts; use the default.
        timeout = 120

    try:
        client = OmniParseClient(api_key=api_key, base_url=base_url, max_timeout=timeout)
        file_data = await aread_bin(filename=path)
        ret = await client.parse_document(file_input=file_data, bytes_filename=str(path))
    except Exception as e:
        # Best effort: the callers fall back to a local reader on None.
        logger.exception(f"{path}: {e}")
        return None
    if not ret.images:
        return [ret.text] if ret.text else None

    result = [ret.text] if ret.text else []
    img_dir = Path(path).parent / (Path(path).name.replace(".", "_") + "_images")
    img_dir.mkdir(parents=True, exist_ok=True)
    for image in ret.images:
        # The name comes from the remote service: keep only its final path
        # component so it cannot point outside img_dir.
        safe_name = Path(image.image_name).name
        if safe_name in ("", ".", ".."):
            logger.warning(f"{path}: skip image with unusable name {image.image_name!r}")
            continue
        filename = img_dir / safe_name
        try:
            byte_data = base64.b64decode(image.image)
            await awrite_bin(filename=filename, data=byte_data)
        except (ValueError, OSError) as e:
            # binascii.Error (invalid base64) is a ValueError subclass.
            # One bad image should not discard the rest of the document.
            logger.warning(f"{path}: failed to save image {safe_name}: {e}")
            continue
        result.append(f"![{safe_name}]({str(filename)})")
    return result or None

metagpt/tools/libs/env.py

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,34 @@
77
@Desc: Implement `get_env`. RFC 216 2.4.2.4.2.
88
"""
99
import os
10-
from typing import Dict
10+
from typing import Dict, Optional
1111

1212

1313
class EnvKeyNotFoundError(Exception):
1414
def __init__(self, info):
1515
super().__init__(info)
1616

1717

18+
def to_app_key(key: str, app_name: Optional[str] = None) -> str:
    """Build the lookup name for ``key`` within an optional application scope.

    Args:
        key: The variable name.
        app_name: Application name used as a prefix. ``None`` or an empty
            string means no prefix.

    Returns:
        ``"<app_name>-<key>"`` when ``app_name`` is non-empty, otherwise
        ``key`` unchanged.
    """
    if not app_name:
        return key
    return f"{app_name}-{key}"
20+
21+
22+
def split_app_key(app_key: str) -> tuple[str, str]:
    """Split a combined name into ``(app_name, key)`` at the first ``"-"``.

    Args:
        app_key: A name that may carry an ``"<app_name>-"`` prefix.

    Returns:
        ``(app_name, key)``. When ``app_key`` contains no ``"-"`` the app name
        is an empty string and ``key`` is ``app_key`` itself. Only the first
        ``"-"`` separates; later ones stay in ``key``.
    """
    app_name, separator, key = app_key.partition("-")
    if not separator:
        return "", app_key
    return app_name, key
27+
28+
1829
async def default_get_env(key: str, app_name: str = None) -> str:
19-
if key in os.environ:
20-
return os.environ[key]
30+
app_key = to_app_key(key=key, app_name=app_name)
31+
if app_key in os.environ:
32+
return os.environ[app_key]
2133

2234
from metagpt.context import Context
2335

2436
context = Context()
25-
val = context.kwargs.get(key, None)
37+
val = context.kwargs.get(app_key, None)
2638
if val is not None:
2739
return val
2840

@@ -32,14 +44,16 @@ async def default_get_env(key: str, app_name: str = None) -> str:
3244
async def default_get_env_description() -> Dict[str, str]:
    """Describe every variable reachable through ``get_env``.

    Returns:
        A mapping whose keys are ``await get_env(...)`` call strings and whose
        values are one-sentence descriptions. Entries are produced for the
        names in ``os.environ`` first, then for the attribute names of a fresh
        ``Context().kwargs``; a later entry replaces an earlier one that has
        the same call string.
    """

    def _call_for(name: str) -> str:
        # Names of the form "<app>-<key>" are split back into their two parts.
        app_name, key = split_app_key(name)
        return f'await get_env(key="{key}", app_name="{app_name}")'

    descriptions = {}
    for env_name in os.environ.keys():
        descriptions[_call_for(env_name)] = f"Return the value of environment variable `{env_name}`."

    from metagpt.context import Context

    context = Context()
    for kwarg_name in context.kwargs.__dict__.keys():
        descriptions[_call_for(kwarg_name)] = f"Get the value of environment variable `{kwarg_name}`."
    return descriptions
4559

@@ -84,6 +98,37 @@ async def get_env(key: str, app_name: str = None) -> str:
8498
return await default_get_env(key=key, app_name=app_name)
8599

86100

101+
async def get_env_default(
    key: str, app_name: Optional[str] = None, default_value: Optional[str] = None
) -> Optional[str]:
    """
    Retrieves the value for the specified environment variable key. If the key is not found,
    returns the default value.

    Args:
        key (str): The name of the environment variable to retrieve.
        app_name (str, optional): The name of the application or component to associate with the environment variable.
        default_value (str, optional): The default value to return if the environment variable is not found.

    Returns:
        str or None: The value of the environment variable if found, otherwise the default value.

    Example:
        >>> from metagpt.tools.libs.env import get_env_default
        >>> api_key = await get_env_default(key="NOT_EXISTS_API_KEY", default_value="<API_KEY>")
        >>> print(api_key)
        <API_KEY>

        >>> from metagpt.tools.libs.env import get_env_default
        >>> api_key = await get_env_default(key="NOT_EXISTS_API_KEY", app_name="GITHUB", default_value="<API_KEY>")
        >>> print(api_key)
        <API_KEY>

    """
    try:
        return await get_env(key=key, app_name=app_name)
    except EnvKeyNotFoundError:
        # Only the "key not found" error maps to the default; any other
        # exception raised by get_env propagates to the caller.
        return default_value
130+
131+
87132
async def get_env_description() -> Dict[str, str]:
88133
global _get_env_description_entry
89134

0 commit comments

Comments
 (0)