PyCodeFlow/gen.py at main · wrobeltomasz/PyCodeFlow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
#!/usr/bin/env python3
"""PyCodeFlow - generate code files from a natural-language instruction using a local Ollama server."""

import logging
import os
import re
import sys
from urllib.parse import urlsplit
import requests
import config

try:
    # GNU readline on Linux/macOS, pyreadline3 on Windows; completion is optional
    import readline
except ImportError:
    readline = None

# Ensure UTF-8 output works regardless of the console codepage
for _stream in (sys.stdout, sys.stderr):
    try:
        _stream.reconfigure(encoding="utf-8")
    except (AttributeError, ValueError):
        # Best effort: a stream that cannot be reconfigured is left unchanged
        pass

# Configure logging at import time: INFO and above are appended to the log file
# whose location comes from config.LOG_FILE passed through config.resolve()
logging.basicConfig(
    filename=config.resolve(config.LOG_FILE),
    filemode="a",
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
# Shared logger used by every function in this module
logger = logging.getLogger("pycodeflow")

# ANSI escape sequences used to colour terminal output; RESET ends a coloured span
GREEN = "\033[92m"
RED = "\033[91m"
CYAN = "\033[96m"
RESET = "\033[0m"

# Product metadata shown in the startup banner
VERSION = "1.0"
AUTHOR = "Tomasz Wróbel"

# Bracket tag prefixes offered by TAB completion, in suggestion order
TAG_PREFIXES = ["[file-in:", "[file-out:", "[dir-in:", "[dir-out:", "[model:"]

# Bare interactive commands offered by TAB completion
COMMAND_NAMES = ["help", "exit", "quit"]

# Map language keywords to file extensions.
# detect_filename() walks this dict in insertion order and tests each keyword as a
# substring of the lowered instruction, so a longer keyword must stay ahead of any
# keyword it contains ("javascript" before "java").
EXT_KEYWORDS = {
    "python": "py", "php": "php", "javascript": "js", "typescript": "ts",
    "html": "html", "css": "css", "java": "java", "ruby": "rb", "rust": "rs",
    "golang": "go", "shell": "sh", "bash": "sh", "json": "json", "yaml": "yml",
    "markdown": "md", "sql": "sql", "c++": "cpp",
}


def make_completer():
    # Return a readline-style completer over the bracket tags and bare commands.
    # The completer is called repeatedly with a rising state index and hands back
    # one candidate per call, then None once the candidates run out.
    candidates = [*TAG_PREFIXES, *COMMAND_NAMES]

    def completer(text, state):
        # Empty text offers every candidate; otherwise keep only prefix matches
        if text:
            hits = [item for item in candidates if item.startswith(text)]
        else:
            hits = list(candidates)
        return hits[state] if state < len(hits) else None

    return completer


def setup_completion():
    # Wire up TAB completion for tags and commands; does nothing without a readline backend
    if readline is not None:
        readline.set_completer(make_completer())
        # Only whitespace separates words, so a partial token such as "[fi" is completed as a whole
        readline.set_completer_delims(" \t\n")
        # libedit (the macOS default) uses a different key-binding syntax than GNU readline
        backend_doc = getattr(readline, "__doc__", "") or ""
        binding = "bind ^I rl_complete" if "libedit" in backend_doc else "tab: complete"
        readline.parse_and_bind(binding)


def highlight_tags(text):
    # Wrap every [bracketed] span in cyan so tags stand out when the input is echoed
    bracket_span = re.compile(r"(\[.*?\])")
    return bracket_span.sub(f"{CYAN}\\1{RESET}", text)


def print_banner():
    # Print the small startup logo, then the product version and author line
    logo_rows = ("     (o>", "     //\\", "     V_/_")
    for row in logo_rows:
        print(f"{CYAN}{row}{RESET}")
    print(f"   {GREEN}PyCodeFlow v.{VERSION}{RESET} by {AUTHOR}")


def models_url():
    # Build the Ollama tags endpoint on the same scheme and host as the configured generate URL
    scheme, netloc, *_ = urlsplit(config.OLLAMA_URL)
    return f"{scheme}://{netloc}/api/tags"


def list_available_models():
    # Return the names of the models installed on the configured Ollama server.
    #
    # Returns:
    #     A list of model names (possibly empty), or None when the server cannot be
    #     reached or replies with something other than a JSON object.
    url = models_url()
    try:
        # Use a short timeout so startup stays responsive when the server is down
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError also covers a non-JSON body on requests releases whose JSON
        # decoding error is not a RequestException subclass
        logger.warning("Could not list models from %s: %s", url, exc)
        return None

    if not isinstance(payload, dict):
        logger.warning("Could not list models from %s: unexpected %s payload", url, type(payload).__name__)
        return None

    # A missing or null "models" entry means nothing is installed
    entries = payload.get("models")
    if not isinstance(entries, list):
        entries = []
    return [entry["name"] for entry in entries if isinstance(entry, dict) and entry.get("name")]


def print_config(model):
    # Print the effective runtime settings followed by the models the server offers
    def show(label, value, colour=CYAN):
        # Labels are padded to a common width so the values line up in one column
        print(f"  {label:<18}{colour}{value}{RESET}")

    print(f"\n{GREEN}Active configuration{RESET}")
    show("Ollama server:", config.OLLAMA_URL)
    show("Ollama model:", model)
    show("Request timeout:", f"{config.REQUEST_TIMEOUT}s")
    show("Output directory:", config.resolve(config.OUTPUT_DIR))
    show("Log file:", config.resolve(config.LOG_FILE))

    # None means the server could not be queried; an empty list means nothing is installed
    installed = list_available_models()
    if installed is None:
        show("Available models:", "could not reach Ollama server", RED)
    elif installed:
        show("Available models:", ", ".join(installed))
    else:
        show("Available models:", "none installed", RED)


def print_help():
    # Print the help screen: bracket tags, interactive commands, a completion tip and examples
    help_lines = (
        f"\n{GREEN}PyCodeFlow - available options{RESET}",
        "\nTags (use anywhere inside your instruction):",
        f"  {CYAN}[file-in:NAME]{RESET}  Load an existing file as input context for the model.",
        f"  {CYAN}[file-out:NAME]{RESET} Force the output filename, overriding automatic detection.",
        f"  {CYAN}[dir-in:NAME]{RESET}   Read [file-in:] context files from the NAME subfolder of the output dir.",
        f"  {CYAN}[dir-out:NAME]{RESET}  Write the result into the NAME subfolder of the output dir (created if missing).",
        f"  {CYAN}[model:NAME]{RESET}    Use the Ollama model NAME for this request, overriding the default.",
        "\nCommands:",
        f"  {CYAN}help{RESET}         Show this list of options.",
        f"  {CYAN}exit{RESET}, {CYAN}quit{RESET}   Leave interactive mode.",
        "\nTip: type '[' then press TAB to list all tags, or '[fi' then TAB to complete '[file-in:'.",
        "\nExamples:",
        '  create a simple HTML page named index.html with a hello world heading',
        '  [file-in:index.html] generate matching CSS [file-out:style.css]',
        '  [dir-out:mysite] create a simple HTML page named index.html with a hero section',
        '  [dir-in:mysite] [file-in:index.html] generate matching CSS [dir-out:mysite] [file-out:style.css]',
        '  [model:codellama] refactor this parser for clarity',
    )
    for line in help_lines:
        print(line)


def detect_filename(clean_text, original_text):
    # Choose the output filename for a request.
    #
    # Args:
    #     clean_text: the instruction with bracket tags removed; used for the
    #         path pattern and the language keyword scan.
    #     original_text: the raw instruction, searched for a [file-out:NAME] tag.
    #
    # Returns:
    #     The [file-out:] name when present and non-empty, otherwise the first
    #     path/name.ext found in clean_text, otherwise "output.<ext>" for the first
    #     language keyword found, otherwise "output.xxx".

    # Force output filename if [file-out:filename] tag is present
    out_match = re.search(r"\[file-out:(.*?)\]", original_text)
    if out_match:
        forced_name = out_match.group(1).strip()
        # An empty tag such as [file-out:] names no file; fall back to detection
        # instead of returning "", which would point at the directory itself
        if forced_name:
            return forced_name

    # Match explicit path/name.ext pattern avoiding version numbers
    match = re.search(r"\b((?:[\w-]+/)*[\w-]+\.[A-Za-z][A-Za-z0-9]{0,5})\b", clean_text)
    if match:
        return match.group(1)

    # Guess extension from language keywords if no path matches
    lowered = clean_text.lower()
    for keyword, ext in EXT_KEYWORDS.items():
        if keyword in lowered:
            return f"output.{ext}"

    return "output.xxx"


def parse_dir_out_tag(instruction):
    # Extract NAME from the first [dir-out:NAME] tag; an empty string means no tag was given
    found = re.search(r"\[dir-out:(.*?)\]", instruction)
    return found.group(1).strip() if found else ""


def parse_dir_in_tag(instruction):
    # Look up the first [dir-in:NAME] tag and hand back NAME without surrounding whitespace
    hit = re.search(r"\[dir-in:(.*?)\]", instruction)
    if hit is None:
        # No tag present: context files are read from the output directory itself
        return ""
    return hit.group(1).strip()


def parse_model_tag(instruction):
    # Pull the model name out of the first [model:NAME] tag; "" signals that no override was given
    tag = re.search(r"\[model:(.*?)\]", instruction)
    name = tag.group(1).strip() if tag else ""
    return name


def safe_output_path(output_dir, subdir, filename):
    # Resolve subdir/filename under output_dir and refuse any result outside it.
    #
    # Args:
    #     output_dir: the directory all generated files must stay within.
    #     subdir: optional subfolder below output_dir ("" for none).
    #     filename: target file name, possibly with its own relative folders.
    #
    # Returns:
    #     The fully resolved path (symlinks and ".." collapsed by realpath).
    #
    # Raises:
    #     ValueError: when the resolved path is not output_dir or a descendant of it.
    base = os.path.realpath(output_dir)
    full_path = os.path.realpath(os.path.join(base, subdir, filename))
    try:
        # Compare whole path components; a string prefix test against base + os.sep
        # wrongly rejects everything when base is the filesystem root
        inside = os.path.commonpath([base, full_path]) == base
    except ValueError:
        # commonpath raises for paths with no common root (e.g. different drives)
        inside = False
    if not inside:
        raise ValueError(f"path escapes the output directory: {os.path.join(subdir, filename)}")
    return full_path


def strip_code_fences(text):
    # Return the code from a model reply, without markdown fences or surrounding chatter.
    #
    # Args:
    #     text: the raw model response.
    #
    # Returns:
    #     The body of the first fenced block when one exists, everything after an
    #     unclosed opening fence otherwise, or the stripped text when no fence is found.
    text = text.strip()

    # Extract full fenced code block ignoring conversational preamble.
    # The info string after the backticks may hold anything except a backtick or
    # newline (e.g. "objective-c", a trailing space, or the "\r" of a CRLF ending).
    fence = re.search(r"```[^\n`]*\n(.*?)```", text, re.DOTALL)
    if fence:
        return fence.group(1).strip("\r\n")

    # Extract code after opening fence if closing fence is missing
    open_fence = re.search(r"```[^\n`]*\n", text)
    if open_fence:
        return text[open_fence.end():].strip("\r\n")

    return text


def parse_file_tags(instruction, output_dir):
    # Expand [file-in:NAME] tags into file contents appended to the instruction.
    #
    # Args:
    #     instruction: the raw instruction, possibly holding [file-in:] and [dir-in:] tags.
    #     output_dir: directory the context files are resolved against.
    #
    # Returns:
    #     The instruction unchanged when it has no [file-in:] tag; otherwise the
    #     instruction without those tags, followed by a "Context files:" section
    #     holding each file's contents.
    #
    # Raises:
    #     FileNotFoundError: when a tagged file cannot be read or is not UTF-8 text.
    pattern = r"\[file-in:(.*?)\]"
    # Strip once so the path and the context header use the same clean name
    filenames = [name.strip() for name in re.findall(pattern, instruction)]

    if not filenames:
        return instruction

    # Read context files from an optional [dir-in:NAME] subfolder of the output dir
    subdir = parse_dir_in_tag(instruction)

    clean_instruction = re.sub(pattern, "", instruction).strip()
    context_parts = [clean_instruction, "\nContext files:"]

    for filename in filenames:
        filepath = os.path.join(output_dir, subdir, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as handle:
                content = handle.read()
        except OSError as exc:
            logger.error("Could not read context file %s: %s", filepath, exc)
            raise FileNotFoundError(f"context file not found: {filename}") from exc
        except UnicodeDecodeError as exc:
            # A binary or non-UTF-8 file raises a ValueError subclass, not OSError;
            # report it through the exception type the caller already handles
            logger.error("Could not decode context file %s: %s", filepath, exc)
            raise FileNotFoundError(f"context file is not valid UTF-8 text: {filename}") from exc
        context_parts.append(f"\n--- {filename} ---\n{content}\n--- EOF ---")

    return "\n".join(context_parts)


def generate_code(instruction, model):
    # Send the instruction to the Ollama server and return the model's raw text.
    # HTTP error statuses propagate from raise_for_status(); a reply whose
    # "response" field is empty or whitespace raises ValueError.
    payload = {
        "model": model,
        "prompt": (
            "Output ONLY the raw source code for the file described below. "
            "No explanations, no markdown, no extra text. Start with the first line of code.\n\n"
            f"Instruction: {instruction}"
        ),
        "stream": False,
    }

    # Single non-streaming request; the timeout comes from the configuration
    response = requests.post(config.OLLAMA_URL, json=payload, timeout=config.REQUEST_TIMEOUT)
    response.raise_for_status()

    text = response.json().get("response", "")
    if text.strip():
        return text
    raise ValueError("model returned an empty response")


def process_instruction(instruction, model):
    # Handle one request end to end: resolve tags, query the model, write the file.
    #
    # Args:
    #     instruction: the raw line typed by the user, bracket tags included.
    #     model: the default Ollama model; a [model:NAME] tag overrides it.
    #
    # Returns:
    #     True when the generated file was written, False after any failure
    #     (each failure is logged and reported on stderr).
    output_dir = config.resolve(config.OUTPUT_DIR)

    # Override the model for this request if a [model:NAME] tag is present
    tag_model = parse_model_tag(instruction)
    if tag_model:
        model = tag_model
        logger.info("Model override: using %s for this request", model)
        print(f"{CYAN}Using model: {model}{RESET}")

    # Enrich instruction with file contents if tags are present
    try:
        enriched_instruction = parse_file_tags(instruction, output_dir)
    except FileNotFoundError as exc:
        logger.error("Session aborted: %s", exc)
        print(f"{RED}Error: {exc}. Request cannot proceed.{RESET}", file=sys.stderr)
        return False

    # Directive tags control output only; do not feed them to the model.
    # The instruction always comes first in the enriched prompt, so removal is
    # limited to the number of directives the instruction itself holds; bracketed
    # text inside the appended context files must reach the model untouched.
    directive_pattern = r"\[(?:file-out|dir-in|dir-out|model):.*?\]"
    instruction_part = re.sub(r"\[file-in:.*?\]", "", instruction)
    directive_count = len(re.findall(directive_pattern, instruction_part))
    if directive_count:
        # count=0 would mean "replace all", hence the guard
        enriched_instruction = re.sub(directive_pattern, "", enriched_instruction, count=directive_count)
    enriched_instruction = enriched_instruction.strip()

    # Remove all bracket tags to clean the instruction for regex matching
    clean_instruction = re.sub(r"\[.*?\]", "", instruction)

    # Detect target filename using both clean and original text
    filename = detect_filename(clean_instruction, instruction)

    # Resolve an optional [dir-out:NAME] subfolder under the output directory
    subdir = parse_dir_out_tag(instruction)

    # Build the final path and ensure it stays inside the output directory.
    # This runs before the model call so a bad path fails fast instead of after
    # a potentially long generation.
    try:
        full_path = safe_output_path(output_dir, subdir, filename)
    except ValueError as exc:
        logger.error("Session failed: %s", exc)
        print(f"{RED}Error: {exc}{RESET}", file=sys.stderr)
        return False

    # Process a single code generation request and handle connection errors
    try:
        raw = generate_code(enriched_instruction, model)
    except requests.exceptions.ConnectionError:
        logger.error("Session failed: could not connect to Ollama at %s", config.OLLAMA_URL)
        print(f"{RED}Error: could not connect to Ollama at {config.OLLAMA_URL}{RESET}", file=sys.stderr)
        return False
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: any other failure is reported and the prompt loop continues
        logger.error("Session failed: %s", exc)
        print(f"{RED}Error: {exc}{RESET}", file=sys.stderr)
        return False

    code = strip_code_fences(raw)

    # Save the generated code, creating any parent subfolders as needed
    try:
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        with open(full_path, "w", encoding="utf-8") as handle:
            handle.write(code)
    except OSError as exc:
        logger.error("Session failed: could not write %s: %s", filename, exc)
        print(f"{RED}Error: {exc}{RESET}", file=sys.stderr)
        return False

    rel_path = os.path.relpath(full_path, output_dir)
    logger.info("Session success: wrote %s to %s (%d bytes)", rel_path, output_dir, len(code.encode("utf-8")))
    print(f"{GREEN}File {rel_path} has been created in {output_dir}.{RESET}")
    return True


def main():
    # Run the interactive prompt until the user leaves; the return value is always 0
    model = config.DEFAULT_MODEL
    setup_completion()
    print_banner()
    print(f"\n{GREEN}Interactive mode started. Type 'help' for options or 'exit' to quit.{RESET}")
    print_config(model)

    while True:
        # Ctrl-D and Ctrl-C at the prompt both end the session cleanly
        try:
            instruction = input("\nEnter instruction: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            return 0

        # Nothing typed: prompt again
        if not instruction:
            continue

        # Bare commands are matched case-insensitively
        command = instruction.lower()
        if command in ("exit", "quit"):
            print("Exiting...")
            return 0
        if command in ("help", "?"):
            print_help()
            continue

        logger.info("Session start: model=%s instruction=%r", model, instruction)

        # Echo the instruction back with its bracket tags highlighted
        print(f"Processing: {highlight_tags(instruction)}")

        process_instruction(instruction, model)


# Start the interactive loop only when run as a script; main()'s return value becomes the exit status
if __name__ == "__main__":
    sys.exit(main())