From 9cbecaf0e29223d3b830ae4c34d06139ae3e41d9 Mon Sep 17 00:00:00 2001 From: John Trujillo Date: Wed, 15 Apr 2026 11:46:49 -0500 Subject: [PATCH 1/5] refactor: enhance OCR fuzzy parsing and modularize XML converter Add regex-based typo sanitization and extract XML view generation into dedicated functions. --- .../domain/FuzzyAttributeParser.kt | 86 +++++++- .../domain/YoloToXmlConverter.kt | 207 ++++++++++-------- 2 files changed, 190 insertions(+), 103 deletions(-) diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt index de85c332f7..589fb7f2dd 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt @@ -140,12 +140,42 @@ object FuzzyAttributeParser { "transparent" to "@android:color/transparent" ) + private val nonAlphanumericRegex = Regex("[^a-z0-9_]") + private val multipleUnderscoresRegex = Regex("_+") + private val trailingLetterRegex = Regex("_[a-z]$") + private val numberExtractionRegex = Regex("-?\\d+") + + private val ocrLetterOToZeroRegex = Regex("[oO]") + private val ocrLetterIToOneRegex = Regex("[lI]") + private val ocrLetterZToTwoRegex = Regex("[zZ]") + private val ocrLetterSToFiveRegex = Regex("[sS]") + private val ocrLetterBToSixRegex = Regex("[bB]") + + private val commonUiTextTypos = listOf( + Regex("(?i)\\bour name\\b") to "User name", + Regex("(?i)\\bfintsh\\b") to "Finish", + Regex("(?i)\\bpassworo\\b") to "Password", + Regex("(?i)\\busemame\\b") to "Username" + ) + + private val validInputTypes = listOf( + "text", "textPassword", "number", "numberDecimal", + "textEmailAddress", "textUri", "phone" + ) + + private val validGravities = listOf( + "top", "bottom", "left", "right", "center", + "center_vertical", "center_horizontal", "start", "end" + ) + + private val validTextStyles = listOf("normal", "bold", "italic") + private fun normalizeOcrKey(raw: String): String = raw.lowercase() .replace("-", "_") .replace(".", "_") .replace(" ", "_") - .replace(Regex("_+"), "_") + .replace(multipleUnderscoresRegex, "_") .replace(Regex("lay[ao0]ut"), "layout") .replace(Regex("(?<=^|_)[lt]d(?=$|_)"), "id") @@ -159,6 +189,19 @@ object FuzzyAttributeParser { } } + private fun matchCategoricalValue(rawValue: String, allowedValues: List, threshold: Int = 70): String { + val result = FuzzySearch.extractOne(rawValue, allowedValues) + return if (result.score >= threshold) result.string else rawValue + } + + fun sanitizeOpenText(text: String): String { + var cleanedText = text + commonUiTextTypos.forEach { (regex, correction) -> + cleanedText = regex.replace(cleanedText, correction) + } + return cleanedText + } + private fun parseDelimited(annotation: String, tag: String): Map { val result = mutableMapOf() @@ -181,7 +224,7 @@ object FuzzyAttributeParser { val rawValue: String if (colonIndex != -1) { - rawKey = chunk.substring(0, colonIndex).trim() + rawKey = chunk.take(colonIndex).trim() rawValue = chunk.substring(colonIndex + 1).trim() } else { val splitResult = inferKeyValueBoundary(chunk) ?: return null @@ -212,7 +255,7 @@ object FuzzyAttributeParser { val matchedKeys = mutableListOf() for (colonPos in colonPositions) { - val textBefore = annotation.substring(0, colonPos) + val textBefore = annotation.take(colonPos) val words = textBefore.trimEnd().split(Regex("\\s+")) var bestMatch: Pair? = null @@ -246,10 +289,10 @@ object FuzzyAttributeParser { if (bestMatch != null) { val alreadyClaimed = matchedKeys.any { existing -> - bestMatch!!.second >= existing.keyStart && bestMatch!!.second < existing.valueStart + bestMatch.second >= existing.keyStart && bestMatch.second < existing.valueStart } if (!alreadyClaimed) { - matchedKeys.add(MatchedKey(bestMatch!!.first, bestMatch!!.second, colonPos + 1)) + matchedKeys.add(MatchedKey(bestMatch.first, bestMatch.second, colonPos + 1)) } } } @@ -378,6 +421,13 @@ object FuzzyAttributeParser { private fun cleanValue(rawValue: String, key: AttributeKey): String { val trimmed = rawValue.trim() + when (key) { + AttributeKey.INPUT_TYPE -> return matchCategoricalValue(trimmed, validInputTypes) + AttributeKey.GRAVITY, AttributeKey.LAYOUT_GRAVITY -> return matchCategoricalValue(trimmed, validGravities) + AttributeKey.TEXT_STYLE -> return matchCategoricalValue(trimmed, validTextStyles) + else -> {} + } + return when (key.valueType) { ValueType.DIMENSION -> cleanDimension(trimmed) ValueType.SP_DIMENSION -> cleanSpDimension(trimmed) @@ -429,12 +479,19 @@ object FuzzyAttributeParser { } private fun cleanId(value: String): String { - return value.lowercase() - .replace(Regex("[^a-z0-9_]"), "_") - .replace(Regex("_+"), "_") + val cleaned = value.lowercase() + .replace(nonAlphanumericRegex, "_") + .replace(multipleUnderscoresRegex, "_") + .replace("btm", "btn") // OCR typo: btm_finish -> btn_finish .trimEnd('_') .trimStart('_') - .replace(Regex("_[a-z]$"), "") + .replace(trailingLetterRegex, "") + + if (FuzzySearch.ratio(cleaned, "match_parent") > 75 || FuzzySearch.ratio(cleaned, "wrap_content") > 75) { + return "view_${(Math.random() * 1000).toInt()}" + } + + return cleaned } private fun denoiseOcrIdentifier(value: String): String = @@ -458,10 +515,15 @@ object FuzzyAttributeParser { } private fun extractOcrNumber(value: String): String? { - val match = Regex("-?\\d[\\doOlIaA]*").find(value) ?: return null + val normalized = value + .replace(ocrLetterOToZeroRegex, "0") + .replace(ocrLetterIToOneRegex, "1") + .replace(ocrLetterZToTwoRegex, "2") + .replace(ocrLetterSToFiveRegex, "5") + .replace(ocrLetterBToSixRegex, "6") + + val match = numberExtractionRegex.find(normalized) ?: return null return match.value - .replace(Regex("[oOaA]"), "0") - .replace(Regex("[lI]"), "1") } private fun resolveXmlAttribute( diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt index 09f8f11bd9..37f7e49120 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt @@ -13,11 +13,6 @@ object YoloToXmlConverter { private const val TAG = "YoloToXmlConverter" private const val MIN_W_ANY = 8 private const val MIN_H_ANY = 8 - private const val DEFAULT_SPACING_DP = 16 - - private const val HORIZONTAL_ALIGN_THRESHOLD = 20 - private const val VERTICAL_ALIGN_THRESHOLD = 20 - private const val RADIO_GROUP_GAP_THRESHOLD = 24 private const val OVERLAP_THRESHOLD = 0.6 private val TAG_REGEX = Regex("^(B|P|D|T|C|R|SW|S)-\\d+$") @@ -71,53 +66,50 @@ object YoloToXmlConverter { targetDpHeight: Int, wrapInScroll: Boolean = true ): String { + val widgets = detections.filter { it.isYolo && it.label != "widget_tag" } + var scaledBoxes = widgets.map { scaleDetection(it, sourceImageWidth, sourceImageHeight, targetDpWidth, targetDpHeight) } - val widgetTags = detections.filter { - it.label == "widget_tag" || (!it.isYolo && isTag(it.text)) - } - val widgets = detections.filter { it.isYolo }.filter { it.label != "widget_tag" } - - var scaledBoxes = widgets.map { - scaleDetection( - it, - sourceImageWidth, - sourceImageHeight, - targetDpWidth, - targetDpHeight - ) - } - - val parents = scaledBoxes.filter { it.label != "text" && !isTag(it.text) }.toMutableList() + val parents = scaledBoxes.filter { it.label != "text" && !isTag(it.text) } val texts = scaledBoxes.filter { it.label == "text" && !isTag(it.text) } + + scaledBoxes = assignTextToParents(parents, texts, scaledBoxes) + + val uiElements = scaledBoxes.filter { !isTag(it.text) } + val widgetTags = detections.filter { it.label == "widget_tag" || (!it.isYolo && isTag(it.text)) } + val canvasTags = widgetTags.map { scaleDetection(it, sourceImageWidth, sourceImageHeight, targetDpWidth, targetDpHeight) } + + val finalAnnotations = matchAnnotationsToElements(canvasTags, uiElements, annotations) + + val sortedBoxes = uiElements.sortedWith(compareBy({ it.y }, { it.x })) + return buildXml(sortedBoxes, finalAnnotations, targetDpWidth, targetDpHeight, wrapInScroll) + } + + private fun assignTextToParents(parents: List, texts: List, allBoxes: List): List { val consumedTexts = mutableSetOf() for (parent in parents) { texts.firstOrNull { text -> !consumedTexts.contains(text) && - Rect(parent.rect).let { intersection -> - intersection.intersect(text.rect) && - (intersection.width() * intersection.height()).let { intersectionArea -> - val textArea = text.w * text.h - textArea > 0 && (intersectionArea.toFloat() / textArea.toFloat()) > OVERLAP_THRESHOLD - } - } + Rect(parent.rect).let { intersection -> + intersection.intersect(text.rect) && + (intersection.width() * intersection.height()).let { intersectionArea -> + val textArea = text.w * text.h + textArea > 0 && (intersectionArea.toFloat() / textArea.toFloat()) > OVERLAP_THRESHOLD + } + } }?.let { parent.text = it.text consumedTexts.add(it) } } - scaledBoxes = scaledBoxes.filter { !consumedTexts.contains(it) } + return allBoxes.filter { !consumedTexts.contains(it) } + } - val uiElements = scaledBoxes.filter { !isTag(it.text) } - val canvasTags = widgetTags.map { - scaleDetection( - it, - sourceImageWidth, - sourceImageHeight, - targetDpWidth, - targetDpHeight - ) - } + private fun matchAnnotationsToElements( + canvasTags: List, + uiElements: List, + annotations: Map + ): Map { val finalAnnotations = mutableMapOf() val claimedWidgets = mutableSetOf() val appliedAnnotationKeys = mutableSetOf() @@ -156,8 +148,7 @@ object YoloToXmlConverter { } } - val sortedBoxes = uiElements.sortedWith(compareBy({ it.y }, { it.x })) - return buildXml(sortedBoxes, finalAnnotations, targetDpWidth, targetDpHeight, wrapInScroll) + return finalAnnotations } private fun scaleDetection( @@ -255,8 +246,8 @@ object YoloToXmlConverter { val parsedAttrs = parseMarginAnnotations(annotations[box], tag) - val width = parsedAttrs["android:layout_width"] ?: "wrap_content" - val height = parsedAttrs["android:layout_height"] ?: "wrap_content" + val width = parsedAttrs["android:layout_width"] ?: "${box.w}dp" + val height = parsedAttrs["android:layout_height"] ?: "${box.h}dp" val id = parsedAttrs["android:id"]?.substringAfterLast('/') ?: defaultId val writtenAttrs = mutableSetOf( @@ -269,68 +260,102 @@ object YoloToXmlConverter { xml.append("$indent android:layout_height=\"${escapeXmlAttr(height)}\"\n") when (tag) { - "TextView", "Button", "CheckBox", "RadioButton", "Switch" -> { - val viewText = parsedAttrs["android:text"] - ?: box.text.takeIf { it.isNotEmpty() && it != box.label } - ?: box.label - xml.append("$indent android:text=\"${escapeXmlAttr(viewText)}\"\n") - writtenAttrs.add("android:text") - if (tag == "TextView") { - val textSize = parsedAttrs["android:textSize"] ?: "16sp" - xml.append("$indent android:textSize=\"${escapeXmlAttr(textSize)}\"\n") - writtenAttrs.add("android:textSize") - } - if (label.contains("_checked") || label.contains("_on")) { - val checked = parsedAttrs["android:checked"] ?: "true" - xml.append("$indent android:checked=\"${escapeXmlAttr(checked)}\"\n") - writtenAttrs.add("android:checked") - } - xml.append("$indent tools:ignore=\"HardcodedText\"\n") - writtenAttrs.add("tools:ignore") - } + "TextView", "Button", "CheckBox", "RadioButton", "Switch" -> + appendTextViewAttributes(xml, indent, parsedAttrs, box, label, tag, writtenAttrs) - "EditText" -> { - val hint = parsedAttrs["android:hint"] - ?: box.text.ifEmpty { "Enter text..." } - xml.append("$indent android:hint=\"${escapeXmlAttr(hint)}\"\n") - writtenAttrs.add("android:hint") - val inputType = parsedAttrs["android:inputType"] ?: "text" - xml.append("$indent android:inputType=\"${escapeXmlAttr(inputType)}\"\n") - writtenAttrs.add("android:inputType") - xml.append("$indent tools:ignore=\"HardcodedText\"\n") - writtenAttrs.add("tools:ignore") - } + "EditText" -> + appendEditTextAttributes(xml, indent, parsedAttrs, box, writtenAttrs) - "ImageView" -> { - xml.append("$indent android:contentDescription=\"${escapeXmlAttr(label)}\"\n") - writtenAttrs.add("android:contentDescription") - val scaleType = parsedAttrs["android:scaleType"] ?: "centerCrop" - xml.append("$indent android:scaleType=\"${escapeXmlAttr(scaleType)}\"\n") - writtenAttrs.add("android:scaleType") - val bg = parsedAttrs["android:background"] ?: "#E0E0E0" - xml.append("$indent android:background=\"${escapeXmlAttr(bg)}\"\n") - writtenAttrs.add("android:background") - } + "ImageView" -> + appendImageViewAttributes(xml, indent, parsedAttrs, label, writtenAttrs) } parsedAttrs.forEach { (key, value) -> if (key !in writtenAttrs) { - // Add this logic to handle specific attribute value casing - val finalValue = when (key) { - "android:layout_gravity" -> value.lowercase() - else -> value - } - xml.append("$indent $key=\"${escapeXmlAttr(finalValue)}\"\n") + xml.append("$indent $key=\"${escapeXmlAttr(value)}\"\n") writtenAttrs.add(key) } - } xml.append("$indent/>") Log.d(TAG, "appendSimpleView: $xml") } + private fun appendTextViewAttributes( + xml: StringBuilder, + indent: String, + parsedAttrs: Map, + box: ScaledBox, + label: String, + tag: String, + writtenAttrs: MutableSet + ) { + val rawViewText = parsedAttrs["android:text"] + ?: box.text.takeIf { it.isNotEmpty() && it != box.label } + ?: when (tag) { + "Switch" -> "Switch" + "CheckBox" -> "CheckBox" + "RadioButton" -> "RadioButton" + else -> box.label + } + + val viewText = FuzzyAttributeParser.sanitizeOpenText(rawViewText) + + xml.append("$indent android:text=\"${escapeXmlAttr(viewText)}\"\n") + writtenAttrs.add("android:text") + if (tag == "TextView") { + val textSize = parsedAttrs["android:textSize"] ?: "16sp" + xml.append("$indent android:textSize=\"${escapeXmlAttr(textSize)}\"\n") + writtenAttrs.add("android:textSize") + } + if (label.contains("_checked") || label.contains("_on")) { + val checked = parsedAttrs["android:checked"] ?: "true" + xml.append("$indent android:checked=\"${escapeXmlAttr(checked)}\"\n") + writtenAttrs.add("android:checked") + } + xml.append("$indent tools:ignore=\"HardcodedText\"\n") + writtenAttrs.add("tools:ignore") + } + + private fun appendEditTextAttributes( + xml: StringBuilder, + indent: String, + parsedAttrs: Map, + box: ScaledBox, + writtenAttrs: MutableSet + ) { + val rawHint = parsedAttrs["android:hint"] ?: box.text.ifEmpty { "Enter text..." } + val hint = FuzzyAttributeParser.sanitizeOpenText(rawHint) + + xml.append("$indent android:hint=\"${escapeXmlAttr(hint)}\"\n") + writtenAttrs.add("android:hint") + + val inputType = parsedAttrs["android:inputType"] ?: "text" + xml.append("$indent android:inputType=\"${escapeXmlAttr(inputType)}\"\n") + writtenAttrs.add("android:inputType") + + xml.append("$indent tools:ignore=\"HardcodedText\"\n") + writtenAttrs.add("tools:ignore") + } + + private fun appendImageViewAttributes( + xml: StringBuilder, + indent: String, + parsedAttrs: Map, + label: String, + writtenAttrs: MutableSet + ) { + xml.append("$indent android:contentDescription=\"${escapeXmlAttr(label)}\"\n") + writtenAttrs.add("android:contentDescription") + val scaleType = parsedAttrs["android:scaleType"] ?: "centerCrop" + xml.append("$indent android:scaleType=\"${escapeXmlAttr(scaleType)}\"\n") + writtenAttrs.add("android:scaleType") + val bg = parsedAttrs["android:background"] ?: "#E0E0E0" + xml.append("$indent android:background=\"${escapeXmlAttr(bg)}\"\n") + writtenAttrs.add("android:background") + } + private fun parseMarginAnnotations(annotation: String?, tag: String): Map { return FuzzyAttributeParser.parse(annotation, tag) } -} \ No newline at end of file +} From 3ffad0b935be61b1c49ca85ae633939f2d2cc1c4 Mon Sep 17 00:00:00 2001 From: John Trujillo Date: Wed, 15 Apr 2026 14:30:44 -0500 Subject: [PATCH 2/5] refactor: remove redundant sanitizeOpenText function Rely entirely on the existing escapeXmlAttr method for XML safety and drop hardcoded OCR typo fixes. --- .../computervision/domain/FuzzyAttributeParser.kt | 15 --------------- .../computervision/domain/YoloToXmlConverter.kt | 7 ++----- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt index 589fb7f2dd..8268c10f2c 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt @@ -151,13 +151,6 @@ object FuzzyAttributeParser { private val ocrLetterSToFiveRegex = Regex("[sS]") private val ocrLetterBToSixRegex = Regex("[bB]") - private val commonUiTextTypos = listOf( - Regex("(?i)\\bour name\\b") to "User name", - Regex("(?i)\\bfintsh\\b") to "Finish", - Regex("(?i)\\bpassworo\\b") to "Password", - Regex("(?i)\\busemame\\b") to "Username" - ) - private val validInputTypes = listOf( "text", "textPassword", "number", "numberDecimal", "textEmailAddress", "textUri", "phone" @@ -194,14 +187,6 @@ object FuzzyAttributeParser { return if (result.score >= threshold) result.string else rawValue } - fun sanitizeOpenText(text: String): String { - var cleanedText = text - commonUiTextTypos.forEach { (regex, correction) -> - cleanedText = regex.replace(cleanedText, correction) - } - return cleanedText - } - private fun parseDelimited(annotation: String, tag: String): Map { val result = mutableMapOf() diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt index 37f7e49120..8db364e709 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt @@ -299,9 +299,7 @@ object YoloToXmlConverter { else -> box.label } - val viewText = FuzzyAttributeParser.sanitizeOpenText(rawViewText) - - xml.append("$indent android:text=\"${escapeXmlAttr(viewText)}\"\n") + xml.append("$indent android:text=\"${escapeXmlAttr(rawViewText)}\"\n") writtenAttrs.add("android:text") if (tag == "TextView") { val textSize = parsedAttrs["android:textSize"] ?: "16sp" @@ -325,9 +323,8 @@ object YoloToXmlConverter { writtenAttrs: MutableSet ) { val rawHint = parsedAttrs["android:hint"] ?: box.text.ifEmpty { "Enter text..." } - val hint = FuzzyAttributeParser.sanitizeOpenText(rawHint) - xml.append("$indent android:hint=\"${escapeXmlAttr(hint)}\"\n") + xml.append("$indent android:hint=\"${escapeXmlAttr(rawHint)}\"\n") writtenAttrs.add("android:hint") val inputType = parsedAttrs["android:inputType"] ?: "text" From 0d27755606dc5115786556f543c495e6101ba60f Mon Sep 17 00:00:00 2001 From: John Trujillo Date: Wed, 15 Apr 2026 15:29:46 -0500 Subject: [PATCH 3/5] fix: view IDs generation and OCR corrupts normalization --- .../computervision/domain/FuzzyAttributeParser.kt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt index 8268c10f2c..e3bb54367b 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt @@ -1,6 +1,7 @@ package org.appdevforall.codeonthego.computervision.domain import com.itsaky.androidide.fuzzysearch.FuzzySearch +import java.util.concurrent.atomic.AtomicInteger object FuzzyAttributeParser { @@ -12,6 +13,7 @@ object FuzzyAttributeParser { else -> 65 } private const val PIPE_DELIMITER = "|" + private val fallbackIdCounter = AtomicInteger(0) enum class AttributeKey( val xmlName: String, @@ -473,7 +475,7 @@ object FuzzyAttributeParser { .replace(trailingLetterRegex, "") if (FuzzySearch.ratio(cleaned, "match_parent") > 75 || FuzzySearch.ratio(cleaned, "wrap_content") > 75) { - return "view_${(Math.random() * 1000).toInt()}" + return "view_${fallbackIdCounter.getAndIncrement()}" } return cleaned @@ -500,15 +502,15 @@ object FuzzyAttributeParser { } private fun extractOcrNumber(value: String): String? { - val normalized = value + val numberCandidateRegex = Regex("-?[\\doOlIzZsSbB]+") + val match = numberCandidateRegex.find(value) ?: return null + + return match.value .replace(ocrLetterOToZeroRegex, "0") .replace(ocrLetterIToOneRegex, "1") .replace(ocrLetterZToTwoRegex, "2") .replace(ocrLetterSToFiveRegex, "5") .replace(ocrLetterBToSixRegex, "6") - - val match = numberExtractionRegex.find(normalized) ?: return null - return match.value } private fun resolveXmlAttribute( From 7db4aba0f9113d46f490b000cdd5cef1750b4761 Mon Sep 17 00:00:00 2001 From: John Trujillo Date: Thu, 16 Apr 2026 16:09:44 -0500 Subject: [PATCH 4/5] refactor: improve metadata detection --- .../domain/FuzzyAttributeParser.kt | 57 ++++-- .../domain/MarginAnnotationParser.kt | 163 ++++++++++------- .../domain/YoloToXmlConverter.kt | 166 +++++++++++++----- 3 files changed, 264 insertions(+), 122 deletions(-) diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt index e3bb54367b..474312eccc 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt @@ -5,12 +5,12 @@ import java.util.concurrent.atomic.AtomicInteger object FuzzyAttributeParser { - private const val FUZZY_VALUE_THRESHOLD = 60 + private const val FUZZY_VALUE_THRESHOLD = 75 private fun fuzzyKeyThreshold(keyLength: Int): Int = when { - keyLength <= 3 -> 50 - keyLength == 4 -> 55 - else -> 65 + keyLength <= 3 -> 65 + keyLength == 6 -> 75 + else -> 80 } private const val PIPE_DELIMITER = "|" private val fallbackIdCounter = AtomicInteger(0) @@ -31,8 +31,8 @@ object FuzzyAttributeParser { CONTENT_DESCRIPTION("android:contentDescription", listOf("contentdescription", "content_description")), TEXT_SIZE("android:textSize", listOf("textsize", "text_size"), ValueType.SP_DIMENSION), - TEXT_COLOR("android:textColor", listOf("textcolor", "text_color"), ValueType.COLOR), - TEXT_STYLE("android:textStyle", listOf("textstyle", "text_style")), + TEXT_COLOR("android:textColor", listOf("textcolor", "text_color", "color", "text_colar", "textcolar"), ValueType.COLOR), + TEXT_STYLE("android:textStyle", listOf("textstyle", "text_style", "style"), ValueType.RAW), TEXT_ALIGNMENT("android:textAlignment", listOf("textalignment", "text_alignment")), TEXT_ALL_CAPS("android:textAllCaps", listOf("textallcaps", "text_all_caps")), FONT_FAMILY("android:fontFamily", listOf("fontfamily", "font_family", "font")), @@ -132,7 +132,7 @@ object FuzzyAttributeParser { } internal val colorMap = mapOf( - "red" to "#FF0000", "green" to "#00FF00", "blue" to "#0000FF", + "red" to "#FF0000", "rel" to "#FF0000", "green" to "#00FF00", "blue" to "#0000FF", "black" to "#000000", "white" to "#FFFFFF", "gray" to "#808080", "grey" to "#808080", "dark_gray" to "#A9A9A9", "yellow" to "#FFFF00", "cyan" to "#00FFFF", "magenta" to "#FF00FF", "purple" to "#800080", @@ -174,13 +174,28 @@ object FuzzyAttributeParser { .replace(Regex("lay[ao0]ut"), "layout") .replace(Regex("(?<=^|_)[lt]d(?=$|_)"), "id") + private fun denoiseOcrText(text: String): String { + return text + .replace(Regex("\\s+:"), ":") + .replace(Regex("(?i)wrap[\\s_]*c[ao]n?t[eo]nt|wrapcan"), "wrap_content") + .replace(Regex("(?i)match[\\s_]*p[ao]r[eo]nt"), "match_parent") + .replace(Regex("(?i)lay[ao]c?t"), "layout") + .replace(Regex("(?i)magin"), "margin") + .replace(Regex("(?i)text\\s*c[ao]l[ao]r"), "textColor") + .replace(Regex("(?i)text\\s*style"), "textStyle") + .replace(Regex("(?i)\\bRel\\b"), "Red") + .replace(Regex("(?i)b[ao]ld"), "bold") + } + fun parse(annotation: String?, tag: String): Map { if (annotation.isNullOrBlank()) return emptyMap() - return if (annotation.contains(PIPE_DELIMITER)) { - parseDelimited(annotation, tag) + val denoised = denoiseOcrText(annotation) + + return if (denoised.contains(PIPE_DELIMITER)) { + parseDelimited(denoised, tag) } else { - parseByColonScanning(annotation, tag) + parseByColonScanning(denoised, tag) } } @@ -430,20 +445,30 @@ object FuzzyAttributeParser { private fun cleanDimension(value: String): String { val normalized = value.lowercase().replace(" ", "_") - val matchParent = FuzzySearch.ratio(normalized, "match_parent") - if (matchParent >= FUZZY_VALUE_THRESHOLD) return "match_parent" + if ("match" in normalized || "parent" in normalized) return "match_parent" + if ("wrap" in normalized || "content" in normalized || "wrapcan" in normalized) return "wrap_content" - val wrapContent = FuzzySearch.ratio(normalized, "wrap_content") - if (wrapContent >= FUZZY_VALUE_THRESHOLD) return "wrap_content" + val fixedUnit = normalized + .replace(Regex("0p$"), "dp") + .replace(Regex("op$"), "dp") + .replace(Regex("olp$"), "dp") + + val numericString = fixedUnit.replace(Regex("[a-z]+$"), "") + val numericPart = extractOcrNumber(numericString) - val numericPart = extractOcrNumber(value.replace(" ", "")) if (numericPart != null) return "${numericPart}dp" return value } private fun cleanSpDimension(value: String): String { - val numericPart = extractOcrNumber(value) + val fixedUnit = value.lowercase() + .replace(" ", "") + .replace(Regex("5p$"), "sp") + + val numericString = fixedUnit.replace(Regex("[a-z]+$"), "") + val numericPart = extractOcrNumber(numericString) + if (numericPart != null) return "${numericPart}sp" return value } diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt index 05ff24c56f..3398164419 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt @@ -11,8 +11,8 @@ object MarginAnnotationParser { private const val GAP_MULTIPLIER = 1.5f private const val HEIGHT_FRACTION = 0.8f - private val TAG_REGEX = Regex("^(B|P|D|T|C|R|SW|S)-\\d+$") - private val TAG_EXTRACT_REGEX = Regex("^([BPDTCRS8]W?)[^a-zA-Z0-9]*([\\dlIoO!]+)(?:\\s+(.+))?$") + private val TAG_REGEX = Regex("^(?i)(B|P|D|T|C|R|SW|S)-\\d+$") + private val TAG_EXTRACT_REGEX = Regex("^(?i)([BPDTCRS8]\\s*W?)[^a-zA-Z0-9]*([\\dlIoO!]+)(?:\\s+(.+))?$") private fun normalizeOcrDigits(raw: String): String = raw.replace('l', '1').replace('I', '1').replace('!', '1') @@ -23,11 +23,15 @@ object MarginAnnotationParser { private fun extractTag(text: String): Pair? { val trimmed = text.trim().trimEnd('.', ',', ';', '_', '|') val match = TAG_EXTRACT_REGEX.find(trimmed) ?: return null - var prefix = match.groupValues[1] + + var prefix = match.groupValues[1].replace(Regex("\\s+"), "").uppercase() if (prefix == "8") prefix = "B" + if (prefix == "8W" || prefix == "S8") prefix = "SW" + val digit = normalizeOcrDigits(match.groupValues[2]) val remaining = match.groupValues[3].takeIf { it.isNotBlank() } val tag = "$prefix-$digit" + if (isTag(tag)) return tag to remaining return null } @@ -46,7 +50,7 @@ object MarginAnnotationParser { val rightMarginDetections = mutableListOf() for (detection in detections) { - val centerX = detection.boundingBox.centerX() + val centerX = centerX(detection) when { centerX > leftMarginPx && centerX < rightMarginPx -> canvasDetections.add(detection) centerX <= leftMarginPx -> leftMarginDetections.add(detection) @@ -57,10 +61,17 @@ object MarginAnnotationParser { val canvasTags = canvasDetections.mapNotNull { det -> extractTag(det.text)?.let { (tag, _) -> tag to det } } + Log.d( + TAG, + "Canvas OCR tags detected: ${ + canvasTags.joinToString(", ") { (tag, det) -> "$tag from '${det.text}'" } + .ifBlank { "none" } + }" + ) val canvasMidX = imageWidth * (leftGuidePct + rightGuidePct) / 2f - val leftCanvasTags = canvasTags.filter { (_, det) -> det.boundingBox.centerX() < canvasMidX } - val rightCanvasTags = canvasTags.filter { (_, det) -> det.boundingBox.centerX() >= canvasMidX } + val leftCanvasTags = canvasTags.filter { (_, det) -> centerX(det) < canvasMidX } + val rightCanvasTags = canvasTags.filter { (_, det) -> centerX(det) >= canvasMidX } val annotationMap = mutableMapOf() annotationMap.putAll(parseMarginGroup(leftMarginDetections, leftCanvasTags)) @@ -73,7 +84,7 @@ object MarginAnnotationParser { val canvasLogOutput = correctedCanvasDetections.joinToString(", ") { val box = it.boundingBox - "'${it.text}', [left:${box.left.roundToInt()}, top:${box.top.roundToInt()}, width:${box.width().roundToInt()}, height:${box.height().roundToInt()}]" + "'${it.text}', [left:${box.left.roundToInt()}, top:${box.top.roundToInt()}, width:${(box.right - box.left).roundToInt()}, height:${(box.bottom - box.top).roundToInt()}]" } Log.d(TAG, "Parsed Canvas Content (Corrected): $canvasLogOutput") @@ -103,7 +114,7 @@ object MarginAnnotationParser { val parsedBlocks = refinedBlocks.mapIndexed { i, block -> val result = parseBlock(block) - val centerY = block.map { it.boundingBox.centerY() }.average().toFloat() + val centerY = block.map { centerY(it) }.average().toFloat() val annotationText = result?.second ?: block.joinToString(" ") { it.text.trim() }.trim() @@ -112,51 +123,82 @@ object MarginAnnotationParser { } val annotationMap = mutableMapOf() - val matchedBlockIndices = mutableSetOf() - - val tagCounts = parsedBlocks - .mapNotNull { it.tag } - .groupingBy { it } - .eachCount() - - for ((i, parsed) in parsedBlocks.withIndex()) { - if (parsed.tag == null || parsed.annotationText.isBlank()) continue - val isUnique = tagCounts[parsed.tag] == 1 - if (isUnique && canvasTags.any { (tag, _) -> tag == parsed.tag }) { - annotationMap[parsed.tag] = parsed.annotationText - matchedBlockIndices.add(i) - Log.d(TAG, "Pass1: tag='${parsed.tag}' matched by unique tag text") - } else if (!isUnique) { - Log.d(TAG, "Pass1: tag='${parsed.tag}' duplicated ${tagCounts[parsed.tag]} times, deferring to Pass2") + + val canvasTagsByPrefix = canvasTags + .groupBy { (tag, _) -> tag.substringBefore('-') } + .mapValues { (_, tags) -> + tags.sortedBy { (_, det) -> centerY(det) } } + + val explicitBlocks = parsedBlocks + .filter { it.tag != null && it.annotationText.isNotBlank() } + + val implicitBlocks = parsedBlocks + .filter { it.tag == null && it.annotationText.length >= 5 } + + for (block in explicitBlocks) { + val tag = block.tag ?: continue + if (canvasTags.isEmpty() || canvasTags.any { (canvasTag, _) -> canvasTag == tag }) { + annotationMap[tag] = block.annotationText + Log.d(TAG, "Pass1: explicit tag match '$tag'") + } + } + + if (canvasTags.isEmpty()) { + Log.d(TAG, "No canvas OCR tags detected; keeping explicit margin annotations only: ${annotationMap.keys}") + return annotationMap } - val remainingBlocks = parsedBlocks.indices - .filter { it !in matchedBlockIndices } - .map { it to parsedBlocks[it] } - .filter { (_, parsed) -> parsed.annotationText.length >= 5 } - .sortedBy { (_, parsed) -> parsed.centerY } - - val usedCanvasTags = mutableSetOf() - for ((idx, parsed) in remainingBlocks) { - val matchingTag = canvasTags - .filter { (tag, _) -> tag !in annotationMap && tag !in usedCanvasTags } - .minByOrNull { (_, det) -> abs(det.boundingBox.centerY() - parsed.centerY) } - - if (matchingTag != null) { - Log.d(TAG, "Pass2: Y-matched block $idx (${parsed.lineCount} lines) -> '${matchingTag.first}'") - annotationMap[matchingTag.first] = parsed.annotationText - usedCanvasTags.add(matchingTag.first) + val unresolvedTagsByPrefix = canvasTagsByPrefix + .mapValues { (prefix, tags) -> + tags.map { it.first } + .filter { tag -> tag !in annotationMap } + .sortedBy { tag -> extractOrdinal(tag) ?: Int.MAX_VALUE } + .toMutableList() } + .toMutableMap() + + val implicitBlocksSorted = implicitBlocks.sortedBy { it.centerY } + + for (block in implicitBlocksSorted) { + val closestPrefix = unresolvedTagsByPrefix + .filterValues { it.isNotEmpty() } + .minByOrNull { (prefix, remainingTags) -> + val nearestTagY = canvasTagsByPrefix[prefix] + ?.firstOrNull { (tag, _) -> tag == remainingTags.firstOrNull() } + ?.second + ?.let { centerY(it) } + ?: Float.MAX_VALUE + + abs(nearestTagY - block.centerY) + } + ?.key + ?: continue + + val assignedTag = unresolvedTagsByPrefix[closestPrefix]?.removeFirstOrNull() ?: continue + annotationMap[assignedTag] = block.annotationText + Log.d(TAG, "Pass2: implicit block assigned to '$assignedTag'") } return annotationMap } + private fun extractOrdinal(tag: String): Int? { + return tag.substringAfter('-', "").toIntOrNull() + } + + private fun centerX(detection: DetectionResult): Float { + return (detection.boundingBox.left + detection.boundingBox.right) / 2f + } + + private fun centerY(detection: DetectionResult): Float { + return (detection.boundingBox.top + detection.boundingBox.bottom) / 2f + } + private fun clusterIntoBlocks(sorted: List): List> { if (sorted.size <= 1) return listOf(sorted) - val avgHeight = sorted.map { it.boundingBox.height() }.average().toFloat() + val avgHeight = sorted.map { it.boundingBox.bottom - it.boundingBox.top }.average().toFloat() val gaps = (0 until sorted.size - 1).map { i -> sorted[i + 1].boundingBox.top - sorted[i].boundingBox.bottom } @@ -205,32 +247,33 @@ object MarginAnnotationParser { private fun parseBlock(block: List): Pair? { var tag: String? = null - var tagFoundAtIndex = -1 val annotationLines = mutableListOf() for ((index, detection) in block.withIndex()) { - val text = detection.text.trim() - if (tag == null && index <= 1) { - val tagExtraction = extractTag(text) - if (tagExtraction != null) { - tag = tagExtraction.first - tagFoundAtIndex = index - tagExtraction.second?.let { annotationLines.add(it) } - continue - } + val text = detection.text + .trim() + .trimStart('|', ':', ';', '.', ',', '_') + + val tagExtraction = extractTag(text) + + if (tag == null && tagExtraction != null && index <= 2) { + tag = tagExtraction.first + tagExtraction.second + ?.trim() + ?.takeIf { it.isNotBlank() } + ?.let(annotationLines::add) + continue } + annotationLines.add(text) } - if (tag != null && tagFoundAtIndex == 1 && annotationLines.isNotEmpty()) { - val firstLine = annotationLines.first() - val tagPrefix = tag.substringBefore('-') - if (firstLine.length <= 2 && firstLine.uppercase().startsWith(tagPrefix)) { - annotationLines.removeAt(0) - } - } + val cleanedAnnotation = annotationLines + .joinToString(" ") + .replace(Regex("\\s+"), " ") + .trim() if (tag == null) return null - return tag to annotationLines.joinToString(" ").trim() + return tag to cleanedAnnotation } -} \ No newline at end of file +} diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt index 8db364e709..40e97665a4 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt @@ -4,9 +4,7 @@ import android.graphics.Rect import android.util.Log import org.appdevforall.codeonthego.computervision.domain.model.DetectionResult import kotlin.math.max -import kotlin.math.pow import kotlin.math.roundToInt -import kotlin.math.sqrt object YoloToXmlConverter { @@ -15,29 +13,33 @@ object YoloToXmlConverter { private const val MIN_H_ANY = 8 private const val OVERLAP_THRESHOLD = 0.6 - private val TAG_REGEX = Regex("^(B|P|D|T|C|R|SW|S)-\\d+$") - private val TAG_EXTRACT_REGEX = Regex("^([BPDTCRS8]W?)[^a-zA-Z0-9]*([\\dlIoO!]+)$") + private val TAG_REGEX = Regex("^(?i)(B|P|D|T|C|R|SW|S)-\\d+$") + private val TAG_EXTRACT_REGEX = Regex("^(?i)([BPDTCRS8]\\s*W?)[^a-zA-Z0-9]*([\\dlIoO!]+)$") private fun normalizeOcrDigits(raw: String): String = raw.replace('l', '1').replace('I', '1').replace('!', '1') .replace('o', '0').replace('O', '0') - private data class ScaledBox( + private class ScaledBox( val label: String, var text: String, val x: Int, val y: Int, val w: Int, val h: Int, val centerX: Int, val centerY: Int, val rect: Rect ) private fun normalizeTagText(text: String): String { val trimmed = text.trim().trimEnd('.', ',', ';', ':', '_', '|') - val match = TAG_EXTRACT_REGEX.find(trimmed) ?: return trimmed - var prefix = match.groupValues[1] + val match = TAG_EXTRACT_REGEX.find(trimmed) ?: return trimmed.uppercase() + + var prefix = match.groupValues[1].replace(Regex("\\s+"), "").uppercase() if (prefix == "8") prefix = "B" + if (prefix == "8W" || prefix == "S8") prefix = "SW" + return "$prefix-${normalizeOcrDigits(match.groupValues[2])}" } private fun isTag(text: String): Boolean = normalizeTagText(text).matches(TAG_REGEX) private fun getTagType(tag: String): String? { + val upperTag = tag.uppercase() return when { tag.startsWith("B-") -> "button" tag.startsWith("P-") -> "image_placeholder" @@ -51,12 +53,6 @@ object YoloToXmlConverter { } } - private fun distance(box1: ScaledBox, box2: ScaledBox): Float { - val dx = (box1.centerX - box2.centerX).toFloat() - val dy = (box1.centerY - box2.centerY).toFloat() - return sqrt(dx.pow(2) + dy.pow(2)) - } - fun generateXmlLayout( detections: List, annotations: Map, @@ -66,7 +62,15 @@ object YoloToXmlConverter { targetDpHeight: Int, wrapInScroll: Boolean = true ): String { - val widgets = detections.filter { it.isYolo && it.label != "widget_tag" } + val widgets = detections + .filter { it.isYolo && it.label != "widget_tag" } + .distinctBy { + if (it.label.startsWith("switch")) { + "${((it.boundingBox.top + it.boundingBox.bottom) / 2f).toInt() / 50}" + } else { + "${it.label}:${it.boundingBox.left}:${it.boundingBox.top}:${it.boundingBox.right}:${it.boundingBox.bottom}" + } + } var scaledBoxes = widgets.map { scaleDetection(it, sourceImageWidth, sourceImageHeight, targetDpWidth, targetDpHeight) } val parents = scaledBoxes.filter { it.label != "text" && !isTag(it.text) } @@ -81,7 +85,7 @@ object YoloToXmlConverter { val finalAnnotations = matchAnnotationsToElements(canvasTags, uiElements, annotations) val sortedBoxes = uiElements.sortedWith(compareBy({ it.y }, { it.x })) - return buildXml(sortedBoxes, finalAnnotations, targetDpWidth, targetDpHeight, wrapInScroll) + return buildXml(sortedBoxes, finalAnnotations, targetDpHeight, wrapInScroll) } private fun assignTextToParents(parents: List, texts: List, allBoxes: List): List { @@ -112,43 +116,112 @@ object YoloToXmlConverter { ): Map { val finalAnnotations = mutableMapOf() val claimedWidgets = mutableSetOf() - val appliedAnnotationKeys = mutableSetOf() val deduplicatedTags = canvasTags .groupBy { normalizeTagText(it.text) } .map { (_, group) -> group.first() } - for (tagBox in deduplicatedTags) { - val normalizedText = normalizeTagText(tagBox.text) - val tagType = getTagType(normalizedText) ?: continue - val annotation = annotations[normalizedText] ?: continue + val tagsByWidgetType = annotations + .mapNotNull { (tagText, annotationText) -> + val normalizedTag = normalizeTagText(tagText) + val widgetType = getTagType(normalizedTag) ?: return@mapNotNull null + + val matchingTagBox = deduplicatedTags.find { normalizeTagText(it.text) == normalizedTag } + + TaggedAnnotation( + normalizedTag = normalizedTag, + widgetType = widgetType, + annotation = annotationText, + tagBox = matchingTagBox + ) + } + .groupBy { it.widgetType } + + val widgetsByType = uiElements.groupBy { normalizeWidgetType(it.label) } + + for ((widgetType, taggedAnnotations) in tagsByWidgetType) { + val candidateWidgets = widgetsByType[widgetType] + ?.sortedWith(compareBy({ it.y }, { it.x })) + ?: continue + + val sortedTags = taggedAnnotations.sortedWith( + compareBy( + { extractTagOrdinal(it.normalizedTag) ?: Int.MAX_VALUE }, + { it.tagBox?.y ?: Int.MAX_VALUE }, + { it.tagBox?.x ?: Int.MAX_VALUE } + ) + ) + + for (taggedAnnotation in sortedTags) { + val ordinal = extractTagOrdinal(taggedAnnotation.normalizedTag) + val matchedWidget = findWidgetByOrdinalOrFallback( + ordinal = ordinal, + tagBox = taggedAnnotation.tagBox, + candidates = candidateWidgets, + claimedWidgets = claimedWidgets + ) ?: continue + + finalAnnotations[matchedWidget] = taggedAnnotation.annotation + claimedWidgets.add(matchedWidget) + } + } + + return finalAnnotations + } + + private data class TaggedAnnotation( + val normalizedTag: String, + val widgetType: String, + val annotation: String, + val tagBox: ScaledBox? + ) - val closestElement = uiElements - .filter { it.label.startsWith(tagType) && it !in claimedWidgets } - .minByOrNull { distance(tagBox, it) } + private fun normalizeWidgetType(label: String): String = when { + label.startsWith("text_entry_box") -> "text_entry_box" + label.startsWith("button") -> "button" + label.startsWith("switch") -> "switch" + label.startsWith("checkbox") -> "checkbox" + label.startsWith("radio") -> "radio" + label.startsWith("dropdown") -> "dropdown" + label.startsWith("slider") -> "slider" + label.startsWith("image_placeholder") -> "image_placeholder" + else -> label + } - if (closestElement != null) { - finalAnnotations[closestElement] = annotation - claimedWidgets.add(closestElement) - appliedAnnotationKeys.add(normalizedText) + private fun extractTagOrdinal(tag: String): Int? { + return tag.substringAfter('-', "").toIntOrNull() + } + + private fun findWidgetByOrdinalOrFallback( + ordinal: Int?, + tagBox: ScaledBox?, + candidates: List, + claimedWidgets: Set + ): ScaledBox? { + val available = candidates.filter { it !in claimedWidgets } + if (available.isEmpty()) return null + + if (ordinal != null) { + val zeroBasedMatch = candidates.getOrNull(ordinal) + if (zeroBasedMatch != null && zeroBasedMatch !in claimedWidgets) { + return zeroBasedMatch + } + + val oneBasedMatch = candidates.getOrNull(ordinal - 1) + if (oneBasedMatch != null && oneBasedMatch !in claimedWidgets) { + return oneBasedMatch } } - for ((tagText, annotation) in annotations) { - val normalizedTagText = normalizeTagText(tagText) - if (normalizedTagText in appliedAnnotationKeys) continue - val tagType = getTagType(normalizedTagText) ?: continue - val unclaimed = uiElements - .filter { it.label.startsWith(tagType) && it !in claimedWidgets } - .sortedWith(compareBy({ it.y }, { it.x })) - .firstOrNull() - if (unclaimed != null) { - finalAnnotations[unclaimed] = annotation - claimedWidgets.add(unclaimed) + if (tagBox != null) { + return available.minByOrNull { candidate -> + val verticalDistance = kotlin.math.abs(tagBox.centerY - candidate.centerY) + val horizontalDistance = kotlin.math.abs(tagBox.centerX - candidate.centerX) + (verticalDistance * 2) + horizontalDistance } } - return finalAnnotations + return available.minByOrNull { it.y } } private fun scaleDetection( @@ -158,10 +231,10 @@ object YoloToXmlConverter { return ScaledBox(detection.label, detection.text, 0, 0, MIN_W_ANY, MIN_H_ANY, MIN_W_ANY / 2, MIN_H_ANY / 2, Rect(0, 0, MIN_W_ANY, MIN_H_ANY)) } val rect = detection.boundingBox - val normCx = rect.centerX() / sourceWidth - val normCy = rect.centerY() / sourceHeight - val normW = rect.width() / sourceWidth - val normH = rect.height() / sourceHeight + val normCx = ((rect.left + rect.right) / 2f) / sourceWidth + val normCy = ((rect.top + rect.bottom) / 2f) / sourceHeight + val normW = (rect.right - rect.left) / sourceWidth + val normH = (rect.bottom - rect.top) / sourceHeight val x = max(0, ((normCx - normW / 2.0) * targetW).roundToInt()) val y = max(0, ((normCy - normH / 2.0) * targetH).roundToInt()) val w = max(MIN_W_ANY, (normW * targetW).roundToInt()) @@ -180,7 +253,9 @@ object YoloToXmlConverter { } private fun escapeXmlAttr(value: String): String = - value.replace("&", "&") + value.replace("|", "") + .trim() + .replace("&", "&") .replace("<", "<") .replace(">", ">") .replace("\"", """) @@ -191,7 +266,7 @@ object YoloToXmlConverter { "button" -> "Button" "image_placeholder", "icon" -> "ImageView" "checkbox_unchecked", "checkbox_checked" -> "CheckBox" - "radio_unchecked", "radio_checked" -> "RadioButton" + "radio_button_unchecked", "radio_button_checked" -> "RadioButton" "switch_off", "switch_on" -> "Switch" "text_entry_box" -> "EditText" "dropdown" -> "Spinner" @@ -203,7 +278,6 @@ object YoloToXmlConverter { private fun buildXml( boxes: List, annotations: Map, - targetDpWidth: Int, targetDpHeight: Int, wrapInScroll: Boolean ): String { From eb6b697e34810349816981cd191e968322b9dc85 Mon Sep 17 00:00:00 2001 From: John Trujillo Date: Fri, 17 Apr 2026 09:20:08 -0500 Subject: [PATCH 5/5] refactor: remove debug logs, remove manual rules for texts and simplify dimension keyword matching --- .../domain/FuzzyAttributeParser.kt | 42 +++++-------------- .../domain/MarginAnnotationParser.kt | 38 ++--------------- 2 files changed, 14 insertions(+), 66 deletions(-) diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt index 474312eccc..eedf975b25 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt @@ -1,7 +1,6 @@ package org.appdevforall.codeonthego.computervision.domain import com.itsaky.androidide.fuzzysearch.FuzzySearch -import java.util.concurrent.atomic.AtomicInteger object FuzzyAttributeParser { @@ -13,7 +12,6 @@ object FuzzyAttributeParser { else -> 80 } private const val PIPE_DELIMITER = "|" - private val fallbackIdCounter = AtomicInteger(0) enum class AttributeKey( val xmlName: String, @@ -144,8 +142,6 @@ object FuzzyAttributeParser { private val nonAlphanumericRegex = Regex("[^a-z0-9_]") private val multipleUnderscoresRegex = Regex("_+") - private val trailingLetterRegex = Regex("_[a-z]$") - private val numberExtractionRegex = Regex("-?\\d+") private val ocrLetterOToZeroRegex = Regex("[oO]") private val ocrLetterIToOneRegex = Regex("[lI]") @@ -153,6 +149,9 @@ object FuzzyAttributeParser { private val ocrLetterSToFiveRegex = Regex("[sS]") private val ocrLetterBToSixRegex = Regex("[bB]") + private val matchKeywords = setOf("match", "parent") + private val wrapKeywords = setOf("wrap", "content", "wrapcan") + private val validInputTypes = listOf( "text", "textPassword", "number", "numberDecimal", "textEmailAddress", "textUri", "phone" @@ -174,28 +173,15 @@ object FuzzyAttributeParser { .replace(Regex("lay[ao0]ut"), "layout") .replace(Regex("(?<=^|_)[lt]d(?=$|_)"), "id") - private fun denoiseOcrText(text: String): String { - return text - .replace(Regex("\\s+:"), ":") - .replace(Regex("(?i)wrap[\\s_]*c[ao]n?t[eo]nt|wrapcan"), "wrap_content") - .replace(Regex("(?i)match[\\s_]*p[ao]r[eo]nt"), "match_parent") - .replace(Regex("(?i)lay[ao]c?t"), "layout") - .replace(Regex("(?i)magin"), "margin") - .replace(Regex("(?i)text\\s*c[ao]l[ao]r"), "textColor") - .replace(Regex("(?i)text\\s*style"), "textStyle") - .replace(Regex("(?i)\\bRel\\b"), "Red") - .replace(Regex("(?i)b[ao]ld"), "bold") - } - fun parse(annotation: String?, tag: String): Map { if (annotation.isNullOrBlank()) return emptyMap() - val denoised = denoiseOcrText(annotation) + val normalizedSpacing = annotation.replace(Regex("\\s+:"), ":") - return if (denoised.contains(PIPE_DELIMITER)) { - parseDelimited(denoised, tag) + return if (normalizedSpacing.contains(PIPE_DELIMITER)) { + parseDelimited(normalizedSpacing, tag) } else { - parseByColonScanning(denoised, tag) + parseByColonScanning(normalizedSpacing, tag) } } @@ -445,8 +431,8 @@ object FuzzyAttributeParser { private fun cleanDimension(value: String): String { val normalized = value.lowercase().replace(" ", "_") - if ("match" in normalized || "parent" in normalized) return "match_parent" - if ("wrap" in normalized || "content" in normalized || "wrapcan" in normalized) return "wrap_content" + if (matchKeywords.any { it in normalized }) return "match_parent" + if (wrapKeywords.any { it in normalized }) return "wrap_content" val fixedUnit = normalized .replace(Regex("0p$"), "dp") @@ -491,19 +477,11 @@ object FuzzyAttributeParser { } private fun cleanId(value: String): String { - val cleaned = value.lowercase() + return value.lowercase() .replace(nonAlphanumericRegex, "_") .replace(multipleUnderscoresRegex, "_") - .replace("btm", "btn") // OCR typo: btm_finish -> btn_finish .trimEnd('_') .trimStart('_') - .replace(trailingLetterRegex, "") - - if (FuzzySearch.ratio(cleaned, "match_parent") > 75 || FuzzySearch.ratio(cleaned, "wrap_content") > 75) { - return "view_${fallbackIdCounter.getAndIncrement()}" - } - - return cleaned } private fun denoiseOcrIdentifier(value: String): String = diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt index 3398164419..7977ea0678 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt @@ -1,13 +1,9 @@ package org.appdevforall.codeonthego.computervision.domain -import android.util.Log import org.appdevforall.codeonthego.computervision.domain.model.DetectionResult import kotlin.math.abs -import kotlin.math.roundToInt object MarginAnnotationParser { - - private const val TAG = "MarginAnnotationParser" private const val GAP_MULTIPLIER = 1.5f private const val HEIGHT_FRACTION = 0.8f @@ -61,13 +57,6 @@ object MarginAnnotationParser { val canvasTags = canvasDetections.mapNotNull { det -> extractTag(det.text)?.let { (tag, _) -> tag to det } } - Log.d( - TAG, - "Canvas OCR tags detected: ${ - canvasTags.joinToString(", ") { (tag, det) -> "$tag from '${det.text}'" } - .ifBlank { "none" } - }" - ) val canvasMidX = imageWidth * (leftGuidePct + rightGuidePct) / 2f val leftCanvasTags = canvasTags.filter { (_, det) -> centerX(det) < canvasMidX } @@ -77,18 +66,7 @@ object MarginAnnotationParser { annotationMap.putAll(parseMarginGroup(leftMarginDetections, leftCanvasTags)) annotationMap.putAll(parseMarginGroup(rightMarginDetections, rightCanvasTags)) - val correctedCanvasDetections = canvasDetections - - val finalAnnotationLog = annotationMap.entries.joinToString(", ") { "'${it.key}' -> '${it.value}'" } - Log.d(TAG, "Processed Margin Annotations: {$finalAnnotationLog}") - - val canvasLogOutput = correctedCanvasDetections.joinToString(", ") { - val box = it.boundingBox - "'${it.text}', [left:${box.left.roundToInt()}, top:${box.top.roundToInt()}, width:${(box.right - box.left).roundToInt()}, height:${(box.bottom - box.top).roundToInt()}]" - } - Log.d(TAG, "Parsed Canvas Content (Corrected): $canvasLogOutput") - - return Pair(correctedCanvasDetections, annotationMap) + return Pair(canvasDetections, annotationMap) } private data class ParsedBlock( @@ -110,15 +88,12 @@ object MarginAnnotationParser { val gapBlocks = clusterIntoBlocks(sorted) val refinedBlocks = gapBlocks.flatMap { splitAtTags(it, validPrefixes) } - Log.d(TAG, "Spatial clustering: ${detections.size} lines -> ${gapBlocks.size} gap-blocks -> ${refinedBlocks.size} refined-blocks") - - val parsedBlocks = refinedBlocks.mapIndexed { i, block -> + val parsedBlocks = refinedBlocks.mapIndexed { _, block -> val result = parseBlock(block) val centerY = block.map { centerY(it) }.average().toFloat() val annotationText = result?.second ?: block.joinToString(" ") { it.text.trim() }.trim() - Log.d(TAG, "Block $i: tag=${result?.first ?: "none"}, ${block.size} lines, text='${annotationText.take(40)}'") ParsedBlock(result?.first, annotationText, centerY, block.size) } @@ -140,17 +115,13 @@ object MarginAnnotationParser { val tag = block.tag ?: continue if (canvasTags.isEmpty() || canvasTags.any { (canvasTag, _) -> canvasTag == tag }) { annotationMap[tag] = block.annotationText - Log.d(TAG, "Pass1: explicit tag match '$tag'") } } - if (canvasTags.isEmpty()) { - Log.d(TAG, "No canvas OCR tags detected; keeping explicit margin annotations only: ${annotationMap.keys}") - return annotationMap - } + if (canvasTags.isEmpty()) return annotationMap val unresolvedTagsByPrefix = canvasTagsByPrefix - .mapValues { (prefix, tags) -> + .mapValues { (_, tags) -> tags.map { it.first } .filter { tag -> tag !in annotationMap } .sortedBy { tag -> extractOrdinal(tag) ?: Int.MAX_VALUE } @@ -177,7 +148,6 @@ object MarginAnnotationParser { val assignedTag = unresolvedTagsByPrefix[closestPrefix]?.removeFirstOrNull() ?: continue annotationMap[assignedTag] = block.annotationText - Log.d(TAG, "Pass2: implicit block assigned to '$assignedTag'") } return annotationMap