diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt index de85c332f7..eedf975b25 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/FuzzyAttributeParser.kt @@ -4,12 +4,12 @@ import com.itsaky.androidide.fuzzysearch.FuzzySearch object FuzzyAttributeParser { - private const val FUZZY_VALUE_THRESHOLD = 60 + private const val FUZZY_VALUE_THRESHOLD = 75 private fun fuzzyKeyThreshold(keyLength: Int): Int = when { - keyLength <= 3 -> 50 - keyLength == 4 -> 55 - else -> 65 + keyLength <= 3 -> 65 + keyLength == 6 -> 75 + else -> 80 } private const val PIPE_DELIMITER = "|" @@ -29,8 +29,8 @@ object FuzzyAttributeParser { CONTENT_DESCRIPTION("android:contentDescription", listOf("contentdescription", "content_description")), TEXT_SIZE("android:textSize", listOf("textsize", "text_size"), ValueType.SP_DIMENSION), - TEXT_COLOR("android:textColor", listOf("textcolor", "text_color"), ValueType.COLOR), - TEXT_STYLE("android:textStyle", listOf("textstyle", "text_style")), + TEXT_COLOR("android:textColor", listOf("textcolor", "text_color", "color", "text_colar", "textcolar"), ValueType.COLOR), + TEXT_STYLE("android:textStyle", listOf("textstyle", "text_style", "style"), ValueType.RAW), TEXT_ALIGNMENT("android:textAlignment", listOf("textalignment", "text_alignment")), TEXT_ALL_CAPS("android:textAllCaps", listOf("textallcaps", "text_all_caps")), FONT_FAMILY("android:fontFamily", listOf("fontfamily", "font_family", "font")), @@ -130,7 +130,7 @@ object FuzzyAttributeParser { } internal val colorMap = mapOf( - "red" to "#FF0000", "green" to "#00FF00", "blue" to "#0000FF", + "red" to "#FF0000", "rel" to "#FF0000", "green" to "#00FF00", "blue" to "#0000FF", "black" to "#000000", "white" to "#FFFFFF", "gray" to "#808080", "grey" to "#808080", "dark_gray" to "#A9A9A9", "yellow" to "#FFFF00", "cyan" to "#00FFFF", "magenta" to "#FF00FF", "purple" to "#800080", @@ -140,25 +140,56 @@ object FuzzyAttributeParser { "transparent" to "@android:color/transparent" ) + private val nonAlphanumericRegex = Regex("[^a-z0-9_]") + private val multipleUnderscoresRegex = Regex("_+") + + private val ocrLetterOToZeroRegex = Regex("[oO]") + private val ocrLetterIToOneRegex = Regex("[lI]") + private val ocrLetterZToTwoRegex = Regex("[zZ]") + private val ocrLetterSToFiveRegex = Regex("[sS]") + private val ocrLetterBToSixRegex = Regex("[bB]") + + private val matchKeywords = setOf("match", "parent") + private val wrapKeywords = setOf("wrap", "content", "wrapcan") + + private val validInputTypes = listOf( + "text", "textPassword", "number", "numberDecimal", + "textEmailAddress", "textUri", "phone" + ) + + private val validGravities = listOf( + "top", "bottom", "left", "right", "center", + "center_vertical", "center_horizontal", "start", "end" + ) + + private val validTextStyles = listOf("normal", "bold", "italic") + private fun normalizeOcrKey(raw: String): String = raw.lowercase() .replace("-", "_") .replace(".", "_") .replace(" ", "_") - .replace(Regex("_+"), "_") + .replace(multipleUnderscoresRegex, "_") .replace(Regex("lay[ao0]ut"), "layout") .replace(Regex("(?<=^|_)[lt]d(?=$|_)"), "id") fun parse(annotation: String?, tag: String): Map { if (annotation.isNullOrBlank()) return emptyMap() - return if (annotation.contains(PIPE_DELIMITER)) { - parseDelimited(annotation, tag) + val normalizedSpacing = annotation.replace(Regex("\\s+:"), ":") + + return if (normalizedSpacing.contains(PIPE_DELIMITER)) { + parseDelimited(normalizedSpacing, tag) } else { - parseByColonScanning(annotation, tag) + parseByColonScanning(normalizedSpacing, tag) } } + private fun matchCategoricalValue(rawValue: String, allowedValues: List, threshold: Int = 70): String { + val result = FuzzySearch.extractOne(rawValue, allowedValues) + return if (result.score >= threshold) result.string else rawValue + } + private fun parseDelimited(annotation: String, tag: String): Map { val result = mutableMapOf() @@ -181,7 +212,7 @@ object FuzzyAttributeParser { val rawValue: String if (colonIndex != -1) { - rawKey = chunk.substring(0, colonIndex).trim() + rawKey = chunk.take(colonIndex).trim() rawValue = chunk.substring(colonIndex + 1).trim() } else { val splitResult = inferKeyValueBoundary(chunk) ?: return null @@ -212,7 +243,7 @@ object FuzzyAttributeParser { val matchedKeys = mutableListOf() for (colonPos in colonPositions) { - val textBefore = annotation.substring(0, colonPos) + val textBefore = annotation.take(colonPos) val words = textBefore.trimEnd().split(Regex("\\s+")) var bestMatch: Pair? = null @@ -246,10 +277,10 @@ object FuzzyAttributeParser { if (bestMatch != null) { val alreadyClaimed = matchedKeys.any { existing -> - bestMatch!!.second >= existing.keyStart && bestMatch!!.second < existing.valueStart + bestMatch.second >= existing.keyStart && bestMatch.second < existing.valueStart } if (!alreadyClaimed) { - matchedKeys.add(MatchedKey(bestMatch!!.first, bestMatch!!.second, colonPos + 1)) + matchedKeys.add(MatchedKey(bestMatch.first, bestMatch.second, colonPos + 1)) } } } @@ -378,6 +409,13 @@ object FuzzyAttributeParser { private fun cleanValue(rawValue: String, key: AttributeKey): String { val trimmed = rawValue.trim() + when (key) { + AttributeKey.INPUT_TYPE -> return matchCategoricalValue(trimmed, validInputTypes) + AttributeKey.GRAVITY, AttributeKey.LAYOUT_GRAVITY -> return matchCategoricalValue(trimmed, validGravities) + AttributeKey.TEXT_STYLE -> return matchCategoricalValue(trimmed, validTextStyles) + else -> {} + } + return when (key.valueType) { ValueType.DIMENSION -> cleanDimension(trimmed) ValueType.SP_DIMENSION -> cleanSpDimension(trimmed) @@ -393,20 +431,30 @@ object FuzzyAttributeParser { private fun cleanDimension(value: String): String { val normalized = value.lowercase().replace(" ", "_") - val matchParent = FuzzySearch.ratio(normalized, "match_parent") - if (matchParent >= FUZZY_VALUE_THRESHOLD) return "match_parent" + if (matchKeywords.any { it in normalized }) return "match_parent" + if (wrapKeywords.any { it in normalized }) return "wrap_content" - val wrapContent = FuzzySearch.ratio(normalized, "wrap_content") - if (wrapContent >= FUZZY_VALUE_THRESHOLD) return "wrap_content" + val fixedUnit = normalized + .replace(Regex("0p$"), "dp") + .replace(Regex("op$"), "dp") + .replace(Regex("olp$"), "dp") + + val numericString = fixedUnit.replace(Regex("[a-z]+$"), "") + val numericPart = extractOcrNumber(numericString) - val numericPart = extractOcrNumber(value.replace(" ", "")) if (numericPart != null) return "${numericPart}dp" return value } private fun cleanSpDimension(value: String): String { - val numericPart = extractOcrNumber(value) + val fixedUnit = value.lowercase() + .replace(" ", "") + .replace(Regex("5p$"), "sp") + + val numericString = fixedUnit.replace(Regex("[a-z]+$"), "") + val numericPart = extractOcrNumber(numericString) + if (numericPart != null) return "${numericPart}sp" return value } @@ -430,11 +478,10 @@ object FuzzyAttributeParser { private fun cleanId(value: String): String { return value.lowercase() - .replace(Regex("[^a-z0-9_]"), "_") - .replace(Regex("_+"), "_") + .replace(nonAlphanumericRegex, "_") + .replace(multipleUnderscoresRegex, "_") .trimEnd('_') .trimStart('_') - .replace(Regex("_[a-z]$"), "") } private fun denoiseOcrIdentifier(value: String): String = @@ -458,10 +505,15 @@ object FuzzyAttributeParser { } private fun extractOcrNumber(value: String): String? { - val match = Regex("-?\\d[\\doOlIaA]*").find(value) ?: return null + val numberCandidateRegex = Regex("-?[\\doOlIzZsSbB]+") + val match = numberCandidateRegex.find(value) ?: return null + return match.value - .replace(Regex("[oOaA]"), "0") - .replace(Regex("[lI]"), "1") + .replace(ocrLetterOToZeroRegex, "0") + .replace(ocrLetterIToOneRegex, "1") + .replace(ocrLetterZToTwoRegex, "2") + .replace(ocrLetterSToFiveRegex, "5") + .replace(ocrLetterBToSixRegex, "6") } private fun resolveXmlAttribute( diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt index 05ff24c56f..7977ea0678 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/MarginAnnotationParser.kt @@ -1,18 +1,14 @@ package org.appdevforall.codeonthego.computervision.domain -import android.util.Log import org.appdevforall.codeonthego.computervision.domain.model.DetectionResult import kotlin.math.abs -import kotlin.math.roundToInt object MarginAnnotationParser { - - private const val TAG = "MarginAnnotationParser" private const val GAP_MULTIPLIER = 1.5f private const val HEIGHT_FRACTION = 0.8f - private val TAG_REGEX = Regex("^(B|P|D|T|C|R|SW|S)-\\d+$") - private val TAG_EXTRACT_REGEX = Regex("^([BPDTCRS8]W?)[^a-zA-Z0-9]*([\\dlIoO!]+)(?:\\s+(.+))?$") + private val TAG_REGEX = Regex("^(?i)(B|P|D|T|C|R|SW|S)-\\d+$") + private val TAG_EXTRACT_REGEX = Regex("^(?i)([BPDTCRS8]\\s*W?)[^a-zA-Z0-9]*([\\dlIoO!]+)(?:\\s+(.+))?$") private fun normalizeOcrDigits(raw: String): String = raw.replace('l', '1').replace('I', '1').replace('!', '1') @@ -23,11 +19,15 @@ object MarginAnnotationParser { private fun extractTag(text: String): Pair? { val trimmed = text.trim().trimEnd('.', ',', ';', '_', '|') val match = TAG_EXTRACT_REGEX.find(trimmed) ?: return null - var prefix = match.groupValues[1] + + var prefix = match.groupValues[1].replace(Regex("\\s+"), "").uppercase() if (prefix == "8") prefix = "B" + if (prefix == "8W" || prefix == "S8") prefix = "SW" + val digit = normalizeOcrDigits(match.groupValues[2]) val remaining = match.groupValues[3].takeIf { it.isNotBlank() } val tag = "$prefix-$digit" + if (isTag(tag)) return tag to remaining return null } @@ -46,7 +46,7 @@ object MarginAnnotationParser { val rightMarginDetections = mutableListOf() for (detection in detections) { - val centerX = detection.boundingBox.centerX() + val centerX = centerX(detection) when { centerX > leftMarginPx && centerX < rightMarginPx -> canvasDetections.add(detection) centerX <= leftMarginPx -> leftMarginDetections.add(detection) @@ -59,25 +59,14 @@ object MarginAnnotationParser { } val canvasMidX = imageWidth * (leftGuidePct + rightGuidePct) / 2f - val leftCanvasTags = canvasTags.filter { (_, det) -> det.boundingBox.centerX() < canvasMidX } - val rightCanvasTags = canvasTags.filter { (_, det) -> det.boundingBox.centerX() >= canvasMidX } + val leftCanvasTags = canvasTags.filter { (_, det) -> centerX(det) < canvasMidX } + val rightCanvasTags = canvasTags.filter { (_, det) -> centerX(det) >= canvasMidX } val annotationMap = mutableMapOf() annotationMap.putAll(parseMarginGroup(leftMarginDetections, leftCanvasTags)) annotationMap.putAll(parseMarginGroup(rightMarginDetections, rightCanvasTags)) - val correctedCanvasDetections = canvasDetections - - val finalAnnotationLog = annotationMap.entries.joinToString(", ") { "'${it.key}' -> '${it.value}'" } - Log.d(TAG, "Processed Margin Annotations: {$finalAnnotationLog}") - - val canvasLogOutput = correctedCanvasDetections.joinToString(", ") { - val box = it.boundingBox - "'${it.text}', [left:${box.left.roundToInt()}, top:${box.top.roundToInt()}, width:${box.width().roundToInt()}, height:${box.height().roundToInt()}]" - } - Log.d(TAG, "Parsed Canvas Content (Corrected): $canvasLogOutput") - - return Pair(correctedCanvasDetections, annotationMap) + return Pair(canvasDetections, annotationMap) } private data class ParsedBlock( @@ -99,64 +88,87 @@ object MarginAnnotationParser { val gapBlocks = clusterIntoBlocks(sorted) val refinedBlocks = gapBlocks.flatMap { splitAtTags(it, validPrefixes) } - Log.d(TAG, "Spatial clustering: ${detections.size} lines -> ${gapBlocks.size} gap-blocks -> ${refinedBlocks.size} refined-blocks") - - val parsedBlocks = refinedBlocks.mapIndexed { i, block -> + val parsedBlocks = refinedBlocks.mapIndexed { _, block -> val result = parseBlock(block) - val centerY = block.map { it.boundingBox.centerY() }.average().toFloat() + val centerY = block.map { centerY(it) }.average().toFloat() val annotationText = result?.second ?: block.joinToString(" ") { it.text.trim() }.trim() - Log.d(TAG, "Block $i: tag=${result?.first ?: "none"}, ${block.size} lines, text='${annotationText.take(40)}'") ParsedBlock(result?.first, annotationText, centerY, block.size) } val annotationMap = mutableMapOf() - val matchedBlockIndices = mutableSetOf() - - val tagCounts = parsedBlocks - .mapNotNull { it.tag } - .groupingBy { it } - .eachCount() - - for ((i, parsed) in parsedBlocks.withIndex()) { - if (parsed.tag == null || parsed.annotationText.isBlank()) continue - val isUnique = tagCounts[parsed.tag] == 1 - if (isUnique && canvasTags.any { (tag, _) -> tag == parsed.tag }) { - annotationMap[parsed.tag] = parsed.annotationText - matchedBlockIndices.add(i) - Log.d(TAG, "Pass1: tag='${parsed.tag}' matched by unique tag text") - } else if (!isUnique) { - Log.d(TAG, "Pass1: tag='${parsed.tag}' duplicated ${tagCounts[parsed.tag]} times, deferring to Pass2") + + val canvasTagsByPrefix = canvasTags + .groupBy { (tag, _) -> tag.substringBefore('-') } + .mapValues { (_, tags) -> + tags.sortedBy { (_, det) -> centerY(det) } + } + + val explicitBlocks = parsedBlocks + .filter { it.tag != null && it.annotationText.isNotBlank() } + + val implicitBlocks = parsedBlocks + .filter { it.tag == null && it.annotationText.length >= 5 } + + for (block in explicitBlocks) { + val tag = block.tag ?: continue + if (canvasTags.isEmpty() || canvasTags.any { (canvasTag, _) -> canvasTag == tag }) { + annotationMap[tag] = block.annotationText } } - val remainingBlocks = parsedBlocks.indices - .filter { it !in matchedBlockIndices } - .map { it to parsedBlocks[it] } - .filter { (_, parsed) -> parsed.annotationText.length >= 5 } - .sortedBy { (_, parsed) -> parsed.centerY } - - val usedCanvasTags = mutableSetOf() - for ((idx, parsed) in remainingBlocks) { - val matchingTag = canvasTags - .filter { (tag, _) -> tag !in annotationMap && tag !in usedCanvasTags } - .minByOrNull { (_, det) -> abs(det.boundingBox.centerY() - parsed.centerY) } - - if (matchingTag != null) { - Log.d(TAG, "Pass2: Y-matched block $idx (${parsed.lineCount} lines) -> '${matchingTag.first}'") - annotationMap[matchingTag.first] = parsed.annotationText - usedCanvasTags.add(matchingTag.first) + if (canvasTags.isEmpty()) return annotationMap + + val unresolvedTagsByPrefix = canvasTagsByPrefix + .mapValues { (_, tags) -> + tags.map { it.first } + .filter { tag -> tag !in annotationMap } + .sortedBy { tag -> extractOrdinal(tag) ?: Int.MAX_VALUE } + .toMutableList() } + .toMutableMap() + + val implicitBlocksSorted = implicitBlocks.sortedBy { it.centerY } + + for (block in implicitBlocksSorted) { + val closestPrefix = unresolvedTagsByPrefix + .filterValues { it.isNotEmpty() } + .minByOrNull { (prefix, remainingTags) -> + val nearestTagY = canvasTagsByPrefix[prefix] + ?.firstOrNull { (tag, _) -> tag == remainingTags.firstOrNull() } + ?.second + ?.let { centerY(it) } + ?: Float.MAX_VALUE + + abs(nearestTagY - block.centerY) + } + ?.key + ?: continue + + val assignedTag = unresolvedTagsByPrefix[closestPrefix]?.removeFirstOrNull() ?: continue + annotationMap[assignedTag] = block.annotationText } return annotationMap } + private fun extractOrdinal(tag: String): Int? { + return tag.substringAfter('-', "").toIntOrNull() + } + + private fun centerX(detection: DetectionResult): Float { + return (detection.boundingBox.left + detection.boundingBox.right) / 2f + } + + private fun centerY(detection: DetectionResult): Float { + return (detection.boundingBox.top + detection.boundingBox.bottom) / 2f + } + private fun clusterIntoBlocks(sorted: List): List> { if (sorted.size <= 1) return listOf(sorted) - val avgHeight = sorted.map { it.boundingBox.height() }.average().toFloat() + val avgHeight = sorted.map { it.boundingBox.bottom - it.boundingBox.top }.average().toFloat() val gaps = (0 until sorted.size - 1).map { i -> sorted[i + 1].boundingBox.top - sorted[i].boundingBox.bottom } @@ -205,32 +217,33 @@ object MarginAnnotationParser { private fun parseBlock(block: List): Pair? { var tag: String? = null - var tagFoundAtIndex = -1 val annotationLines = mutableListOf() for ((index, detection) in block.withIndex()) { - val text = detection.text.trim() - if (tag == null && index <= 1) { - val tagExtraction = extractTag(text) - if (tagExtraction != null) { - tag = tagExtraction.first - tagFoundAtIndex = index - tagExtraction.second?.let { annotationLines.add(it) } - continue - } + val text = detection.text + .trim() + .trimStart('|', ':', ';', '.', ',', '_') + + val tagExtraction = extractTag(text) + + if (tag == null && tagExtraction != null && index <= 2) { + tag = tagExtraction.first + tagExtraction.second + ?.trim() + ?.takeIf { it.isNotBlank() } + ?.let(annotationLines::add) + continue } + annotationLines.add(text) } - if (tag != null && tagFoundAtIndex == 1 && annotationLines.isNotEmpty()) { - val firstLine = annotationLines.first() - val tagPrefix = tag.substringBefore('-') - if (firstLine.length <= 2 && firstLine.uppercase().startsWith(tagPrefix)) { - annotationLines.removeAt(0) - } - } + val cleanedAnnotation = annotationLines + .joinToString(" ") + .replace(Regex("\\s+"), " ") + .trim() if (tag == null) return null - return tag to annotationLines.joinToString(" ").trim() + return tag to cleanedAnnotation } -} \ No newline at end of file +} diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt index 09f8f11bd9..40e97665a4 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/YoloToXmlConverter.kt @@ -4,45 +4,42 @@ import android.graphics.Rect import android.util.Log import org.appdevforall.codeonthego.computervision.domain.model.DetectionResult import kotlin.math.max -import kotlin.math.pow import kotlin.math.roundToInt -import kotlin.math.sqrt object YoloToXmlConverter { private const val TAG = "YoloToXmlConverter" private const val MIN_W_ANY = 8 private const val MIN_H_ANY = 8 - private const val DEFAULT_SPACING_DP = 16 - - private const val HORIZONTAL_ALIGN_THRESHOLD = 20 - private const val VERTICAL_ALIGN_THRESHOLD = 20 - private const val RADIO_GROUP_GAP_THRESHOLD = 24 private const val OVERLAP_THRESHOLD = 0.6 - private val TAG_REGEX = Regex("^(B|P|D|T|C|R|SW|S)-\\d+$") - private val TAG_EXTRACT_REGEX = Regex("^([BPDTCRS8]W?)[^a-zA-Z0-9]*([\\dlIoO!]+)$") + private val TAG_REGEX = Regex("^(?i)(B|P|D|T|C|R|SW|S)-\\d+$") + private val TAG_EXTRACT_REGEX = Regex("^(?i)([BPDTCRS8]\\s*W?)[^a-zA-Z0-9]*([\\dlIoO!]+)$") private fun normalizeOcrDigits(raw: String): String = raw.replace('l', '1').replace('I', '1').replace('!', '1') .replace('o', '0').replace('O', '0') - private data class ScaledBox( + private class ScaledBox( val label: String, var text: String, val x: Int, val y: Int, val w: Int, val h: Int, val centerX: Int, val centerY: Int, val rect: Rect ) private fun normalizeTagText(text: String): String { val trimmed = text.trim().trimEnd('.', ',', ';', ':', '_', '|') - val match = TAG_EXTRACT_REGEX.find(trimmed) ?: return trimmed - var prefix = match.groupValues[1] + val match = TAG_EXTRACT_REGEX.find(trimmed) ?: return trimmed.uppercase() + + var prefix = match.groupValues[1].replace(Regex("\\s+"), "").uppercase() if (prefix == "8") prefix = "B" + if (prefix == "8W" || prefix == "S8") prefix = "SW" + return "$prefix-${normalizeOcrDigits(match.groupValues[2])}" } private fun isTag(text: String): Boolean = normalizeTagText(text).matches(TAG_REGEX) private fun getTagType(tag: String): String? { + val upperTag = tag.uppercase() return when { tag.startsWith("B-") -> "button" tag.startsWith("P-") -> "image_placeholder" @@ -56,12 +53,6 @@ object YoloToXmlConverter { } } - private fun distance(box1: ScaledBox, box2: ScaledBox): Float { - val dx = (box1.centerX - box2.centerX).toFloat() - val dy = (box1.centerY - box2.centerY).toFloat() - return sqrt(dx.pow(2) + dy.pow(2)) - } - fun generateXmlLayout( detections: List, annotations: Map, @@ -71,93 +62,166 @@ object YoloToXmlConverter { targetDpHeight: Int, wrapInScroll: Boolean = true ): String { + val widgets = detections + .filter { it.isYolo && it.label != "widget_tag" } + .distinctBy { + if (it.label.startsWith("switch")) { + "${((it.boundingBox.top + it.boundingBox.bottom) / 2f).toInt() / 50}" + } else { + "${it.label}:${it.boundingBox.left}:${it.boundingBox.top}:${it.boundingBox.right}:${it.boundingBox.bottom}" + } + } + var scaledBoxes = widgets.map { scaleDetection(it, sourceImageWidth, sourceImageHeight, targetDpWidth, targetDpHeight) } - val widgetTags = detections.filter { - it.label == "widget_tag" || (!it.isYolo && isTag(it.text)) - } - val widgets = detections.filter { it.isYolo }.filter { it.label != "widget_tag" } - - var scaledBoxes = widgets.map { - scaleDetection( - it, - sourceImageWidth, - sourceImageHeight, - targetDpWidth, - targetDpHeight - ) - } - - val parents = scaledBoxes.filter { it.label != "text" && !isTag(it.text) }.toMutableList() + val parents = scaledBoxes.filter { it.label != "text" && !isTag(it.text) } val texts = scaledBoxes.filter { it.label == "text" && !isTag(it.text) } + + scaledBoxes = assignTextToParents(parents, texts, scaledBoxes) + + val uiElements = scaledBoxes.filter { !isTag(it.text) } + val widgetTags = detections.filter { it.label == "widget_tag" || (!it.isYolo && isTag(it.text)) } + val canvasTags = widgetTags.map { scaleDetection(it, sourceImageWidth, sourceImageHeight, targetDpWidth, targetDpHeight) } + + val finalAnnotations = matchAnnotationsToElements(canvasTags, uiElements, annotations) + + val sortedBoxes = uiElements.sortedWith(compareBy({ it.y }, { it.x })) + return buildXml(sortedBoxes, finalAnnotations, targetDpHeight, wrapInScroll) + } + + private fun assignTextToParents(parents: List, texts: List, allBoxes: List): List { val consumedTexts = mutableSetOf() for (parent in parents) { texts.firstOrNull { text -> !consumedTexts.contains(text) && - Rect(parent.rect).let { intersection -> - intersection.intersect(text.rect) && - (intersection.width() * intersection.height()).let { intersectionArea -> - val textArea = text.w * text.h - textArea > 0 && (intersectionArea.toFloat() / textArea.toFloat()) > OVERLAP_THRESHOLD - } - } + Rect(parent.rect).let { intersection -> + intersection.intersect(text.rect) && + (intersection.width() * intersection.height()).let { intersectionArea -> + val textArea = text.w * text.h + textArea > 0 && (intersectionArea.toFloat() / textArea.toFloat()) > OVERLAP_THRESHOLD + } + } }?.let { parent.text = it.text consumedTexts.add(it) } } - scaledBoxes = scaledBoxes.filter { !consumedTexts.contains(it) } + return allBoxes.filter { !consumedTexts.contains(it) } + } - val uiElements = scaledBoxes.filter { !isTag(it.text) } - val canvasTags = widgetTags.map { - scaleDetection( - it, - sourceImageWidth, - sourceImageHeight, - targetDpWidth, - targetDpHeight - ) - } + private fun matchAnnotationsToElements( + canvasTags: List, + uiElements: List, + annotations: Map + ): Map { val finalAnnotations = mutableMapOf() val claimedWidgets = mutableSetOf() - val appliedAnnotationKeys = mutableSetOf() val deduplicatedTags = canvasTags .groupBy { normalizeTagText(it.text) } .map { (_, group) -> group.first() } - for (tagBox in deduplicatedTags) { - val normalizedText = normalizeTagText(tagBox.text) - val tagType = getTagType(normalizedText) ?: continue - val annotation = annotations[normalizedText] ?: continue + val tagsByWidgetType = annotations + .mapNotNull { (tagText, annotationText) -> + val normalizedTag = normalizeTagText(tagText) + val widgetType = getTagType(normalizedTag) ?: return@mapNotNull null + + val matchingTagBox = deduplicatedTags.find { normalizeTagText(it.text) == normalizedTag } - val closestElement = uiElements - .filter { it.label.startsWith(tagType) && it !in claimedWidgets } - .minByOrNull { distance(tagBox, it) } + TaggedAnnotation( + normalizedTag = normalizedTag, + widgetType = widgetType, + annotation = annotationText, + tagBox = matchingTagBox + ) + } + .groupBy { it.widgetType } + + val widgetsByType = uiElements.groupBy { normalizeWidgetType(it.label) } + + for ((widgetType, taggedAnnotations) in tagsByWidgetType) { + val candidateWidgets = widgetsByType[widgetType] + ?.sortedWith(compareBy({ it.y }, { it.x })) + ?: continue + + val sortedTags = taggedAnnotations.sortedWith( + compareBy( + { extractTagOrdinal(it.normalizedTag) ?: Int.MAX_VALUE }, + { it.tagBox?.y ?: Int.MAX_VALUE }, + { it.tagBox?.x ?: Int.MAX_VALUE } + ) + ) - if (closestElement != null) { - finalAnnotations[closestElement] = annotation - claimedWidgets.add(closestElement) - appliedAnnotationKeys.add(normalizedText) + for (taggedAnnotation in sortedTags) { + val ordinal = extractTagOrdinal(taggedAnnotation.normalizedTag) + val matchedWidget = findWidgetByOrdinalOrFallback( + ordinal = ordinal, + tagBox = taggedAnnotation.tagBox, + candidates = candidateWidgets, + claimedWidgets = claimedWidgets + ) ?: continue + + finalAnnotations[matchedWidget] = taggedAnnotation.annotation + claimedWidgets.add(matchedWidget) } } - for ((tagText, annotation) in annotations) { - val normalizedTagText = normalizeTagText(tagText) - if (normalizedTagText in appliedAnnotationKeys) continue - val tagType = getTagType(normalizedTagText) ?: continue - val unclaimed = uiElements - .filter { it.label.startsWith(tagType) && it !in claimedWidgets } - .sortedWith(compareBy({ it.y }, { it.x })) - .firstOrNull() - if (unclaimed != null) { - finalAnnotations[unclaimed] = annotation - claimedWidgets.add(unclaimed) + return finalAnnotations + } + + private data class TaggedAnnotation( + val normalizedTag: String, + val widgetType: String, + val annotation: String, + val tagBox: ScaledBox? + ) + + private fun normalizeWidgetType(label: String): String = when { + label.startsWith("text_entry_box") -> "text_entry_box" + label.startsWith("button") -> "button" + label.startsWith("switch") -> "switch" + label.startsWith("checkbox") -> "checkbox" + label.startsWith("radio") -> "radio" + label.startsWith("dropdown") -> "dropdown" + label.startsWith("slider") -> "slider" + label.startsWith("image_placeholder") -> "image_placeholder" + else -> label + } + + private fun extractTagOrdinal(tag: String): Int? { + return tag.substringAfter('-', "").toIntOrNull() + } + + private fun findWidgetByOrdinalOrFallback( + ordinal: Int?, + tagBox: ScaledBox?, + candidates: List, + claimedWidgets: Set + ): ScaledBox? { + val available = candidates.filter { it !in claimedWidgets } + if (available.isEmpty()) return null + + if (ordinal != null) { + val zeroBasedMatch = candidates.getOrNull(ordinal) + if (zeroBasedMatch != null && zeroBasedMatch !in claimedWidgets) { + return zeroBasedMatch + } + + val oneBasedMatch = candidates.getOrNull(ordinal - 1) + if (oneBasedMatch != null && oneBasedMatch !in claimedWidgets) { + return oneBasedMatch } } - val sortedBoxes = uiElements.sortedWith(compareBy({ it.y }, { it.x })) - return buildXml(sortedBoxes, finalAnnotations, targetDpWidth, targetDpHeight, wrapInScroll) + if (tagBox != null) { + return available.minByOrNull { candidate -> + val verticalDistance = kotlin.math.abs(tagBox.centerY - candidate.centerY) + val horizontalDistance = kotlin.math.abs(tagBox.centerX - candidate.centerX) + (verticalDistance * 2) + horizontalDistance + } + } + + return available.minByOrNull { it.y } } private fun scaleDetection( @@ -167,10 +231,10 @@ object YoloToXmlConverter { return ScaledBox(detection.label, detection.text, 0, 0, MIN_W_ANY, MIN_H_ANY, MIN_W_ANY / 2, MIN_H_ANY / 2, Rect(0, 0, MIN_W_ANY, MIN_H_ANY)) } val rect = detection.boundingBox - val normCx = rect.centerX() / sourceWidth - val normCy = rect.centerY() / sourceHeight - val normW = rect.width() / sourceWidth - val normH = rect.height() / sourceHeight + val normCx = ((rect.left + rect.right) / 2f) / sourceWidth + val normCy = ((rect.top + rect.bottom) / 2f) / sourceHeight + val normW = (rect.right - rect.left) / sourceWidth + val normH = (rect.bottom - rect.top) / sourceHeight val x = max(0, ((normCx - normW / 2.0) * targetW).roundToInt()) val y = max(0, ((normCy - normH / 2.0) * targetH).roundToInt()) val w = max(MIN_W_ANY, (normW * targetW).roundToInt()) @@ -189,7 +253,9 @@ object YoloToXmlConverter { } private fun escapeXmlAttr(value: String): String = - value.replace("&", "&") + value.replace("|", "") + .trim() + .replace("&", "&") .replace("<", "<") .replace(">", ">") .replace("\"", """) @@ -200,7 +266,7 @@ object YoloToXmlConverter { "button" -> "Button" "image_placeholder", "icon" -> "ImageView" "checkbox_unchecked", "checkbox_checked" -> "CheckBox" - "radio_unchecked", "radio_checked" -> "RadioButton" + "radio_button_unchecked", "radio_button_checked" -> "RadioButton" "switch_off", "switch_on" -> "Switch" "text_entry_box" -> "EditText" "dropdown" -> "Spinner" @@ -212,7 +278,6 @@ object YoloToXmlConverter { private fun buildXml( boxes: List, annotations: Map, - targetDpWidth: Int, targetDpHeight: Int, wrapInScroll: Boolean ): String { @@ -255,8 +320,8 @@ object YoloToXmlConverter { val parsedAttrs = parseMarginAnnotations(annotations[box], tag) - val width = parsedAttrs["android:layout_width"] ?: "wrap_content" - val height = parsedAttrs["android:layout_height"] ?: "wrap_content" + val width = parsedAttrs["android:layout_width"] ?: "${box.w}dp" + val height = parsedAttrs["android:layout_height"] ?: "${box.h}dp" val id = parsedAttrs["android:id"]?.substringAfterLast('/') ?: defaultId val writtenAttrs = mutableSetOf( @@ -269,68 +334,99 @@ object YoloToXmlConverter { xml.append("$indent android:layout_height=\"${escapeXmlAttr(height)}\"\n") when (tag) { - "TextView", "Button", "CheckBox", "RadioButton", "Switch" -> { - val viewText = parsedAttrs["android:text"] - ?: box.text.takeIf { it.isNotEmpty() && it != box.label } - ?: box.label - xml.append("$indent android:text=\"${escapeXmlAttr(viewText)}\"\n") - writtenAttrs.add("android:text") - if (tag == "TextView") { - val textSize = parsedAttrs["android:textSize"] ?: "16sp" - xml.append("$indent android:textSize=\"${escapeXmlAttr(textSize)}\"\n") - writtenAttrs.add("android:textSize") - } - if (label.contains("_checked") || label.contains("_on")) { - val checked = parsedAttrs["android:checked"] ?: "true" - xml.append("$indent android:checked=\"${escapeXmlAttr(checked)}\"\n") - writtenAttrs.add("android:checked") - } - xml.append("$indent tools:ignore=\"HardcodedText\"\n") - writtenAttrs.add("tools:ignore") - } + "TextView", "Button", "CheckBox", "RadioButton", "Switch" -> + appendTextViewAttributes(xml, indent, parsedAttrs, box, label, tag, writtenAttrs) - "EditText" -> { - val hint = parsedAttrs["android:hint"] - ?: box.text.ifEmpty { "Enter text..." } - xml.append("$indent android:hint=\"${escapeXmlAttr(hint)}\"\n") - writtenAttrs.add("android:hint") - val inputType = parsedAttrs["android:inputType"] ?: "text" - xml.append("$indent android:inputType=\"${escapeXmlAttr(inputType)}\"\n") - writtenAttrs.add("android:inputType") - xml.append("$indent tools:ignore=\"HardcodedText\"\n") - writtenAttrs.add("tools:ignore") - } + "EditText" -> + appendEditTextAttributes(xml, indent, parsedAttrs, box, writtenAttrs) - "ImageView" -> { - xml.append("$indent android:contentDescription=\"${escapeXmlAttr(label)}\"\n") - writtenAttrs.add("android:contentDescription") - val scaleType = parsedAttrs["android:scaleType"] ?: "centerCrop" - xml.append("$indent android:scaleType=\"${escapeXmlAttr(scaleType)}\"\n") - writtenAttrs.add("android:scaleType") - val bg = parsedAttrs["android:background"] ?: "#E0E0E0" - xml.append("$indent android:background=\"${escapeXmlAttr(bg)}\"\n") - writtenAttrs.add("android:background") - } + "ImageView" -> + appendImageViewAttributes(xml, indent, parsedAttrs, label, writtenAttrs) } parsedAttrs.forEach { (key, value) -> if (key !in writtenAttrs) { - // Add this logic to handle specific attribute value casing - val finalValue = when (key) { - "android:layout_gravity" -> value.lowercase() - else -> value - } - xml.append("$indent $key=\"${escapeXmlAttr(finalValue)}\"\n") + xml.append("$indent $key=\"${escapeXmlAttr(value)}\"\n") writtenAttrs.add(key) } - } xml.append("$indent/>") Log.d(TAG, "appendSimpleView: $xml") } + private fun appendTextViewAttributes( + xml: StringBuilder, + indent: String, + parsedAttrs: Map, + box: ScaledBox, + label: String, + tag: String, + writtenAttrs: MutableSet + ) { + val rawViewText = parsedAttrs["android:text"] + ?: box.text.takeIf { it.isNotEmpty() && it != box.label } + ?: when (tag) { + "Switch" -> "Switch" + "CheckBox" -> "CheckBox" + "RadioButton" -> "RadioButton" + else -> box.label + } + + xml.append("$indent android:text=\"${escapeXmlAttr(rawViewText)}\"\n") + writtenAttrs.add("android:text") + if (tag == "TextView") { + val textSize = parsedAttrs["android:textSize"] ?: "16sp" + xml.append("$indent android:textSize=\"${escapeXmlAttr(textSize)}\"\n") + writtenAttrs.add("android:textSize") + } + if (label.contains("_checked") || label.contains("_on")) { + val checked = parsedAttrs["android:checked"] ?: "true" + xml.append("$indent android:checked=\"${escapeXmlAttr(checked)}\"\n") + writtenAttrs.add("android:checked") + } + xml.append("$indent tools:ignore=\"HardcodedText\"\n") + writtenAttrs.add("tools:ignore") + } + + private fun appendEditTextAttributes( + xml: StringBuilder, + indent: String, + parsedAttrs: Map, + box: ScaledBox, + writtenAttrs: MutableSet + ) { + val rawHint = parsedAttrs["android:hint"] ?: box.text.ifEmpty { "Enter text..." } + + xml.append("$indent android:hint=\"${escapeXmlAttr(rawHint)}\"\n") + writtenAttrs.add("android:hint") + + val inputType = parsedAttrs["android:inputType"] ?: "text" + xml.append("$indent android:inputType=\"${escapeXmlAttr(inputType)}\"\n") + writtenAttrs.add("android:inputType") + + xml.append("$indent tools:ignore=\"HardcodedText\"\n") + writtenAttrs.add("tools:ignore") + } + + private fun appendImageViewAttributes( + xml: StringBuilder, + indent: String, + parsedAttrs: Map, + label: String, + writtenAttrs: MutableSet + ) { + xml.append("$indent android:contentDescription=\"${escapeXmlAttr(label)}\"\n") + writtenAttrs.add("android:contentDescription") + val scaleType = parsedAttrs["android:scaleType"] ?: "centerCrop" + xml.append("$indent android:scaleType=\"${escapeXmlAttr(scaleType)}\"\n") + writtenAttrs.add("android:scaleType") + val bg = parsedAttrs["android:background"] ?: "#E0E0E0" + xml.append("$indent android:background=\"${escapeXmlAttr(bg)}\"\n") + writtenAttrs.add("android:background") + } + private fun parseMarginAnnotations(annotation: String?, tag: String): Map { return FuzzyAttributeParser.parse(annotation, tag) } -} \ No newline at end of file +}