diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/WidgetTagParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/WidgetTagParser.kt index 70812edd49..1b8fa879ea 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/WidgetTagParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/WidgetTagParser.kt @@ -6,7 +6,7 @@ package org.appdevforall.codeonthego.computervision.domain */ internal object WidgetTagParser { private val tagRegex = Regex("^(?i)(B|P|D|T|C|R|SW|S)-[A-Z0-9_]+$") - private val tagExtractRegex = Regex("^(?i)([A-Z0-9\\s]+)([\\s\\-_.]+)([A-Z0-9_\\-]+)") + private val tagExtractRegex = Regex("^(?i)(B|P|D|T|C|R|SW|S|8|8W|S8)([\\s\\-_.,|/]*)([A-Z0-9_\\-]+)") private val VALID_PREFIXES = setOf("B", "P", "D", "T", "C", "R", "SW", "S") fun isTag(text: String): Boolean { @@ -75,7 +75,9 @@ internal object WidgetTagParser { private fun isValidTagMatch(match: MatchResult): Boolean { val separator = match.groupValues[2] val rawToken = match.groupValues[3] - return !(separator.isEmpty() && rawToken.firstOrNull()?.isLetter() == true) + + if (separator.isNotEmpty()) return true + return rawToken.all(::isNumericLikeOcrChar) } private fun normalizePrefix(rawPrefix: String): String { @@ -121,6 +123,6 @@ internal object WidgetTagParser { * Determines whether a character is a digit or a letter frequently confused with a digit by OCR. */ private fun isNumericLikeOcrChar(char: Char): Boolean { - return char.isDigit() || char in setOf('O', 'I', 'L', 'Z', 'S', 'B', '!') + return char.isDigit() || char.uppercaseChar() in setOf('O', 'I', 'L', 'Z', 'S', 'B', '!') } } diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/UiGrammarValidator.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/UiGrammarValidator.kt index 54affbbe73..b464b5fd33 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/UiGrammarValidator.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/UiGrammarValidator.kt @@ -9,7 +9,9 @@ class UiGrammarValidator { CheckBoxGrammar, SwitchGrammar, RadioGroupGrammar, - SliderGrammar + SliderGrammar, + ButtonGrammar, + TextViewGrammar, ).associateBy { it.tag } fun enforceGrammar(rawParsedAttributes: Map, tag: String): Map { diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/WidgetGrammar.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/WidgetGrammar.kt index 766771912b..481e488731 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/WidgetGrammar.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/grammar/WidgetGrammar.kt @@ -27,7 +27,9 @@ interface LayoutGrammar : WidgetGrammar { AttributeKey.GRAVITY.xmlName to CategoricalValidator(GravityValueSet.values), AttributeKey.LAYOUT_WEIGHT.xmlName to PassThroughValidator, AttributeKey.PADDING.xmlName to DimensionValidator, - AttributeKey.VISIBILITY.xmlName to CategoricalValidator(VisibilityValueSet.values) + AttributeKey.VISIBILITY.xmlName to CategoricalValidator(VisibilityValueSet.values), + AttributeKey.BACKGROUND.xmlName to PassThroughValidator, + AttributeKey.BACKGROUND_TINT.xmlName to PassThroughValidator ) } @@ -64,10 +66,7 @@ object ImageViewGrammar : LayoutGrammar { override val tag = "ImageView" override val attributes = super.attributes + mapOf( - AttributeKey.SRC.xmlName to PassThroughValidator, - AttributeKey.LAYOUT_GRAVITY.xmlName to CategoricalValidator(GravityValueSet.values), - AttributeKey.BACKGROUND.xmlName to PassThroughValidator, - AttributeKey.BACKGROUND_TINT.xmlName to PassThroughValidator + AttributeKey.SRC.xmlName to PassThroughValidator ) } @@ -108,3 +107,17 @@ object SliderGrammar : LayoutGrammar { AttributeKey.STYLE.xmlName to SliderStyleValidator ) } + +object TextViewGrammar : TextGrammar { + override val tag = "TextView" + override val attributes = super.attributes + mapOf( + AttributeKey.TEXT.xmlName to PassThroughValidator + ) +} + +object ButtonGrammar : TextGrammar { + override val tag = "Button" + override val attributes = super.attributes + mapOf( + AttributeKey.TEXT.xmlName to PassThroughValidator + ) +} diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/AttributeModels.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/AttributeModels.kt index a9de4fb7b4..de1ffffeee 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/AttributeModels.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/AttributeModels.kt @@ -18,6 +18,18 @@ object GravityValueSet : AttributeValueSet { ) } +object DimensionValueSet : AttributeValueSet { + const val WRAP_CONTENT = "wrap_content" + const val MATCH_PARENT = "match_parent" + + override val values = listOf(WRAP_CONTENT, MATCH_PARENT) + + val matchKeywords = setOf("match", "parent") + val wrapKeywords = setOf("wrap", "content", "wrapcan") + + val allKeywords = matchKeywords + wrapKeywords +} + object VisibilityValueSet : AttributeValueSet { override val values = listOf( "visible", diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/FuzzyAttributeParser.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/FuzzyAttributeParser.kt index 2b6ca1b5ec..6ac4c917f6 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/FuzzyAttributeParser.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/FuzzyAttributeParser.kt @@ -25,6 +25,13 @@ object FuzzyAttributeParser { ValueType.RAW to ValueCleaner { it } ) + private val numericTypes = setOf( + ValueType.DIMENSION, + ValueType.SP_DIMENSION, + ValueType.INTEGER, + ValueType.FLOAT + ) + fun parse(annotation: String?, tag: String): Map { if (annotation.isNullOrBlank()) return emptyMap() @@ -73,8 +80,19 @@ object FuzzyAttributeParser { } private fun shouldTreatTokenAsValue(token: String, currentKey: AttributeKey?): Boolean { - if (currentKey != AttributeKey.INPUT_TYPE) return false - return token.trim().lowercase() in inputTypeValues + val lowerToken = token.trim().lowercase() + + return when { + currentKey == AttributeKey.INPUT_TYPE && lowerToken in inputTypeValues -> true + currentKey?.valueType == ValueType.COLOR && isColorToken(lowerToken) -> true + currentKey?.valueType == ValueType.DIMENSION && DimensionValueSet.allKeywords.any { it in lowerToken } -> true + currentKey?.valueType in numericTypes -> lowerToken.any { it.isDigit() } + else -> false + } + } + + private fun isColorToken(token: String): Boolean { + return token.startsWith("#") || token.startsWith("@") || token in ColorCleaner.colorMap } private fun flushAttribute(key: AttributeKey?, rawValue: String, tag: String, destination: MutableMap) { @@ -85,7 +103,9 @@ object FuzzyAttributeParser { if (cleanedValue.isNotEmpty()) { val (xmlAttr, finalValue) = resolveXmlAttribute(key, cleanedValue, tag) - destination[xmlAttr] = finalValue + if (!destination.containsKey(xmlAttr)) { + destination[xmlAttr] = finalValue + } } } diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/ValueCleanersImpl.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/ValueCleanersImpl.kt index b525c9b076..dc12fef7bb 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/ValueCleanersImpl.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/ValueCleanersImpl.kt @@ -24,66 +24,49 @@ internal object TextContentCleaner : ValueCleaner { internal object NumberCleaner : ValueCleaner { - private val ocrLetterOToZeroRegex = Regex("[oO]") - private val ocrLetterIToOneRegex = Regex("[lI]") - private val ocrLetterZToTwoRegex = Regex("[zZ]") - private val ocrLetterSToFiveRegex = Regex("[sS]") - private val ocrLetterBToSixRegex = Regex("[bB]") + private val ocrCharMap = mapOf( + 'O' to '0', 'A' to '0', '@' to '0', 'Q' to '0', + 'L' to '1', 'I' to '1', '|' to '1', '!' to '1', '/' to '1', '\\' to '1', + '(' to '1', ')' to '1', '[' to '1', ']' to '1', + 'Z' to '2', 'S' to '5', 'B' to '6' + ) override fun clean(rawValue: String): String { - val match = Regex("-?[\\doOlIzZsSbB]+").find(rawValue) ?: return rawValue - return match.value - .replace(ocrLetterOToZeroRegex, "0") - .replace(ocrLetterIToOneRegex, "1") - .replace(ocrLetterZToTwoRegex, "2") - .replace(ocrLetterSToFiveRegex, "5") - .replace(ocrLetterBToSixRegex, "6") + val translated = rawValue.map { ocrCharMap[it.uppercaseChar()] ?: it }.joinToString("") + return Regex("-?\\d+").find(translated)?.value ?: rawValue } } internal object DimensionCleaner : ValueCleaner { - private val matchKeywords = setOf("match", "parent") - private val wrapKeywords = setOf("wrap", "content", "wrapcan") - private val DIMENSION_CONSTANTS = listOf("wrap_content", "match_parent") - private val explicitDimensionRegex = Regex("^(-?\\d+)(dp|sp|px|dip)$") + private val leadingNumberRegex = Regex("^-?\\d+") override fun clean(rawValue: String): String { - val trimmedValue = rawValue.trim() - val normalized = trimmedValue.lowercase().replace(" ", "_") + val trimmedValue = rawValue.trim().lowercase() + val normalized = trimmedValue.replace(" ", "_") - if (matchKeywords.any { it in normalized }) return "match_parent" - if (wrapKeywords.any { it in normalized }) return "wrap_content" + if (DimensionValueSet.matchKeywords.any { it in normalized }) return DimensionValueSet.MATCH_PARENT + if (DimensionValueSet.wrapKeywords.any { it in normalized }) return DimensionValueSet.WRAP_CONTENT - val fuzzyResult = FuzzySearch.extractOne(normalized, DIMENSION_CONSTANTS) + val fuzzyResult = FuzzySearch.extractOne(normalized, DimensionValueSet.values) if (fuzzyResult.score >= 60) return fuzzyResult.string - val fixedUnit = normalized.replace(Regex("0p$|op$|olp$"), "dp") - explicitDimensionRegex.matchEntire(fixedUnit)?.let { match -> - val normalizedNumber = normalizeOcrDimensionNumber(match.groupValues[1]) - return normalizedNumber + match.groupValues[2] - } - - val numericPart = NumberCleaner.clean(fixedUnit.replace("_", "")) - val normalizedNumericPart = normalizeOcrDimensionNumber(numericPart) - - return if (numericPart != fixedUnit) "${normalizedNumericPart}dp" else trimmedValue - } + val unitMatch = Regex("(dp|sp|px|in|mm|pt)$").find(trimmedValue) + val originalUnit = unitMatch?.value ?: "dp" - private fun normalizeOcrDimensionNumber(numericPart: String): String { - if (!numericPart.matches(Regex("-?\\d+"))) return numericPart + val firstToken = trimmedValue.substringBefore(" ") + val rawNumber = firstToken.removeSuffix(originalUnit).trim() + val numericPart = NumberCleaner.clean(rawNumber) - val isNegative = numericPart.startsWith("-") - val numericValue = numericPart.toLongOrNull() ?: return numericPart - val canonical = numericValue.toString() - val unsignedCanonical = canonical.removePrefix("-") + val numMatch = leadingNumberRegex.find(numericPart)?.value + ?: return trimmedValue + val correctedNum = removeOcrTrailingZero(numMatch) - // OCR sometimes reads the trailing "dp" as a single zero, turning 150dp into 1500. - if (unsignedCanonical.endsWith('0') && unsignedCanonical.toLong() >= 1000L) { - val normalizedValue = numericValue / 10L - return normalizedValue.toString() - } + return "$correctedNum$originalUnit" + } - return if (isNegative && numericValue == 0L) "0" else canonical + private fun removeOcrTrailingZero(num: String): String { + val isOcrArtifact = num.endsWith("0") && (num.toLongOrNull() ?: 0L) >= 1000L + return if (isOcrArtifact) num.dropLast(1) else num } } @@ -96,8 +79,10 @@ internal object SpDimensionCleaner : ValueCleaner { } internal object ColorCleaner : ValueCleaner { - private val colorMap = mapOf( - "red" to "#FF0000", "rel" to "#FF0000", "green" to "#00FF00", "blue" to "#0000FF", + val colorMap = mapOf( + "red" to "#FF0000", "rel" to "#FF0000", "rad" to "#FF0000", "reo" to "#FF0000", + "green" to "#00FF00", + "blue" to "#0000FF", "ine" to "#0000FF", "hne" to "#0000FF", "hlue" to "#0000FF", "ane" to "#0000FF", "lne" to "#0000FF", "black" to "#000000", "white" to "#FFFFFF", "gray" to "#808080", "grey" to "#808080", "dark_gray" to "#A9A9A9", "yellow" to "#FFFF00", "cyan" to "#00FFFF", "magenta" to "#FF00FF", "purple" to "#800080", @@ -109,13 +94,14 @@ internal object ColorCleaner : ValueCleaner { override fun clean(rawValue: String): String { if (rawValue.startsWith("#") || rawValue.startsWith("@")) return rawValue - val normalizedValue = rawValue.lowercase().replace(" ", "_") + + val normalizedValue = rawValue.lowercase().replace(Regex("[^a-z_]"), "").replace(" ", "_") val exactColor = colorMap[normalizedValue] if (exactColor != null) return exactColor val result = FuzzySearch.extractOne(normalizedValue, colorMap.keys.toList()) - return if (result.score >= 75) colorMap[result.string] ?: rawValue else rawValue + return if (result.score >= 70) colorMap[result.string] ?: rawValue else rawValue } } diff --git a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/sanitizer/OcrSanitizerRules.kt b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/sanitizer/OcrSanitizerRules.kt index cff8310d7a..ea5d1b1f22 100644 --- a/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/sanitizer/OcrSanitizerRules.kt +++ b/cv-image-to-xml/src/main/java/org/appdevforall/codeonthego/computervision/domain/parser/sanitizer/OcrSanitizerRules.kt @@ -3,8 +3,9 @@ package org.appdevforall.codeonthego.computervision.domain.parser.sanitizer class ColorSanitizer : DictionaryRegexSanitizer() { override val rawRules = mapOf( - "backgroundired" to "background red", - "backgroundred" to "background red" + "backgroundired" to "background: red", + "backgroundred" to "background: red", + "\\bback[a-z]*[-_.]?\\s*[:;]\\s*" to "background: " ) } @@ -16,8 +17,8 @@ class TextAttributeSanitizer : DictionaryRegexSanitizer() { class DimensionSanitizer : DictionaryRegexSanitizer() { override val rawRules = mapOf( - "[il]ayout\\.?\\s*w[io]l?[td]h\\.?" to "layout_width:", - "layout\\s*hei[sck]+t\\.?" to "layout_height:", + "[il]ay[a-z]*[-_.\\s]*w[a-z0-9]*\\.?\\s*[:;]\\s*" to "layout_width: ", + "[il]ay[a-z]*[-_.\\s]*hei[a-z0-9]*\\.?\\s*[:;]\\s*" to "layout_height: ", "m?w?at[ce]h[-_\\s]?p[ar]+ent" to "match_parent" ) } @@ -31,6 +32,8 @@ class MarginPaddingSanitizer : DictionaryRegexSanitizer() { class StructureSanitizer : DictionaryRegexSanitizer() { override val rawRules = mapOf( - "horizontal\\s+gravity\\s*:\\s*center\\s+layout" to "layout_gravity: center_horizontal" + "horizontal\\s+gravity\\s*:\\s*center\\s+layout" to "layout_gravity: center_horizontal", + "\\b[ilL][dl]\\b\\s*[:;]?" to "id: ", + "\\bS[ec][rt]\\b\\s*[:;]?" to "src: " ) }