Skip to content

Commit d65d471

Browse files
authored
* Fixes: rdkit#3415 * add another test
1 parent 9a04aea commit d65d471

2 files changed

Lines changed: 105 additions & 3 deletions

File tree

Code/GraphMol/FileParsers/MolSGroupParsing.cpp

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -737,12 +737,31 @@ void ParseV3000SAPLabel(RWMol *mol, SubstanceGroup &sgroup,
737737
std::string ParseV3000StringPropLabel(std::stringstream &stream) {
738738
std::string strValue;
739739

740-
// TODO: this should be improved to be able to handle escaped quotes
741-
742740
auto nextChar = stream.peek();
743741
if (nextChar == '"') {
742+
// skip the opening quote:
744743
stream.get();
745-
std::getline(stream, strValue, '"');
744+
745+
// this is a bit gross because it's legal to include a \" in a value,
746+
// but the way that's done is by doubling it. So
747+
// FIELDINFO=""""
748+
// should assign the value \" to FIELDINFO
749+
char chr;
750+
while (stream.get(chr)) {
751+
if (chr == '"') {
752+
nextChar = stream.peek();
753+
754+
// if the next element in the stream is a \" then we have a quoted \".
755+
// Otherwise we're done
756+
if (nextChar != '"') {
757+
break;
758+
} else {
759+
// skip the second \"
760+
stream.get();
761+
}
762+
}
763+
strValue += chr;
764+
}
746765
} else if (nextChar == '\'') {
747766
std::getline(stream, strValue, '\'');
748767
} else {

Code/GraphMol/FileParsers/file_parsers_catch.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1974,3 +1974,86 @@ M END)CTAB"_ctab;
19741974
CHECK(m->getNumBonds() == 0);
19751975
}
19761976
}
1977+
1978+
// Regression test for GitHub issue #3415: string properties in a V3000 SGroup
// entry may contain a double quote, written inside a quoted value by doubling
// it. Both sections parse the same 6-atom / 6-bond molblock carrying a single
// DAT SGroup and differ only in the FIELDINFO value.
TEST_CASE("github #3415: problem parsing SGroup data containing \" ", "[bug]") {
1979+
// FIELDINFO and QUERYOP are both written as """" (a quoted value whose only
// content is one doubled quote); each must come back as a single '"'.
SECTION("basics") {
1980+
auto m = R"CTAB(
1981+
Mrv2014 09172018222D
1982+
1983+
0 0 0 0 0 999 V3000
1984+
M V30 BEGIN CTAB
1985+
M V30 COUNTS 6 6 1 0 0
1986+
M V30 BEGIN ATOM
1987+
M V30 1 C 1.3337 2.31 0 0
1988+
M V30 2 C 2.6674 1.54 0 0
1989+
M V30 3 C 2.6674 -0 0 0
1990+
M V30 4 C 1.3337 -0.77 0 0
1991+
M V30 5 C 0 0 0 0
1992+
M V30 6 C 0 1.54 0 0
1993+
M V30 END ATOM
1994+
M V30 BEGIN BOND
1995+
M V30 1 2 1 2
1996+
M V30 2 1 2 3
1997+
M V30 3 2 3 4
1998+
M V30 4 1 4 5
1999+
M V30 5 2 5 6
2000+
M V30 6 1 1 6
2001+
M V30 END BOND
2002+
M V30 BEGIN SGROUP
2003+
M V30 1 DAT 0 ATOMS=(1 1) FIELDNAME=Tempstruct FIELDINFO="""" -
2004+
M V30 FIELDDISP=" 2.1037 1.5400 DA ALL 0 0" QUERYOP="""" -
2005+
M V30 FIELDDATA=Foo1
2006+
M V30 END SGROUP
2007+
M V30 END CTAB
2008+
M END
2009+
)CTAB"_ctab;
2010+
// m must be truthy before it is dereferenced below.
// NOTE(review): the _ctab literal is defined outside this view; presumably it
// yields a null pointer when parsing fails - confirm.
REQUIRE(m);
2011+
// Atom and bond totals match the COUNTS line of the molblock above.
CHECK(m->getNumAtoms() == 6);
2012+
CHECK(m->getNumBonds() == 6);
2013+
auto sgs = getSubstanceGroups(*m);
2014+
// Exactly one SGroup is declared, so indexing sgs[0] below is safe.
REQUIRE(sgs.size() == 1);
2015+
CHECK(sgs[0].getProp<std::string>("TYPE") == "DAT");
2016+
// The doubled quote inside the quoted value collapses to one literal quote.
CHECK(sgs[0].getProp<std::string>("FIELDINFO") == "\"");
2017+
CHECK(sgs[0].getProp<std::string>("QUERYOP") == "\"");
2018+
}
2019+
// Same molblock, but FIELDINFO is written as "" (an empty quoted value): it
// must parse to an empty string, while QUERYOP="""" must still yield '"'.
SECTION("empty string") {
2020+
auto m = R"CTAB(
2021+
Mrv2014 09172018222D
2022+
2023+
0 0 0 0 0 999 V3000
2024+
M V30 BEGIN CTAB
2025+
M V30 COUNTS 6 6 1 0 0
2026+
M V30 BEGIN ATOM
2027+
M V30 1 C 1.3337 2.31 0 0
2028+
M V30 2 C 2.6674 1.54 0 0
2029+
M V30 3 C 2.6674 -0 0 0
2030+
M V30 4 C 1.3337 -0.77 0 0
2031+
M V30 5 C 0 0 0 0
2032+
M V30 6 C 0 1.54 0 0
2033+
M V30 END ATOM
2034+
M V30 BEGIN BOND
2035+
M V30 1 2 1 2
2036+
M V30 2 1 2 3
2037+
M V30 3 2 3 4
2038+
M V30 4 1 4 5
2039+
M V30 5 2 5 6
2040+
M V30 6 1 1 6
2041+
M V30 END BOND
2042+
M V30 BEGIN SGROUP
2043+
M V30 1 DAT 0 ATOMS=(1 1) FIELDNAME=Tempstruct FIELDINFO="" -
2044+
M V30 FIELDDISP=" 2.1037 1.5400 DA ALL 0 0" QUERYOP="""" -
2045+
M V30 FIELDDATA=Foo1
2046+
M V30 END SGROUP
2047+
M V30 END CTAB
2048+
M END
2049+
)CTAB"_ctab;
2050+
// m must be truthy before it is dereferenced below.
REQUIRE(m);
2051+
CHECK(m->getNumAtoms() == 6);
2052+
CHECK(m->getNumBonds() == 6);
2053+
auto sgs = getSubstanceGroups(*m);
2054+
// Exactly one SGroup is declared, so indexing sgs[0] below is safe.
REQUIRE(sgs.size() == 1);
2055+
CHECK(sgs[0].getProp<std::string>("TYPE") == "DAT");
2056+
// An empty quoted value must not be mistaken for an escaped quote.
CHECK(sgs[0].getProp<std::string>("FIELDINFO").empty());
2057+
CHECK(sgs[0].getProp<std::string>("QUERYOP") == "\"");
2058+
}
2059+
}

0 commit comments

Comments
 (0)