Skip to content

Commit 07a3b98

Browse files
ivbergbrianrob
andauthored
Fix SourceLink parsing to support both wildcard and exact path mappings (#2355)
* Fix SourceLink parsing to support both wildcard and exact path mappings - Fixed regex bug that prevented parsing multiple SourceLink mappings - Updated ParseSourceLinkJson to accept both wildcard patterns and exact file paths - Enhanced GetUrlForFilePathUsingSourceLink with two-phase matching (exact then prefix) - Added comprehensive test for both mapping types - Resolves #2350 * Fix SourceLink to only use wildcard paths for prefix matching Per SourceLink spec rule 2: paths without wildcards should only be used for exact matching, not prefix matching. This change tracks whether each mapping was originally a wildcard pattern and only performs prefix matching for those entries. Previously, a non-wildcard path like 'C:\foo\bar.cs' could incorrectly match as a prefix for 'C:\foo\bar.cs.bak', violating the spec. * Fix test build --------- Co-authored-by: Brian Robbins <brianrob@microsoft.com>
1 parent ce38a7e commit 07a3b98

File tree

2 files changed

+152
-48
lines changed

2 files changed

+152
-48
lines changed

src/TraceEvent/Symbols/SymbolReader.cs

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,15 +1726,36 @@ internal bool GetUrlForFilePathUsingSourceLink(string buildTimeFilePath, out str
17261726

17271727
if (_sourceLinkMapping != null)
17281728
{
1729-
foreach (Tuple<string, string> map in _sourceLinkMapping)
1729+
// First, try to find an exact match for the buildTimeFilePath (non-wildcard entries)
1730+
foreach (Tuple<string, string, bool> map in _sourceLinkMapping)
17301731
{
17311732
string path = map.Item1;
1732-
string urlReplacement = map.Item2;
1733+
string urlTemplate = map.Item2;
1734+
bool isWildcard = map.Item3;
17331735

1734-
if (buildTimeFilePath.StartsWith(path, StringComparison.OrdinalIgnoreCase))
1736+
if (!isWildcard && buildTimeFilePath.Equals(path, StringComparison.OrdinalIgnoreCase))
17351737
{
1738+
// Exact match found - this is a direct file mapping without wildcards
1739+
relativeFilePath = "";
1740+
url = urlTemplate; // Use the URL as-is for exact matches
1741+
return true;
1742+
}
1743+
}
1744+
1745+
// If no exact match, try prefix matching (wildcard patterns only)
1746+
// Per spec rule 2: only paths that had a wildcard (*) should be used for prefix matching
1747+
foreach (Tuple<string, string, bool> map in _sourceLinkMapping)
1748+
{
1749+
string path = map.Item1;
1750+
string urlTemplate = map.Item2;
1751+
bool isWildcard = map.Item3;
1752+
1753+
// Only use wildcard patterns for prefix matching
1754+
if (isWildcard && buildTimeFilePath.StartsWith(path, StringComparison.OrdinalIgnoreCase))
1755+
{
1756+
// Prefix match - extract the relative path and substitute into URL
17361757
relativeFilePath = buildTimeFilePath.Substring(path.Length, buildTimeFilePath.Length - path.Length).Replace('\\', '/');
1737-
url = urlReplacement.Replace("*", string.Join("/", relativeFilePath.Split('/').Select(Uri.EscapeDataString)));
1758+
url = urlTemplate.Replace("*", string.Join("/", relativeFilePath.Split('/').Select(Uri.EscapeDataString)));
17381759
return true;
17391760
}
17401761
}
@@ -1746,11 +1767,12 @@ internal bool GetUrlForFilePathUsingSourceLink(string buildTimeFilePath, out str
17461767
}
17471768

17481769
/// <summary>
1749-
/// Parses SourceLink information and returns a list of filepath -> url Prefix tuples.
1770+
/// Parses SourceLink information and returns a list of (filepath, url, isWildcard) tuples.
1771+
/// Supports both wildcard patterns ("path\\*" -> "url/*") and exact path mappings ("path\\file.h" -> "url").
17501772
/// </summary>
1751-
private List<Tuple<string, string>> ParseSourceLinkJson(IEnumerable<string> sourceLinkContents)
1773+
private List<Tuple<string, string, bool>> ParseSourceLinkJson(IEnumerable<string> sourceLinkContents)
17521774
{
1753-
List<Tuple<string, string>> ret = null;
1775+
List<Tuple<string, string, bool>> ret = null;
17541776
foreach (string sourceLinkJson in sourceLinkContents)
17551777
{
17561778
// TODO this is not right for corner cases (e.g. file paths with " or , } in them)
@@ -1760,24 +1782,26 @@ private List<Tuple<string, string>> ParseSourceLinkJson(IEnumerable<string> sour
17601782
string mappings = m.Groups[1].Value;
17611783
while (!string.IsNullOrWhiteSpace(mappings))
17621784
{
1763-
m = Regex.Match(m.Groups[1].Value, "^\\s*\"(.*?)\"\\s*:\\s*\"(.*?)\"\\s*,?(.*)", RegexOptions.Singleline);
1785+
m = Regex.Match(mappings, "^\\s*\"(.*?)\"\\s*:\\s*\"(.*?)\"\\s*,?(.*)", RegexOptions.Singleline);
17641786
if (m.Success)
17651787
{
17661788
if (ret == null)
17671789
{
1768-
ret = new List<Tuple<string, string>>();
1790+
ret = new List<Tuple<string, string, bool>>();
17691791
}
17701792

17711793
string pathSpec = m.Groups[1].Value.Replace("\\\\", "\\");
1772-
if (pathSpec.EndsWith("*"))
1773-
{
1774-
pathSpec = pathSpec.Substring(0, pathSpec.Length - 1); // Remove the *
1775-
ret.Add(new Tuple<string, string>(pathSpec, m.Groups[2].Value));
1776-
}
1777-
else
1794+
string urlSpec = m.Groups[2].Value;
1795+
bool isWildcard = pathSpec.EndsWith("*");
1796+
1797+
// Support both wildcard patterns and exact path mappings
1798+
if (isWildcard)
17781799
{
1779-
_log.WriteLine("Warning: {0} does not end in *, skipping this mapping.", pathSpec);
1800+
// Wildcard pattern: remove the * from the path
1801+
pathSpec = pathSpec.Substring(0, pathSpec.Length - 1);
17801802
}
1803+
// Add the mapping with wildcard flag
1804+
ret.Add(new Tuple<string, string, bool>(pathSpec, urlSpec, isWildcard));
17811805

17821806
mappings = m.Groups[3].Value;
17831807
}
@@ -1799,7 +1823,7 @@ private List<Tuple<string, string>> ParseSourceLinkJson(IEnumerable<string> sour
17991823

18001824
private string _pdbPath;
18011825
private SymbolReader _reader;
1802-
private List<Tuple<string, string>> _sourceLinkMapping; // Used by SourceLink to map build paths to URLs (see GetUrlForFilePath)
1826+
private List<Tuple<string, string, bool>> _sourceLinkMapping; // Used by SourceLink to map build paths to URLs (path, url, isWildcard)
18031827
private bool _sourceLinkMappingInited; // Lazy init flag.
18041828
#endregion
18051829
}

src/TraceEvent/TraceEvent.Tests/Symbols/SymbolReaderTests.cs

Lines changed: 111 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,53 @@ public void SourceLinkUrlsAreEscaped()
153153
}
154154
}
155155

156+
[Fact]
157+
public void SourceLinkSupportsWildcardAndExactPathMappings()
158+
{
159+
// Create a test symbol module that returns SourceLink JSON with both wildcard and exact path mappings
160+
var testModule = new TestSymbolModuleWithSourceLink(_symbolReader);
161+
162+
// Test wildcard pattern matching
163+
bool result1 = testModule.GetUrlForFilePathUsingSourceLink(
164+
@"C:\src\myproject\subfolder\file.cs",
165+
out string url1,
166+
out string relativePath1);
167+
168+
Assert.True(result1, "Should match wildcard pattern");
169+
Assert.Equal("https://raw.githubusercontent.com/org/repo/commit/subfolder/file.cs", url1);
170+
Assert.Equal("subfolder/file.cs", relativePath1);
171+
172+
// Test exact path matching
173+
bool result2 = testModule.GetUrlForFilePathUsingSourceLink(
174+
@"c:\external\sdk\inc\header.h",
175+
out string url2,
176+
out string relativePath2);
177+
178+
Assert.True(result2, "Should match exact path");
179+
Assert.Equal("https://example.com/blobs/ABC123?download=true&filename=header.h", url2);
180+
Assert.Equal("", relativePath2);
181+
182+
// Test another wildcard pattern with escaped characters
183+
bool result3 = testModule.GetUrlForFilePathUsingSourceLink(
184+
@"C:\src\myproject\some folder\another file.cs",
185+
out string url3,
186+
out string relativePath3);
187+
188+
Assert.True(result3, "Should match wildcard pattern with spaces");
189+
Assert.Equal("https://raw.githubusercontent.com/org/repo/commit/some%20folder/another%20file.cs", url3);
190+
Assert.Equal("some folder/another file.cs", relativePath3);
191+
192+
// Test non-matching path
193+
bool result4 = testModule.GetUrlForFilePathUsingSourceLink(
194+
@"C:\other\path\file.cs",
195+
out string url4,
196+
out string relativePath4);
197+
198+
Assert.False(result4, "Should not match any pattern");
199+
Assert.Null(url4);
200+
Assert.Null(relativePath4);
201+
}
202+
156203
/// <summary>
157204
/// Tests that the checksum matching allows for different line endings.
158205
/// Open the PDB and try to retrieve the source code for one of the files,
@@ -370,31 +417,31 @@ public void MsfzFileDetectionWorks()
370417
var tempDir = Path.GetTempPath();
371418
var testFile = Path.Combine(tempDir, "test_msfz.pdb");
372419
var nonMsfzFile = Path.Combine(tempDir, "test_non_msfz.pdb");
373-
420+
374421
try
375422
{
376423
// Write MSFZ header followed by some dummy data
377424
var msfzHeader = "Microsoft MSFZ Container";
378425
var headerBytes = Encoding.UTF8.GetBytes(msfzHeader);
379426
var dummyData = new byte[] { 0x01, 0x02, 0x03, 0x04 };
380-
427+
381428
using (var stream = File.Create(testFile))
382429
{
383430
stream.Write(headerBytes, 0, headerBytes.Length);
384431
stream.Write(dummyData, 0, dummyData.Length);
385432
}
386-
433+
387434
// Use reflection to call the private IsMsfzFile method
388-
var method = typeof(SymbolReader).GetMethod("IsMsfzFile",
435+
var method = typeof(SymbolReader).GetMethod("IsMsfzFile",
389436
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
390-
437+
391438
var result = (bool)method.Invoke(_symbolReader, new object[] { testFile });
392-
439+
393440
Assert.True(result, "File with MSFZ header should be detected as MSFZ file");
394-
441+
395442
// Test with non-MSFZ file
396443
File.WriteAllText(nonMsfzFile, "This is not an MSFZ file");
397-
444+
398445
result = (bool)method.Invoke(_symbolReader, new object[] { nonMsfzFile });
399446
Assert.False(result, "File without MSFZ header should not be detected as MSFZ file");
400447
}
@@ -412,30 +459,30 @@ public void MsfzFileMovesToCorrectSubdirectory()
412459
{
413460
var tempDir = Path.Combine(Path.GetTempPath(), "msfz_test_" + Guid.NewGuid().ToString("N"));
414461
Directory.CreateDirectory(tempDir);
415-
462+
416463
try
417464
{
418465
var testFile = Path.Combine(tempDir, "test.pdb");
419-
466+
420467
// Create MSFZ file
421468
var msfzHeader = "Microsoft MSFZ Container";
422469
var headerBytes = Encoding.UTF8.GetBytes(msfzHeader);
423470
var dummyData = new byte[] { 0x01, 0x02, 0x03, 0x04 };
424-
471+
425472
using (var stream = File.Create(testFile))
426473
{
427474
stream.Write(headerBytes, 0, headerBytes.Length);
428475
stream.Write(dummyData, 0, dummyData.Length);
429476
}
430-
477+
431478
// Since MSFZ logic is now integrated into GetFileFromServer,
432479
// this test validates the MSFZ detection logic which remains the same
433-
var isMsfzMethod = typeof(SymbolReader).GetMethod("IsMsfzFile",
480+
var isMsfzMethod = typeof(SymbolReader).GetMethod("IsMsfzFile",
434481
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
435-
482+
436483
var isMsfz = (bool)isMsfzMethod.Invoke(_symbolReader, new object[] { testFile });
437484
Assert.True(isMsfz, "File should be detected as MSFZ file");
438-
485+
439486
// The file moving functionality is now tested through integration tests
440487
// since it's part of the GetFileFromServer method
441488
}
@@ -451,39 +498,40 @@ public void HttpRequestIncludesMsfzAcceptHeader()
451498
{
452499
// This test verifies that our HttpRequestMessage creation includes the MSFZ accept header
453500
// We'll create a minimal test by checking the private method behavior indirectly
454-
501+
455502
var tempDir = Path.Combine(Path.GetTempPath(), "msfz_http_test_" + Guid.NewGuid().ToString("N"));
456503
Directory.CreateDirectory(tempDir);
457504
var targetPath = Path.Combine(tempDir, "test.pdb");
458-
505+
459506
try
460507
{
461508
// Configure intercepting handler to capture the request with MSFZ content
462-
_handler.AddIntercept(new Uri("https://test.example.com/test.pdb"), HttpMethod.Get, HttpStatusCode.OK, () => {
509+
_handler.AddIntercept(new Uri("https://test.example.com/test.pdb"), HttpMethod.Get, HttpStatusCode.OK, () =>
510+
{
463511
var msfzContent = "Microsoft MSFZ Container\x00\x01\x02\x03";
464512
return new StringContent(msfzContent, Encoding.UTF8, "application/msfz0");
465513
});
466-
514+
467515
// This will trigger an HTTP request that should include the Accept header
468-
var method = typeof(SymbolReader).GetMethod("GetPhysicalFileFromServer",
516+
var method = typeof(SymbolReader).GetMethod("GetPhysicalFileFromServer",
469517
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
470-
471-
var result = (bool)method.Invoke(_symbolReader, new object[] {
472-
"https://test.example.com",
473-
"test.pdb",
474-
targetPath,
475-
null
518+
519+
var result = (bool)method.Invoke(_symbolReader, new object[] {
520+
"https://test.example.com",
521+
"test.pdb",
522+
targetPath,
523+
null
476524
});
477-
525+
478526
// Verify that the download was successful
479527
Assert.True(result, "GetPhysicalFileFromServer should succeed with MSFZ content");
480-
528+
481529
// In the new architecture, GetPhysicalFileFromServer just downloads the file
482530
// The MSFZ moving logic is handled by GetFileFromServer
483531
Assert.True(File.Exists(targetPath), "Downloaded file should exist at target path");
484-
532+
485533
// Verify the content is MSFZ
486-
var isMsfzMethod = typeof(SymbolReader).GetMethod("IsMsfzFile",
534+
var isMsfzMethod = typeof(SymbolReader).GetMethod("IsMsfzFile",
487535
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
488536
var isMsfz = (bool)isMsfzMethod.Invoke(_symbolReader, new object[] { targetPath });
489537
Assert.True(isMsfz, "Downloaded file should be detected as MSFZ");
@@ -577,5 +625,37 @@ protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage reques
577625
return base.SendAsync(request, cancellationToken);
578626
}
579627
}
628+
629+
/// <summary>
630+
/// A test symbol module that provides SourceLink JSON for testing.
631+
/// </summary>
632+
private class TestSymbolModuleWithSourceLink : ManagedSymbolModule
633+
{
634+
public TestSymbolModuleWithSourceLink(SymbolReader reader)
635+
: base(reader, "test.pdb")
636+
{
637+
}
638+
639+
public override SourceLocation SourceLocationForManagedCode(uint methodMetadataToken, int ilOffset)
640+
{
641+
// Not used in this test
642+
return null;
643+
}
644+
645+
protected override IEnumerable<string> GetSourceLinkJson()
646+
{
647+
// Return SourceLink JSON with both wildcard and exact path mappings
648+
// This mimics the example from issue #2350
649+
return new[]
650+
{
651+
@"{
652+
""documents"": {
653+
""C:\\src\\myproject\\*"": ""https://raw.githubusercontent.com/org/repo/commit/*"",
654+
""c:\\external\\sdk\\inc\\header.h"": ""https://example.com/blobs/ABC123?download=true&filename=header.h""
655+
}
656+
}"
657+
};
658+
}
659+
}
580660
}
581-
}
661+
}

0 commit comments

Comments
 (0)