Skip to content

Commit eee34dd

Browse files
committed
Merge discvr-21.11 to develop
2 parents 3cbfbb5 + 513fd68 commit eee34dd

10 files changed

Lines changed: 149 additions & 96 deletions

File tree

.gitignore

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,5 +6,17 @@
66
/mcc/resources/views/gen
77
/mcc/resources/web/mcc/gen
88

9+
elispot_assay/resources/credits/dependencies.txt
10+
elispot_assay/resources/credits/jars.txt
11+
12+
mcc/resources/credits/dependencies.txt
13+
mcc/resources/credits/jars.txt
14+
15+
mGAP/resources/credits/dependencies.txt
16+
mGAP/resources/credits/jars.txt
17+
18+
tcrdb/resources/credits/dependencies.txt
19+
tcrdb/resources/credits/jars.txt
20+
921
variantdb/resources/credits/dependencies.txt
1022
variantdb/resources/credits/jars.txt

mGAP/resources/etls/prime-seq.xml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,8 @@
9191
<column>liftedVcfId/dataid/DataFileUrl</column>
9292
<column>liftedVcfId/name</column>
9393
<column>liftedVcfId/library_id/name</column>
94+
<column>sitesOnlyVcfId/dataid/DataFileUrl</column>
95+
<column>sitesOnlyVcfId/name</column>
9496
<column>humanJbrowseId</column>
9597
<column>objectId</column>
9698
</sourceColumns>
@@ -99,6 +101,7 @@
99101
<columnTransforms>
100102
<column source="genomeId/name" target="genomeId" transformClass="org.labkey.mgap.columnTransforms.GenomeTransform" />
101103
<column source="vcfId/dataid/DataFileUrl" target="vcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
104+
<column source="sitesOnlyVcfId/dataid/DataFileUrl" target="sitesOnlyVcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
102105
<column source="jbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseSessionTransform"/>
103106
<column source="liftedVcfId/dataid/DataFileUrl" target="liftedVcfId" transformClass="org.labkey.mgap.columnTransforms.LiftedVcfTransform" />
104107
<column source="humanJbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseHumanSessionTransform"/>
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
ALTER TABLE mGAP.variantCatalogReleases ADD sitesOnlyVcfId int;

mGAP/resources/schemas/mgap.xml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,6 +149,15 @@
149149
<fkColumnName>rowid</fkColumnName>
150150
</fk>
151151
</column>
152+
<column columnName="sitesOnlyVcfId">
153+
<columnTitle>Sites-Only VCF File</columnTitle>
154+
<nullable>true</nullable>
155+
<fk>
156+
<fkDbSchema>sequenceanalysis</fkDbSchema>
157+
<fkTable>outputfiles</fkTable>
158+
<fkColumnName>rowid</fkColumnName>
159+
</fk>
160+
</column>
152161
<column columnName="humanJbrowseId">
153162
<columnTitle>Genome Browser (Human)</columnTitle>
154163
<nullable>true</nullable>

mGAP/src/org/labkey/mgap/mGAPModule.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ public String getName()
5959
@Override
6060
public Double getSchemaVersion()
6161
{
62-
return 16.59;
62+
return 16.60;
6363
}
6464

6565
@Override

mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -266,6 +266,7 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputs, List<Sequ
266266
Map<String, SequenceOutputFile> outputVCFMap = new HashMap<>();
267267
Map<String, SequenceOutputFile> outputTableMap = new HashMap<>();
268268
Map<String, SequenceOutputFile> liftedVcfMap = new HashMap<>();
269+
Map<String, SequenceOutputFile> sitesOnlyVcfMap = new HashMap<>();
269270
Map<String, SequenceOutputFile> trackVCFMap = new HashMap<>();
270271

271272
for (SequenceOutputFile so : outputsCreated)
@@ -285,6 +286,11 @@ else if (so.getCategory().contains("Lifted"))
285286
String name = so.getName().replaceAll(" Lifted to Human", "");
286287
liftedVcfMap.put(name, so);
287288
}
289+
else if (so.getCategory().contains("mGAP Release: Sites Only"))
290+
{
291+
String name = so.getName().replaceAll(": Sites Only", "");
292+
sitesOnlyVcfMap.put(name, so);
293+
}
288294
else if (so.getCategory().endsWith("Release"))
289295
{
290296
outputVCFMap.put(so.getName(), so);
@@ -328,6 +334,12 @@ else if (so.getCategory().endsWith("Release Track"))
328334
throw new PipelineJobException("Unable to find lifted VCF for release: " + release);
329335
}
330336

337+
SequenceOutputFile sitesOnlyVcf = sitesOnlyVcfMap.get(release);
338+
if (sitesOnlyVcf == null)
339+
{
340+
throw new PipelineJobException("Unable to find sites-only VCF for release: " + release);
341+
}
342+
331343
//find basic stats:
332344
job.getLogger().info("inspecting file: " + so.getName());
333345
int totalSubjects;
@@ -391,6 +403,7 @@ else if (so.getCategory().endsWith("Release Track"))
391403
row.put("releaseDate", new Date());
392404
row.put("vcfId", so.getRowid());
393405
row.put("liftedVcfId", liftedVcf.getRowid());
406+
row.put("sitesOnlyVcfId", sitesOnlyVcf.getRowid());
394407
row.put("variantTable", so2.getRowid());
395408
row.put("genomeId", so.getLibrary_id());
396409
row.put("totalSubjects", totalSubjects);
@@ -908,8 +921,12 @@ private File liftToHuman(JobContext ctx, File primaryTrackVcf, ReferenceGenome s
908921
wrapper.execute(sourceGenome.getWorkingFastaFile(), primaryTrackVcf, noGenotypes, Arrays.asList("--sites-only-vcf-output"));
909922
}
910923

911-
ctx.getFileManager().addIntermediateFile(noGenotypes);
912-
ctx.getFileManager().addIntermediateFile(new File(noGenotypes.getPath() + ".tbi"));
924+
SequenceOutputFile output = new SequenceOutputFile();
925+
output.setFile(noGenotypes);
926+
output.setName(primaryTrackVcf.getName() + ": Sites Only");
927+
output.setCategory("mGAP Release: Sites Only");
928+
output.setLibrary_id(sourceGenome.getGenomeId());
929+
ctx.getFileManager().addSequenceOutput(output);
913930

914931
//lift to target genome
915932
Integer chainFileId = ctx.getSequenceSupport().getCachedObject(AnnotationStep.CHAIN_FILE, Integer.class);

tcrdb/resources/assay/TCRdb/domains/run.xml

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,12 +23,6 @@
2323
<exp:RangeURI>http://www.w3.org/2001/XMLSchema#multiLine</exp:RangeURI>
2424
<exp:Label>Comments</exp:Label>
2525
</exp:PropertyDescriptor>
26-
<exp:PropertyDescriptor>
27-
<exp:Name>assayId</exp:Name>
28-
<exp:Required>false</exp:Required>
29-
<exp:RangeURI>http://www.w3.org/2001/XMLSchema#int</exp:RangeURI>
30-
<exp:Label>Assay Id</exp:Label>
31-
</exp:PropertyDescriptor>
3226
<exp:PropertyDescriptor>
3327
<exp:Name>performedBy</exp:Name>
3428
<exp:Required>false</exp:Required>

tcrdb/src/org/labkey/tcrdb/pipeline/CellRangerVDJCellHashingHandler.java

Lines changed: 26 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,13 @@
3838

3939
public class CellRangerVDJCellHashingHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
4040
{
41-
private FileType _vloupeFileType = new FileType("vloupe", false);
42-
private FileType _htmlFileType = new FileType("html", false);
41+
private final FileType _vloupeFileType = new FileType("vloupe", false);
42+
private final FileType _htmlFileType = new FileType("html", false);
4343

4444
public static final String CATEGORY = "Cell Hashing Calls (VDJ)";
4545

4646
public static final String TARGET_ASSAY = "targetAssay";
4747
public static final String DELETE_EXISTING_ASSAY_DATA = "deleteExistingAssayData";
48-
public static final String ALLOW_GD_RECOVERY = "allowGDRecovery";
4948
public static final String USE_GEX_BARCODES = "useGexBarcodes";
5049

5150
public CellRangerVDJCellHashingHandler()
@@ -60,15 +59,12 @@ private static List<ToolParameterDescriptor> getDefaultParams()
6059
ToolParameterDescriptor.create(DELETE_EXISTING_ASSAY_DATA, "Delete Any Existing Assay Data", "If selected, prior to importing assay data, and existing assay runs in the target container from this readset will be deleted.", "checkbox", new JSONObject(){{
6160
put("checked", true);
6261
}}, true),
63-
ToolParameterDescriptor.create(ALLOW_GD_RECOVERY, "Perform G/D Recovery", "Cellranger marks TRG/TRD rows as non-productive. As a result, gamma/delta cells will tend to be marked non-cell, since the cell lacks productive A/B chain. If selected, the code will ignore the cellranger is_cell flag to recover cells if the row is TRD/TRG, it has a CDR3 and is full-length.", "checkbox", new JSONObject(){{
64-
put("checked", false);
65-
}}, false),
6662
ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
6763
put("checked", true);
68-
}}, false),
69-
ToolParameterDescriptor.create(USE_GEX_BARCODES, "Use GEX and TCR Cell Barcodes", "If checked, the cell barcode whitelist used for cell hashing will be the union of TCR and GEX cell barcodes. If T-cells are a rare component of total cells, this might enhance the effectiveness of the callers by providing more positive signal.", "checkbox", new JSONObject(){{
70-
put("checked", true);
7164
}}, false)
65+
// ToolParameterDescriptor.create(USE_GEX_BARCODES, "Use GEX and TCR Cell Barcodes", "If checked, the cell barcode whitelist used for cell hashing will be the union of TCR and GEX cell barcodes. If T-cells are a rare component of total cells, this might enhance the effectiveness of the callers by providing more positive signal.", "checkbox", new JSONObject(){{
66+
// put("checked", true);
67+
// }}, false)
7268
));
7369

7470
ret.addAll(CellHashingService.get().getHashingCallingParams(false));
@@ -163,16 +159,10 @@ public void complete(PipelineJob job, List<SequenceOutputFile> inputFiles, List<
163159
deleteExistingData = ConvertHelper.convert(job.getParameters().get(DELETE_EXISTING_ASSAY_DATA), Boolean.class);
164160
}
165161

166-
boolean allowGDRecovery = false;
167-
if (job.getParameters().get(ALLOW_GD_RECOVERY) != null)
168-
{
169-
allowGDRecovery = ConvertHelper.convert(job.getParameters().get(ALLOW_GD_RECOVERY), Boolean.class);
170-
}
171-
172162
for (SequenceOutputFile so : inputFiles)
173163
{
174164
AnalysisModel model = support.getCachedAnalysis(so.getAnalysis_id());
175-
new CellRangerVDJUtils(job.getLogger()).importAssayData(job, model, so.getFile(), job.getLogFile().getParentFile(), assayId, null, deleteExistingData, allowGDRecovery);
165+
new CellRangerVDJUtils(job.getLogger()).importAssayData(job, model, so.getFile(), job.getLogFile().getParentFile(), assayId, null, deleteExistingData);
176166
}
177167
}
178168
}
@@ -225,8 +215,7 @@ private void processVloupeFile(JobContext ctx, File perCellTsv, Readset rs, Reco
225215
parameters.basename = FileUtil.makeLegalName(rs.getName());
226216
parameters.allowableHtoBarcodes = htosPerReadset;
227217

228-
boolean allowGDRecovery = ctx.getParams().optBoolean(ALLOW_GD_RECOVERY, false);
229-
parameters.cellBarcodeWhitelistFile = createCellbarcodeWhitelist(ctx, perCellTsv, true, allowGDRecovery);
218+
parameters.cellBarcodeWhitelistFile = createCellbarcodeWhitelist(ctx, perCellTsv, true);
230219
File existingCountMatrixUmiDir = CellHashingService.get().getExistingFeatureBarcodeCountDir(rs, CellHashingService.BARCODE_TYPE.hashing, ctx.getSequenceSupport());
231220

232221
File cellToHto = CellHashingService.get().generateHashingCallsForRawMatrix(rs, output, ctx, parameters, existingCountMatrixUmiDir);
@@ -248,52 +237,55 @@ else if (htosPerReadset.size() == 1)
248237
}
249238
}
250239

251-
private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean allowCellsLackingCDR3, boolean allowGDRecovery) throws PipelineJobException
240+
private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean allowCellsLackingCDR3) throws PipelineJobException
252241
{
253242
//prepare whitelist of cell indexes based on TCR calls:
254243
File cellBarcodeWhitelist = new File(ctx.getSourceDirectory(), "validCellIndexes.csv");
255244
Set<String> uniqueBarcodes = new HashSet<>();
256245
Set<String> uniqueBarcodesIncludingNoCDR3 = new HashSet<>();
257246
ctx.getLogger().debug("writing cell barcodes, using file: " + perCellTsv.getPath());
258247
ctx.getLogger().debug("allow cells lacking CDR3: " + allowCellsLackingCDR3);
259-
ctx.getLogger().debug("allow gamma/delta recovery: " + allowGDRecovery);
260248

261249
int totalBarcodeWritten = 0;
250+
int cellbarcodeIdx = 0;
251+
int notCellIdx = 1;
252+
int cdr3Idx = -1;
262253
try (CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(cellBarcodeWhitelist), ',', CSVWriter.NO_QUOTE_CHARACTER); CSVReader reader = new CSVReader(Readers.getReader(perCellTsv), ','))
263254
{
264255
int rowIdx = 0;
265256
int noCallRows = 0;
266257
int nonCell = 0;
267-
int recoveredGD = 0;
268258
String[] row;
269259
while ((row = reader.readNext()) != null)
270260
{
271261
//skip header
272262
rowIdx++;
273-
if (rowIdx > 1)
263+
if (rowIdx == 1)
264+
{
265+
List<String> header = Arrays.asList(row);
266+
cdr3Idx = header.indexOf("cdr3");
267+
if (cdr3Idx == -1)
268+
{
269+
throw new PipelineJobException("Unable to find CDR3 field in header: " + perCellTsv.getPath());
270+
}
271+
}
272+
else
274273
{
275-
if ("False".equalsIgnoreCase(row[1]))
274+
if ("False".equalsIgnoreCase(row[notCellIdx]))
276275
{
277-
if (allowGDRecovery && CellRangerVDJUtils.shouldRecoverGammaDeltaRow(row))
278-
{
279-
recoveredGD++;
280-
}
281-
else
282-
{
283-
nonCell++;
284-
continue;
285-
}
276+
nonCell++;
277+
continue;
286278
}
287279

288280
//NOTE: allow these to pass for cell-hashing under some conditions
289-
boolean hasCDR3 = !"None".equals(row[12]);
281+
boolean hasCDR3 = !"None".equals(row[cdr3Idx]);
290282
if (!hasCDR3)
291283
{
292284
noCallRows++;
293285
}
294286

295287
//NOTE: 10x appends "-1" to barcodes
296-
String barcode = row[0].split("-")[0];
288+
String barcode = row[cellbarcodeIdx].split("-")[0];
297289
if ((allowCellsLackingCDR3 || hasCDR3) && !uniqueBarcodes.contains(barcode))
298290
{
299291
writer.writeNext(new String[]{barcode});
@@ -308,7 +300,6 @@ private File createCellbarcodeWhitelist(JobContext ctx, File perCellTsv, boolean
308300
ctx.getLogger().debug("rows inspected: " + (rowIdx - 1));
309301
ctx.getLogger().debug("rows without CDR3: " + noCallRows);
310302
ctx.getLogger().debug("rows not called as cells: " + nonCell);
311-
ctx.getLogger().debug("gamma/delta clonotype rows recovered: " + recoveredGD);
312303
ctx.getLogger().debug("unique cell barcodes (with CDR3): " + uniqueBarcodes.size());
313304
ctx.getLogger().debug("unique cell barcodes (including no CDR3): " + uniqueBarcodesIncludingNoCDR3.size());
314305
ctx.getFileManager().addIntermediateFile(cellBarcodeWhitelist);

0 commit comments

Comments
 (0)