Skip to content

Commit 0c89f4b

Browse files
committed
Simplify path naming
(cherry picked from commit 6e224a1)
# Conflicts:
#   tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
1 parent 0b889d1 commit 0c89f4b

File tree

3 files changed

+24
-23
lines changed

3 files changed

+24
-23
lines changed

tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
import java.io.BufferedReader;
2222
import java.io.File;
2323
import java.io.FileInputStream;
24-
import java.io.FileOutputStream;
2524
import java.io.IOException;
2625
import java.io.InputStream;
2726
import java.io.InputStreamReader;
@@ -124,7 +123,7 @@ protected ContentHandler getContentHandler(OutputStream output, Metadata metadat
124123
return new DefaultHandler();
125124
}
126125
};
127-
private File extractDir = new File(".");
126+
private Path extractDir = Paths.get(".");
128127
private ParseContext context;
129128
private Detector detector;
130129
private Parser parser;
@@ -339,7 +338,7 @@ private void configurePDFExtractSettings() {
339338
pdfParserConfig.setExtractIncrementalUpdateInfo(true);
340339
pdfParserConfig.setParseIncrementalUpdates(true);
341340
String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
342-
"inline images and incremental updates for the PDFParser (TIKA-2374, " +
341+
"inline images and parsing of incremental updates for the PDFParser (TIKA-2374, " +
343342
"TIKA-4017 and TIKA-4354).\n" +
344343
"This is not the default behavior in Tika generally or in tika-server.";
345344
LOG.info(warn);
@@ -441,7 +440,7 @@ public void process(String arg) throws Exception {
441440
if (dirPath.isEmpty()) {
442441
dirPath = ".";
443442
}
444-
extractDir = new File(dirPath);
443+
extractDir = Paths.get(dirPath);
445444
} else if (arg.equals("-z") || arg.equals("--extract")) {
446445
type = NO_OUTPUT;
447446
context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor());
@@ -1089,22 +1088,20 @@ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler
10891088
MediaType contentType = detector.detect(inputStream, metadata);
10901089

10911090
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
1092-
File outputFile = null;
1091+
Path outputFile = null;
10931092
if (name == null) {
1094-
name = "file" + count++;
1093+
name = "file_" + count++;
10951094
}
10961095
outputFile = getOutputFile(name, metadata, contentType);
10971096

10981097

1099-
File parent = outputFile.getParentFile();
1100-
if (!parent.exists()) {
1101-
if (!parent.mkdirs()) {
1102-
throw new IOException("unable to create directory \"" + parent + "\"");
1103-
}
1098+
Path parent = outputFile.getParent();
1099+
if (parent != null && ! Files.isDirectory(parent)) {
1100+
Files.createDirectories(parent);
11041101
}
11051102
System.out.println("Extracting '" + name + "' (" + contentType + ") to " + outputFile);
11061103

1107-
try (FileOutputStream os = new FileOutputStream(outputFile)) {
1104+
try (OutputStream os = Files.newOutputStream(outputFile)) {
11081105
if (embeddedStreamTranslator.shouldTranslate(inputStream, metadata)) {
11091106
try (InputStream translated = embeddedStreamTranslator.translate(inputStream, metadata)) {
11101107
IOUtils.copy(translated, os);
@@ -1121,7 +1118,7 @@ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler
11211118
}
11221119
}
11231120

1124-
private File getOutputFile(String name, Metadata metadata, MediaType contentType) {
1121+
private Path getOutputFile(String name, Metadata metadata, MediaType contentType) throws IOException {
11251122
String ext = getExtension(contentType);
11261123
if (name.indexOf('.') == -1 && contentType != null) {
11271124
name += ext;
@@ -1148,13 +1145,14 @@ private File getOutputFile(String name, Metadata metadata, MediaType contentType
11481145
if (prefixLength > -1) {
11491146
normalizedName = normalizedName.substring(prefixLength);
11501147
}
1151-
File outputFile = new File(extractDir, normalizedName);
1148+
Path outputFile = extractDir.resolve(normalizedName);
11521149
//if file already exists, prepend uuid
1153-
if (outputFile.exists()) {
1150+
if (Files.exists(outputFile)) {
11541151
String fileName = FilenameUtils.getName(normalizedName);
1155-
outputFile = new File(extractDir, UUID
1156-
.randomUUID()
1157-
.toString() + "-" + fileName);
1152+
outputFile = extractDir.resolve( UUID.randomUUID() + "-" + fileName);
1153+
}
1154+
if (! outputFile.toAbsolutePath().normalize().startsWith(extractDir.toAbsolutePath().normalize())) {
1155+
throw new IOException("Path traversal?!: " + outputFile.toAbsolutePath());
11581156
}
11591157
return outputFile;
11601158
}
@@ -1171,7 +1169,7 @@ private String getExtension(MediaType contentType) {
11711169
return ext;
11721170
}
11731171
} catch (MimeTypeException e) {
1174-
e.printStackTrace();
1172+
LOG.info("bad mime type?", e);
11751173
}
11761174
return ".bin";
11771175

tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ public void testListSupportedTypes() throws Exception {
295295

296296
@Test
297297
public void testExtractSimple() throws Exception {
298-
String[] expectedChildren = new String[]{"MBD002B040A.cdx", "file4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file0.emf"};
298+
String[] expectedChildren = new String[]{"MBD002B040A.cdx", "file_4.png", "MBD002B0FA6.bin", "MBD00262FE3.txt", "file_0.emf"};
299299
testExtract("/coffee.xls", expectedChildren, 8);
300300
}
301301

tika-pipes/tika-emitters/tika-emitter-fs/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -76,20 +76,23 @@ public class FileSystemEmitter extends AbstractEmitter implements StreamEmitter
7676
@Override
7777
public void emit(String emitKey, List<Metadata> metadataList, ParseContext parseContext) throws IOException, TikaEmitterException {
7878
Path output;
79-
if (metadataList == null || metadataList.size() == 0) {
79+
if (metadataList == null || metadataList.isEmpty()) {
8080
throw new TikaEmitterException("metadata list must not be null or of size 0");
8181
}
8282

83-
if (fileExtension != null && fileExtension.length() > 0) {
83+
if (fileExtension != null && ! fileExtension.isEmpty()) {
8484
emitKey += "." + fileExtension;
8585
}
8686
if (basePath != null) {
8787
output = basePath.resolve(emitKey);
88+
if (!output.toAbsolutePath().normalize().startsWith(basePath.toAbsolutePath().normalize())) {
89+
throw new TikaEmitterException("path traversal?! " + output.toAbsolutePath());
90+
}
8891
} else {
8992
output = Paths.get(emitKey);
9093
}
9194

92-
if (!Files.isDirectory(output.getParent())) {
95+
if (output.getParent() != null && !Files.isDirectory(output.getParent())) {
9396
Files.createDirectories(output.getParent());
9497
}
9598
try (Writer writer = Files.newBufferedWriter(output, StandardCharsets.UTF_8)) {

0 commit comments

Comments
 (0)