2121import java .io .BufferedReader ;
2222import java .io .File ;
2323import java .io .FileInputStream ;
24- import java .io .FileOutputStream ;
2524import java .io .IOException ;
2625import java .io .InputStream ;
2726import java .io .InputStreamReader ;
@@ -124,7 +123,7 @@ protected ContentHandler getContentHandler(OutputStream output, Metadata metadat
124123 return new DefaultHandler ();
125124 }
126125 };
127- private File extractDir = new File ("." );
126+ private Path extractDir = Paths . get ("." );
128127 private ParseContext context ;
129128 private Detector detector ;
130129 private Parser parser ;
@@ -339,7 +338,7 @@ private void configurePDFExtractSettings() {
339338 pdfParserConfig .setExtractIncrementalUpdateInfo (true );
340339 pdfParserConfig .setParseIncrementalUpdates (true );
341340 String warn = "As a convenience, TikaCLI has turned on extraction of\n " +
342- "inline images and incremental updates for the PDFParser (TIKA-2374, " +
341+ "inline images and parsing of incremental updates for the PDFParser (TIKA-2374, " +
343342 "TIKA-4017 and TIKA-4354).\n " +
344343 "This is not the default behavior in Tika generally or in tika-server." ;
345344 LOG .info (warn );
@@ -441,7 +440,7 @@ public void process(String arg) throws Exception {
441440 if (dirPath .isEmpty ()) {
442441 dirPath = "." ;
443442 }
444- extractDir = new File (dirPath );
443+ extractDir = Paths . get (dirPath );
445444 } else if (arg .equals ("-z" ) || arg .equals ("--extract" )) {
446445 type = NO_OUTPUT ;
447446 context .set (EmbeddedDocumentExtractor .class , new FileEmbeddedDocumentExtractor ());
@@ -1089,22 +1088,20 @@ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler
10891088 MediaType contentType = detector .detect (inputStream , metadata );
10901089
10911090 String name = metadata .get (TikaCoreProperties .RESOURCE_NAME_KEY );
1092- File outputFile = null ;
1091+ Path outputFile = null ;
10931092 if (name == null ) {
1094- name = "file " + count ++;
1093+ name = "file_ " + count ++;
10951094 }
10961095 outputFile = getOutputFile (name , metadata , contentType );
10971096
10981097
1099- File parent = outputFile .getParentFile ();
1100- if (!parent .exists ()) {
1101- if (!parent .mkdirs ()) {
1102- throw new IOException ("unable to create directory \" " + parent + "\" " );
1103- }
1098+ Path parent = outputFile .getParent ();
1099+ if (parent != null && ! Files .isDirectory (parent )) {
1100+ Files .createDirectories (parent );
11041101 }
11051102 System .out .println ("Extracting '" + name + "' (" + contentType + ") to " + outputFile );
11061103
1107- try (FileOutputStream os = new FileOutputStream (outputFile )) {
1104+ try (OutputStream os = Files . newOutputStream (outputFile )) {
11081105 if (embeddedStreamTranslator .shouldTranslate (inputStream , metadata )) {
11091106 try (InputStream translated = embeddedStreamTranslator .translate (inputStream , metadata )) {
11101107 IOUtils .copy (translated , os );
@@ -1121,7 +1118,7 @@ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler
11211118 }
11221119 }
11231120
1124- private File getOutputFile (String name , Metadata metadata , MediaType contentType ) {
1121+ private Path getOutputFile (String name , Metadata metadata , MediaType contentType ) throws IOException {
11251122 String ext = getExtension (contentType );
11261123 if (name .indexOf ('.' ) == -1 && contentType != null ) {
11271124 name += ext ;
@@ -1148,13 +1145,14 @@ private File getOutputFile(String name, Metadata metadata, MediaType contentType
11481145 if (prefixLength > -1 ) {
11491146 normalizedName = normalizedName .substring (prefixLength );
11501147 }
1151- File outputFile = new File ( extractDir , normalizedName );
1148+ Path outputFile = extractDir . resolve ( normalizedName );
11521149 //if file already exists, prepend uuid
1153- if (outputFile .exists ()) {
1150+ if (Files .exists (outputFile )) {
11541151 String fileName = FilenameUtils .getName (normalizedName );
1155- outputFile = new File (extractDir , UUID
1156- .randomUUID ()
1157- .toString () + "-" + fileName );
1152+ outputFile = extractDir .resolve ( UUID .randomUUID () + "-" + fileName );
1153+ }
1154+ if (! outputFile .toAbsolutePath ().normalize ().startsWith (extractDir .toAbsolutePath ().normalize ())) {
1155+ throw new IOException ("Path traversal?!: " + outputFile .toAbsolutePath ());
11581156 }
11591157 return outputFile ;
11601158 }
@@ -1171,7 +1169,7 @@ private String getExtension(MediaType contentType) {
11711169 return ext ;
11721170 }
11731171 } catch (MimeTypeException e ) {
1174- e . printStackTrace ( );
1172+ LOG . info ( "bad mime type?" , e );
11751173 }
11761174 return ".bin" ;
11771175
0 commit comments