Forum Discussion

indraniria's avatar
indraniria
Contributor
3 years ago

How to find a file containing a particular substring from a list of files in a folder using groovy?

I have a list of files inside a folder. From these files, I have to find the file that contains a particular transaction (a substring). How do I find that particular file using Groovy scripting?

  • Hi,

     

    The code below might not be the most elegant or efficient, but it works....

     

    // Script entry point: collect the files in c:\Temp that contain "Time",
    // log the result, and return it as the value of the script.
    def matchingFiles = findStringInFilesInFolder("c:\\Temp", "Time")
    log.info(matchingFiles)
    return matchingFiles
    
    
    
    /**
     * Finds all files in a folder for which searchInFile reports a match.
     *
     * @param folderName   folder whose files are examined
     * @param searchString text to look for inside each file
     * @return list of the file paths that contain searchString (empty if none do)
     */
    def findStringInFilesInFolder(folderName, searchString) {
        log.info("Searching for '${searchString}' in '${folderName}'.")

        // Gather the candidate file paths first, and log them for reference.
        def candidates = findFilesInFolder(folderName)
        log.info(candidates)

        // Keep only the candidates that contain the search string.
        return candidates.findAll { candidate -> searchInFile(candidate, searchString) }
    }
    
    /**
     * Lists the regular files found directly inside a folder (sub-folders are
     * not descended into).
     *
     * @param folderName path of the folder to list
     * @return list of absolute paths, one per regular file in the folder
     */
    def findFilesInFolder(folderName) {
        def paths = []
        def folder = new File(folderName)

        folder.eachFile { entry ->
            // Skip directories and anything else that is not a regular file.
            if (!entry.isFile()) {
                return
            }
            paths << entry.absolutePath
        }

        return paths
    }
    
    /**
     * Reports whether a text file contains the given search value.
     *
     * Only files whose name ends in .txt, .csv, .xml, .htm or .html (in any
     * letter case) are opened. Any other file is logged as skipped and
     * reported as not containing the value.
     *
     * @param fileName    path of the file to inspect
     * @param searchValue substring to look for
     * @return true if at least one line of the file contains searchValue,
     *         false otherwise
     */
    def searchInFile(fileName, searchValue) {

        log.info("Checking ${fileName}.");

        // Match on the end of the name rather than anywhere in the path, so that
        // a directory like "c:\logs.txt\" or a name like "a.txt.bak" does not
        // make a non-text file look searchable.
        def lowerName = fileName.toLowerCase();
        def searchable = ['.txt', '.csv', '.xml', '.htm', '.html'].any { ext ->
            lowerName.endsWith(ext)
        };

        if (!searchable) {
            log.info("File '${fileName}' is not a type to search in.")
            return false;
        }

        // Read line by line and stop at the first hit; there is no need to scan
        // the remainder of the file once the value has been found.
        return new File(fileName).withReader { reader ->
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains(searchValue)) {
                    return true;
                }
            }
            return false;
        }

    }
    
  • Hi,

     

    Here's v2 of the script which returns the first file that the search string is in.  It uses a for loop to iterate over the file names.  When the string is found, it stops and returns the file it was found in.

    This might help you with your other question about breaking out of loops.

    // Script entry point: find the first file in c:\Temp containing the search
    // text, log its path, and return it as the value of the script.
    def firstMatch = findStringInFilesInFolder("c:\\Temp", "String to search for")
    log.info(firstMatch)
    return firstMatch
    
    
    
    /**
     * Returns the first file in a folder for which searchInFile reports a match.
     * Files are examined in the order findFilesInFolder returns them, and the
     * search stops as soon as one matches.
     *
     * @param folderName   folder whose files are examined
     * @param searchString text to look for inside each file
     * @return path of the first matching file, or "" when no file matches
     */
    def findStringInFilesInFolder(folderName, searchString) {
        log.info("Searching for '${searchString}' in '${folderName}'.")

        def candidates = findFilesInFolder(folderName)
        log.info(candidates)

        def total = candidates.size()
        def searched = 0

        for (candidate in candidates) {
            searched++
            if (searchInFile(candidate, searchString)) {
                // Returning here is what ends the search early.
                log.info("Cool!  Found ${searchString} in ${candidate}, we only searched ${searched} files, instead of ${total}.")
                return candidate
            }
        }

        // Nothing matched.
        return ""
    }
    
    /**
     * Lists the regular files found directly inside a folder (sub-folders are
     * not descended into).
     *
     * @param folderName path of the folder to list
     * @return list of absolute paths, one per regular file in the folder
     */
    def findFilesInFolder(folderName) {
        def paths = []
        def folder = new File(folderName)

        folder.eachFile { entry ->
            // Skip directories and anything else that is not a regular file.
            if (!entry.isFile()) {
                return
            }
            paths << entry.absolutePath
        }

        return paths
    }
    
    /**
     * Reports whether a text file contains the given search value.
     *
     * Only files whose name ends in .txt, .csv, .xml, .htm or .html (in any
     * letter case) are opened. Any other file is logged as skipped and
     * reported as not containing the value.
     *
     * @param fileName    path of the file to inspect
     * @param searchValue substring to look for
     * @return true if the file's text contains searchValue, false otherwise
     */
    def searchInFile(fileName, searchValue) {

        log.info("Checking ${fileName}.");

        // Match on the end of the name rather than anywhere in the path, so that
        // a directory like "c:\logs.txt\" or a name like "a.txt.bak" does not
        // make a non-text file look searchable.
        def lowerName = fileName.toLowerCase();
        def searchable = ['.txt', '.csv', '.xml', '.htm', '.html'].any { ext ->
            lowerName.endsWith(ext)
        };

        if (!searchable) {
            log.info("File '${fileName}' is not a type to search in.")
            return false;
        }

        // The whole file is decoded as UTF-8 into one string so that a single
        // 'contains' call covers it; this holds the entire file in memory.
        String fileContents = new File(fileName).getText('UTF-8');

        return fileContents.contains(searchValue);

    }
    
  • ChrisAdams's avatar
    ChrisAdams
    Champion Level 3

    Hi,

     

    The code below might not be the most elegant or efficient, but it works....

     

    // Script entry point: collect the files in c:\Temp that contain "Time",
    // log the result, and return it as the value of the script.
    def matchingFiles = findStringInFilesInFolder("c:\\Temp", "Time")
    log.info(matchingFiles)
    return matchingFiles
    
    
    
    /**
     * Finds all files in a folder for which searchInFile reports a match.
     *
     * @param folderName   folder whose files are examined
     * @param searchString text to look for inside each file
     * @return list of the file paths that contain searchString (empty if none do)
     */
    def findStringInFilesInFolder(folderName, searchString) {
        log.info("Searching for '${searchString}' in '${folderName}'.")

        // Gather the candidate file paths first, and log them for reference.
        def candidates = findFilesInFolder(folderName)
        log.info(candidates)

        // Keep only the candidates that contain the search string.
        return candidates.findAll { candidate -> searchInFile(candidate, searchString) }
    }
    
    /**
     * Lists the regular files found directly inside a folder (sub-folders are
     * not descended into).
     *
     * @param folderName path of the folder to list
     * @return list of absolute paths, one per regular file in the folder
     */
    def findFilesInFolder(folderName) {
        def paths = []
        def folder = new File(folderName)

        folder.eachFile { entry ->
            // Skip directories and anything else that is not a regular file.
            if (!entry.isFile()) {
                return
            }
            paths << entry.absolutePath
        }

        return paths
    }
    
    /**
     * Reports whether a text file contains the given search value.
     *
     * Only files whose name ends in .txt, .csv, .xml, .htm or .html (in any
     * letter case) are opened. Any other file is logged as skipped and
     * reported as not containing the value.
     *
     * @param fileName    path of the file to inspect
     * @param searchValue substring to look for
     * @return true if at least one line of the file contains searchValue,
     *         false otherwise
     */
    def searchInFile(fileName, searchValue) {

        log.info("Checking ${fileName}.");

        // Match on the end of the name rather than anywhere in the path, so that
        // a directory like "c:\logs.txt\" or a name like "a.txt.bak" does not
        // make a non-text file look searchable.
        def lowerName = fileName.toLowerCase();
        def searchable = ['.txt', '.csv', '.xml', '.htm', '.html'].any { ext ->
            lowerName.endsWith(ext)
        };

        if (!searchable) {
            log.info("File '${fileName}' is not a type to search in.")
            return false;
        }

        // Read line by line and stop at the first hit; there is no need to scan
        // the remainder of the file once the value has been found.
        return new File(fileName).withReader { reader ->
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains(searchValue)) {
                    return true;
                }
            }
            return false;
        }

    }
    
  • Thanks for your help. As I have a large number of files in that folder, the script is taking a really long time. I will try to modify it wherever I can.

    • ChrisAdams's avatar
      ChrisAdams
      Champion Level 3

      Hi,

      I'm glad it works, but let's see if we can make it quicker.

      About this large number of files: is that a valid use case, or could they be cleared out between tests?  Do the files tend to be of a similar size?

      There are a couple of things that we could do....

      • If you only want the first file the search value is in, we could break out of the loop to save time.  The current solution searches every file and reports all files with the search string.
      • If the files are all small, we could read the file into one string and then we only need to call 'contains' once.  I went with the current approach as I'm not sure how it would cope with trying to put the contents of a large file into a single string.

       

  • ChrisAdams's avatar
    ChrisAdams
    Champion Level 3

    Hi,

     

    Here's v2 of the script which returns the first file that the search string is in.  It uses a for loop to iterate over the file names.  When the string is found, it stops and returns the file it was found in.

    This might help you with your other question about breaking out of loops.

    // Script entry point: find the first file in c:\Temp containing the search
    // text, log its path, and return it as the value of the script.
    def firstMatch = findStringInFilesInFolder("c:\\Temp", "String to search for")
    log.info(firstMatch)
    return firstMatch
    
    
    
    /**
     * Returns the first file in a folder for which searchInFile reports a match.
     * Files are examined in the order findFilesInFolder returns them, and the
     * search stops as soon as one matches.
     *
     * @param folderName   folder whose files are examined
     * @param searchString text to look for inside each file
     * @return path of the first matching file, or "" when no file matches
     */
    def findStringInFilesInFolder(folderName, searchString) {
        log.info("Searching for '${searchString}' in '${folderName}'.")

        def candidates = findFilesInFolder(folderName)
        log.info(candidates)

        def total = candidates.size()
        def searched = 0

        for (candidate in candidates) {
            searched++
            if (searchInFile(candidate, searchString)) {
                // Returning here is what ends the search early.
                log.info("Cool!  Found ${searchString} in ${candidate}, we only searched ${searched} files, instead of ${total}.")
                return candidate
            }
        }

        // Nothing matched.
        return ""
    }
    
    /**
     * Lists the regular files found directly inside a folder (sub-folders are
     * not descended into).
     *
     * @param folderName path of the folder to list
     * @return list of absolute paths, one per regular file in the folder
     */
    def findFilesInFolder(folderName) {
        def paths = []
        def folder = new File(folderName)

        folder.eachFile { entry ->
            // Skip directories and anything else that is not a regular file.
            if (!entry.isFile()) {
                return
            }
            paths << entry.absolutePath
        }

        return paths
    }
    
    /**
     * Reports whether a text file contains the given search value.
     *
     * Only files whose name ends in .txt, .csv, .xml, .htm or .html (in any
     * letter case) are opened. Any other file is logged as skipped and
     * reported as not containing the value.
     *
     * @param fileName    path of the file to inspect
     * @param searchValue substring to look for
     * @return true if the file's text contains searchValue, false otherwise
     */
    def searchInFile(fileName, searchValue) {

        log.info("Checking ${fileName}.");

        // Match on the end of the name rather than anywhere in the path, so that
        // a directory like "c:\logs.txt\" or a name like "a.txt.bak" does not
        // make a non-text file look searchable.
        def lowerName = fileName.toLowerCase();
        def searchable = ['.txt', '.csv', '.xml', '.htm', '.html'].any { ext ->
            lowerName.endsWith(ext)
        };

        if (!searchable) {
            log.info("File '${fileName}' is not a type to search in.")
            return false;
        }

        // The whole file is decoded as UTF-8 into one string so that a single
        // 'contains' call covers it; this holds the entire file in memory.
        String fileContents = new File(fileName).getText('UTF-8');

        return fileContents.contains(searchValue);

    }