Show only lines that contain a search term
-
Here’s the script listing for
LineFilterViaHiding1.py
# -*- coding: utf-8 -*-
from __future__ import print_function

# LineFilterViaHiding1.py
#
# Notepad++ PythonScript: prompt for a literal search string and hide every
# line of the active document that does not contain it.  Answering with an
# empty box (or with Shift held) shows all lines again.
#
# references:
#  https://community.notepad-plus-plus.org/topic/23929 "Show only lines that contain a search term"
#  https://community.notepad-plus-plus.org/topic/20058 (see Dec 17, 2022 9:50AM posting)

from Npp import *
import inspect
import os
from ctypes import (WinDLL)

#-------------------------------------------------------------------------------

# N++'s redmarking indicator number:
SCE_UNIVERSAL_FOUND_STYLE = 31  # see https://github.com/notepad-plus-plus/notepad-plus-plus/search?q=SCE_UNIVERSAL_FOUND_STYLE

#-------------------------------------------------------------------------------

def editor_getWordAtCaretOrSelection():
    """Return the selected text, or else the word at the caret ('' if none).

    With exactly one non-empty selection its text is returned as-is.
    Otherwise the word at the caret is looked up and, when one is found,
    it is also selected as a side effect.
    """
    retval = ''
    (sel_start, sel_end) = (editor.getSelectionStart(), editor.getSelectionEnd())
    if editor.getSelections() == 1 and sel_start != sel_end:
        retval = editor.getTextRange(sel_start, sel_end)
    else:
        start_of_word_pos = editor.wordStartPosition(editor.getCurrentPos(), True)
        end_of_word_pos = editor.wordEndPosition(start_of_word_pos, True)
        if start_of_word_pos != end_of_word_pos:
            retval = editor.getTextRange(start_of_word_pos, end_of_word_pos)
            editor.setSelection(end_of_word_pos, start_of_word_pos)
    return retval

#-------------------------------------------------------------------------------

class LFVH1(object):
    """Line filter that hides the lines without a match for the user's string.

    Settings are kept per tab in ``settings_by_viewfile_dict``, keyed by the
    view number concatenated with the file name.  Each entry holds:

    * ``'show_lines_tup_list'`` - list of ``(start_pos, end_pos)`` match
      spans whose lines stay visible; an empty list means "show all lines".
    * ``'previous_user_input'`` - the last search string accepted for the tab.

    NOTE(review): users have reported that some non-matching lines
    occasionally stay visible after filtering; the cause was not identified
    and nothing in this class is known to address it.
    """

    # True until the first call to print(); used to show the console once.
    print_first = True

    def __init__(self):
        # written this way so that debugging can be toggled by changing
        # a single character (0 <-> 1)
        self.debug = True if 0 else False
        this_file = inspect.getframeinfo(inspect.currentframe()).filename
        self.this_script_name = os.path.splitext(os.path.basename(this_file))[0]
        self.settings_by_viewfile_dict = {}
        # remember the normal line-number-margin color so it can be restored
        self.UNFILTERED_BACKGROUND_RGB = editor.styleGetBack(STYLESCOMMON.LINENUMBER)
        self.FILTERED_BACKGROUND_RGB = (255, 192, 203)  # pinkish margin color to indicate not all lines shown due to filtering
        # initialize for currently active tab when invoked for the first time
        self._active_tab_key(trace=True)
        notepad.callback(self.bufferactivated_callback, [NOTIFICATION.BUFFERACTIVATED])
        editor.callback(self.updateui_callback, [SCINTILLANOTIFICATION.UPDATEUI])

    def _current_view_plus_file(self):
        """Return the settings key of the active tab (view number + file name)."""
        return str(notepad.getCurrentView()) + notepad.getCurrentFilename()

    def _active_tab_key(self, trace=False):
        """Return the active tab's settings key, creating its settings if missing.

        When *trace* is true the key is also sent to the debug output.
        """
        view_plus_file = self._current_view_plus_file()
        if trace:
            self.dprint('view_plus_file:', view_plus_file)
        # no-op when the tab already has settings
        self.reset_settings_for_active_tab(view_plus_file)
        return view_plus_file

    def prompt_for_string_to_match(self):
        """Ask the user for a search string and filter the active tab by it.

        An empty answer, or holding Shift while answering, restores the
        display of all lines.  Cancelling the prompt, or a search without
        any match, leaves the current display unchanged.
        """
        view_plus_file = self._active_tab_key(trace=True)
        settings = self.settings_by_viewfile_dict[view_plus_file]

        # determine what default text to put in the user prompt box:
        user_input = settings['previous_user_input']
        if editor.getSelectionEmpty():
            word = editor_getWordAtCaretOrSelection()
            if word:
                user_input = word
        else:
            rect_sel_mode = editor.getSelectionMode() in [ SELECTIONMODE.RECTANGLE, SELECTIONMODE.THIN ]
            multi_sel_mode = editor.getSelections() > 1
            if not rect_sel_mode and not multi_sel_mode:
                stream_sel_contents = editor.getSelText()
                if '\r' not in stream_sel_contents and '\n' not in stream_sel_contents:
                    # we only want selected text if it is all on one line
                    user_input = stream_sel_contents

        prompt_str = '\r\n'.join([
            ' (Hold Shift while answering or leave box empty to restore showing of all lines)',
            'ENTER LITERAL SEARCH STRING to show only those lines that contain the string',
        ])
        user_input = self.prompt(prompt_str, user_input)

        # the Shift state is sampled right after the prompt box is dismissed
        restore_all_lines_to_shown = self.shift_held()
        if restore_all_lines_to_shown:
            user_input = ''

        if user_input is None:
            return  # user cancel

        if not user_input:
            if settings['show_lines_tup_list']:
                # we currently have some hidden lines
                if restore_all_lines_to_shown or self.yes_no('Are you sure you want to show ALL lines?'):
                    settings['show_lines_tup_list'] = []  # empty list means to show all lines
                    self.freshen_user_view_of_active_tab()
            return

        # find the list of lines that should be shown:
        L = []
        editor.search(user_input, lambda m: L.append(m.span(0)))
        self.dprint('L:', L)

        if not L:
            # keep the message box readable for long search strings
            shown_input = user_input[:20]
            if shown_input != user_input:
                shown_input += '...'
            msg = '\r\n\r\n'.join([
                'No matches for "{}"'.format(shown_input),
                'Leaving previously displayed lines unchanged.',
            ])
            self.mb(msg)
            return

        # remember current values:
        settings['show_lines_tup_list'] = L
        settings['previous_user_input'] = user_input

        self.freshen_user_view_of_active_tab()

    def bufferactivated_callback(self, args):
        """Re-apply the filter of a tab when that tab is activated."""
        self.dprint('BUFFERACTIVATED:', args)
        self._active_tab_key(trace=True)
        # refresh the hidden/shown state of the lines, because N++ will revert
        #  our previously hidden lines to shown when a tab is activated
        self.freshen_user_view_of_active_tab()

    def updateui_callback(self, args):
        """Detect a tab not seen before (e.g. after a rename or a save-as)."""
        # this is mainly for when user renames a file, or does a save-as;
        #  it might be our quickest route to detection of that
        #self.dprint('UPDATEUI:', args)  # would fire too often!
        view_plus_file = self._current_view_plus_file()
        if view_plus_file not in self.settings_by_viewfile_dict:
            # the current tab is seen as never having been encountered before (from a rename or a save-as, e.g.)
            self.reset_settings_for_active_tab(view_plus_file)
            # only update when absolutely necessary, as UPDATEUI callback happens a lot!
            self.freshen_user_view_of_active_tab()

    def freshen_user_view_of_active_tab(self):
        """Make the active tab's shown/hidden lines agree with its settings.

        With no stored match spans all lines are shown and the line number
        margin gets its normal background.  Otherwise every line is hidden,
        the lines holding a stored match are shown again with the match
        highlighted, the caret is moved onto a visible line if necessary,
        and the line number margin gets the "filtered" background.

        NOTE(review): the stored spans are document positions captured when
        the search ran; presumably they go stale once the text is edited -
        confirm before relying on them after edits.
        """
        view_plus_file = self._active_tab_key()
        L = self.settings_by_viewfile_dict[view_plus_file]['show_lines_tup_list']
        show_all_not_hide_some = not L

        editor.foldAll(FOLDACTION.EXPAND)
        editor.setIndicatorCurrent(SCE_UNIVERSAL_FOUND_STYLE)
        editor.indicatorClearRange(0, editor.getLength())

        if show_all_not_hide_some:
            self.show_all_lines()
            # set the background of the line number margin to normal
            #  to indicate that all lines are shown
            editor.styleSetBack(STYLESCOMMON.LINENUMBER, self.UNFILTERED_BACKGROUND_RGB)
            return

        line_of_caret = editor.lineFromPosition(editor.getCurrentPos())

        # hide all lines first:
        self.show_all_lines(False)

        # show only lines with hits from the search based on user criterion:
        for (pos_start, pos_end) in L:
            editor.setIndicatorCurrent(SCE_UNIVERSAL_FOUND_STYLE)
            editor.indicatorFillRange(pos_start, pos_end - pos_start)
            line_start = editor.lineFromPosition(pos_start)
            line_end = editor.lineFromPosition(pos_end)
            editor.showLines(line_start, line_end)

        # make sure caret is not within a now-hidden region of lines:
        if not editor.getLineVisible(line_of_caret):
            self._move_caret_to_visible_line(line_of_caret)

        # set the background of the line number margin to a special color
        #  to indicate that some lines are not shown
        editor.styleSetBack(STYLESCOMMON.LINENUMBER, self.FILTERED_BACKGROUND_RGB)

    def _move_caret_to_visible_line(self, line_of_caret):
        """Move the caret to the start of the nearest visible line.

        Lines below *line_of_caret* are tried first, then the lines above
        it; if neither direction has a visible line the caret goes to the
        start of the document.
        """
        below = range(line_of_caret, editor.getLineCount())
        above = range(line_of_caret - 1, 0, -1)  # line 0 is covered by the fallback
        for candidate_lines in (below, above):
            for new_loc in candidate_lines:
                if editor.getLineVisible(new_loc):
                    editor.setEmptySelection(editor.positionFromLine(new_loc))
                    editor.chooseCaretX()
                    return
        # user line 1 is always shown, so punt and put caret there
        editor.setEmptySelection(0)
        editor.chooseCaretX()

    def reset_settings_for_active_tab(self, view_plus_file):
        """Create default settings for *view_plus_file* unless it has some already."""
        if view_plus_file not in self.settings_by_viewfile_dict:
            self.settings_by_viewfile_dict[view_plus_file] = {
                'show_lines_tup_list' : [],  # empty list means to show all lines
                'previous_user_input' : '',
            }

    def show_all_lines(self, show_not_hide=True):
        """Show (default) or hide every line except the first one."""
        f = editor.showLines if show_not_hide else editor.hideLines
        f(1, editor.getLineCount() - 1)  # can't hide user line 1 (Scintilla restriction)

    def shift_held(self):
        """Return True when the Shift key is down at the time of the call."""
        VK_SHIFT = 0x10
        user32 = WinDLL('user32')
        return (user32.GetAsyncKeyState(VK_SHIFT) & 0x8000) != 0

    def print(self, *args, **kwargs):
        """Print to the PythonScript console, prefixed with the class name.

        The console is shown on the first call only.  A ``debug`` keyword,
        whatever its value, adds a ``<DBG>`` tag to the prefix and is not
        passed on to the real print function.
        """
        if self.print_first:
            console.show()      # this will put input focus in the PS console window, at the >>> prompt
            #console.clear()
            editor.grabFocus()  # put input focus back into the editor window
            self.print_first = False
        d_tag = '<DBG>' if 'debug' in kwargs else ''
        kwargs.pop('debug', None)
        print(self.__class__.__name__ + d_tag + ':', *args, **kwargs)

    def dprint(self, *args, **kwargs):  # debug print function
        """Print like print(), but only when ``self.debug`` is on."""
        if self.debug:
            kwargs['debug'] = True
            self.print(*args, **kwargs)

    def mb(self, msg, flags=0, title=''):  # a message-box function
        """Show a message box (titled with the script name by default) and return its result."""
        return notepad.messageBox(msg, title if title else self.this_script_name, flags)

    def yes_no(self, question_text):  # returns True(Yes), False(No)
        """Ask a Yes/No question; return True only when the answer is Yes."""
        answer = self.mb(question_text, MESSAGEBOXFLAGS.YESNO, self.this_script_name)
        return answer == MESSAGEBOXFLAGS.RESULTYES

    def prompt(self, prompt_text, default_text=''):
        """Show an input box and return what notepad.prompt() returns.

        A one-line *prompt_text* gets a leading line break, and a colon is
        always appended.  The caller treats a None result as "cancelled".
        """
        if '\n' not in prompt_text:
            prompt_text = '\r\n' + prompt_text
        prompt_text += ':'
        return notepad.prompt(prompt_text, self.this_script_name, default_text)

#-------------------------------------------------------------------------------

if __name__ == '__main__':
    # Create the instance only when the name is not defined yet, so that its
    # callbacks are registered once and the per-tab settings are reused.
    # NOTE(review): presumably this relies on PythonScript keeping globals
    # alive between script runs - confirm.
    try:
        LINE_FILTER_VIA_HIDING1
    except NameError:
        LINE_FILTER_VIA_HIDING1 = LFVH1()
    LINE_FILTER_VIA_HIDING1.prompt_for_string_to_match()
-
-
@Alan-Kilborn said in Show only lines that contain a search term:
I downloaded your script and executed it on the Npp license (a .txt file) following your prescriptions. You may be glad to hear that it worked on my PC exactly as you described it.
I’m now in the process of reading your code and trying to learn from it. Your style of writing is a long way removed from the style of the very first program I wrote, which was a translation from a Fortran II to an Algol 60 program. In those days Fortran variables and subroutines had names of max 6 characters that, moreover, were capitals (per some old ASCII standard). Algol gave a little bit more freedom, but I followed as much as possible the Fortran original. Of course, there were no classes, neither in Algol nor in Fortran. Function names as
LINE_FILTER_VIA_HIDING1.prompt_for_string_to_match()
bring back these old memories, far removed as they are from the standards I started with. -
It’s fine to wax poetic about times gone by, but I’d be interested at any time in your feedback on the script, in a critical sense. There are many directions future development could take, and hopefully the script as it is, and as it could possibly become, will be a model for future native Notepad++ features involving hidden lines.
My “style of writing” scripts has evolved much in the 7+ years I’ve been doing it. The current class-based approach was brought into play at one point over those many years because I disliked the namespace “pollution” that script execution creates, and the class-based stuff minimizes that.
Now many of the scripts I publish (and of course the others that I write for myself) look very similar in structure. This is because I have a script that I run when I want to create a new script! (of course!). I call that script “NewScriptReplacementWithBoilerplateCodeCreation.py” and, as the name implies it replaces (figuratively) the Plugins > PythonScript > New Script menu command. It prompts me for the name of my new script, and the type (simple or complicated – complicated meaning the script will use callbacks or similar mechanisms), and then generates my stock code.
-
@Alan-Kilborn
As I wrote before, I’m reading your script and what better tool than the use of the very script to understand it? I tried to understand the meaning of the dictionary self.settings_by_viewfile_dict
and ran your script on it. The result below shows some lines that do not contain this string. -
I would also like to see a new native Notepad++ command Search - Filter, which would “Search As You Type”.
The LineFilter3 plugin is the best alternative for me, but native support with instant filtering the current document in the same window would be much nicer.
-
@Paul-Wormer said in Show only lines that contain a search term:
The result below shows some lines that do not contain this string.
Hmmmm, right you are. I can obtain the same result. Curious that I didn’t see this type of thing while working on the script. :-(
Ok, so let’s do this:
There’s a line in the script:
self.debug = True if 0 else False
. With that line as written, “debug” is off. BTW, I write that line that way so that I can change only a single character to turn debugging on or off. Let’s turn it on now, so change the 0
to a 1
. There are two places in the script where the PythonScript function
editor.showLines()
is called. In one place it is called to get all lines displayed, so right now we don’t care about that one. The one we care about is where the lines with the search string are displayed after all lines are hidden. That one looks like this in the code: editor.showLines(line_start, line_end)
. Find that line and put this new line after it: self.dprint('showing lines: {sp1}-{ep1} \t\t\tvia call: editor.showLines({s}, {e})'.format(s=line_start, e=line_end, sp1=line_start + 1, ep1=line_end + 1))
After restarting N++ to pick up the changes, run the functionality again.
Here’s an example of what I see:
Notice that since we’ve turned debugging on, we now see juicy details about the script in the PythonScript console window. In mine we see the lines the script wants to show: line 49, 58, etc. (Don’t be concerned if my line numbers don’t match yours)
The interesting thing is, in the editor window we can see lines 138 thru 143 that don’t have the search text, but the script only turns on display of line 130 before turning on line 147! (There are some other places in the output where this oddity occurs, but lines 138-143 are the most blatantly obvious)
I currently don’t have an explanation for this :-( but I’m going to keep looking at it…
-
@Alan-Kilborn I’ve tried your script now, too, and it works well for me. Thank you for that!
Is it possible to do a case insensitive search? ( I suppose not yet).
BTW, I tried to copy all filtered lines to the clipboard but the whole file content gets copied - is this the expected behaviour?
-
@Paul-Wormer said in Show only lines that contain a search term:
self.settings_by_viewfile_dict
I think what I see is the correct result:
-
@datatraveller1 said in Show only lines that contain a search term:
Is it possible to do a case insensitive search? ( I suppose not yet).
Right, let’s “walk before we run” with this… :-)
BTW, I tried to copy all filtered lines to the clipboard but the whole file content gets copied - is this the expected behaviour?
For now, yes. If you select lines (e.g. have a “filtered” view and do a Ctrl+a select-all) that cross visible/hidden boundaries and then do some operation on the selection, you’ll get all real lines in that region – reference my cautions about deleting selected text earlier.
-
@datatraveller1 said in Show only lines that contain a search term:
I think what I see is the correct result:
@Alan-Kilborn Do you know why I get the correct result if I filter for self.settings_by_viewfile_dict while Paul and you get the wrong result? :-)
-
@datatraveller1 I run the script three times in a new session:
- Hide lines not containing the string
- Undo it (show all lines again)
- Hide lines not containing the string
Then: step 1 gives too many lines and step 3 is OK.
-
@Paul-Wormer I still get always correct results (also after repeating steps 1-3 up to 10 times).
-
@datatraveller1 Also in a new session? That is, after closing Npp and starting it again?
-
@Paul-Wormer Yes, always correct results.
-
@Alan-Kilborn
I made a small step forward. After line 170 I modified the code as follows:# hide all lines first: self.show_all_lines(False) import time print('sleep ..') time.sleep(60) # show only lines with hits from the search based on user criterion: for (pos_start, pos_end) in L: editor.setIndicatorCurrent(SCE_UNIVERSAL_FOUND_STYLE) editor.indicatorFillRange(pos_start, pos_end - pos_start) line_start = editor.lineFromPosition(pos_start) line_end = editor.lineFromPosition(pos_end) editor.showLines(line_start, line_end) time.sleep(60) print('Woke up')
The sleep(60) gave me time to look at what’s happening and I noticed that often not all lines are hidden at the first sleep. These lines stay visible after the second sleep. The lines that stay visible are more or less random, so I get the feeling that it is a matter of timing. When I increase the length of the file by adding comments different lines stay visible. Datatraveller1 may well have a faster PC than Alan and me.
-
Hello, @alan-kilborn, @paul-wormer, @datatraveller1 and All,
Well, I finished the traditional sequence of greetings, by phone, SMS and letters. So I just tested your Python script, Alan, and here are the results of my experiments !
-
Personally, I never saw the case of visible lines not containing the literal string to search for. Good point !
-
Like you, @alan-kilborn, I used the License.txt file, which I duplicated several times, and entered the lower-case word free when running the script. And, as I increased the size of the test file, the waiting time to get the line margin highlighted, in salmon color, increased in parallel :
Search of the literal lower-case word 'free'

Times "License.txt"       Size                  Lines            Hiding lines time
---------------------------------------------------------------------------------------
license.txt × 1           35,500 bytes          231 lines        0,4 s
license.txt × 5           177,500 bytes         1,155 lines      1,95 s
license.txt × 10          355,000 bytes         2,310 lines      3,5 s
license.txt × 50          1,775,000 bytes       11,550 lines     17 s
license.txt × 100         3,550,000 bytes       23,100 lines     34 s
license.txt × 500         17,750,000 bytes      115,500 lines    165 s
license.txt × 1000        35,500,000 bytes      231,000 lines    335 s
Now, if we choose to look for the literal string COPYING, which occurs only once in the license.txt file ( instead of 24 times for the word free ), the results are very fast, in comparison :

Search of the literal upper-case word 'COPYING'

Times "License.txt"       Size                  Lines            Hiding lines time
---------------------------------------------------------------------------------------
license.txt × 1000        35,500,000 bytes      231,000 lines    12,5 s
Of course, if I tried to re-run the script, before getting the highlighted line margin, I received the usual message :
Another script is currently running. Running two scripts at the same time could produce unpredicable results, and is therefore disabled
Note that I tested this script with a portable N++ v8.4.6 version, located on a USB drive
Now, @datatraveller1, in order to ONLY copy the visible lines, here is a work-around :
-
Run the @alan-kilborn Python script first
-
Open the Mark dialog ( Ctrl + M )
-
Write the same string as in Alan’s Python script, in the Find what zone
-
Tick only the four options Bookmark line, Purge for each search, Match case and Wrap around
-
Select the
Normal
search mode -
Click on the
Mark All
button -
Run the
Search > Bookmark > Copy Bookmarked Lines
option or right-click on the bookmark margin and choose it -
Open a new tab (
Ctrl + N
) -
Paste the copied text (
Ctrl + V
)
Finally, @alan-kilborn, @paul-wormer and @datatraveller1, regarding the method, wouldn’t it be better to :
-
Trigger a usual Mark action, with the Bookmark line option set
-
Elaborate a Python script which simply would hide ALL non-marked lines, from the Mark results !
3 immediate advantages of this method :
-
You are not limited, anymore to a
literal
string -
You could choose a case-sensitive or case-insensitive search
-
You could choose the search mode : Normal, Extended or Regular expression
Best Regards,
guy038
-
-
@guy038 said in Show only lines that contain a search term:
as I increased the size of the test file, the waiting time to get the line margin highlighted, in salmon color, increased in parallel
I did mention earlier: “Large files may produce performance problems. Scripts aren’t blazingly fast…”
I use a variant of this script on what I consider a large file (in the work that I do). It works acceptably, performance wise.
HOWEVER: The correctness (or rather incorrectness) problem observed by some users needs to be addressed; if it doesn’t show/hide the correct lines, who cares how fast/slow it is?
wouldn’t it be better to :
Trigger a usual Mark action, with the Bookmark line option set
Elaborate a Python script which simply would hide ALL non-marked lines, from the Mark results !
The downsides to that approach are:
- It requires multiple steps
- Bookmarks become dedicated to that purpose (I like to “filter lines” and then set meaningful bookmarks on only some of the matches, before revealing all lines again and continuing to work with what I’ve bookmarked)
The user interface of the script could evolve to allow things like regex, match-case, etc. But…let’s go for correctness first.
-
Hi, @alan-kilborn and All,
Ah…OK. I understand your working method. You should indeed remove these false positive answers, first of all !
By the way, I’m really surprised that I never encountered any visible false lines during my tests, even in the case of a heavy file ! However, my laptop is not a WAR LIGHTNING ! Bought in July 2021, this HP laptop comes with Windows 10 Pro 64, a 512 Mb Solid State Drive and 16 Mb DDR4 of RAM.
Moreover, as I have not finished cleaning my old micro and moved all my data yet, I still use a USB key containing the N++ portable version 8.4.6 for all my tests, which must certainly slow down the execution of your Python script ??
BR
guy038
-
@guy038 said in Show only lines that contain a search term:
I have not finished cleaning my old micro and moved all my data yet,
What is taking so long? Are you ever going to finish with this?? :-)
I still use a USB key containing the N++ portable version 8.4.6 for all my tests, which must certainly slow down the execution of your Python script ??
I don’t see how…everything should be “in memory” for the operations of the script.
I’m really surprised that I never encountered any visible false lines during my tests
Yes that is interesting…
You should indeed remove these false positive answers, first of all !
Indeed. However, at the moment, I’m out of ideas on what the problem might be. :-(
-
Hi all,
Just two thoughts-
I’m not sure - what does PC speed have to do with line visibility?
-
Doesn’t “Find All in Current Document” basically do the same thing as the Python script? Actually, the Notepad++ programmer just has to output the same thing in the same window to filter the text, doesn’t he?
-