Need help with custom sort operations
-
@Mark-Olson said in Need help with custom sort operations:
My block-sorter just removed a trailing newline after the DDDDDDDDDDDD block.
If you look at the first post the example there showed a “trailing” blank/empty line. They wanted this kept.
From all @Dean-Corso posts so far I think they want the ability to dictate 2 parameters before running the process.
- How many lines in each block. They state they are well defined, so ALL the same number of lines.
- How many of the lines (from start of block down) will be sorted first within the block.
Lastly another sort is performed on the file using ONLY the first header line of each block (keeping each block intact).
Currently I don’t think your script will work that correctly, certainly as you seem to be removing “blank” lines that won’t work for them.
Terry
-
@Terry-R said in Need help with custom sort operations:
If you look at the first post the example there showed a “trailing” blank/empty line. They wanted this kept.
Yep, that’s a limitation of my script. I’m not going to change it, because every way I’ve tried to get around that limitation makes things worse.
Fortunately, it is quite easy to make a regex-replace that creates a trailing blank/empty line after each block. Just replace
(?<!\A)(^[A-Z]+$)
with \r\n{0}
-
@Dean-Corso said in Need help with custom sort operations:
Just need to have some function / dialog where I can enter those paramters…
Enter which every line to sort: 2
Enter how many lines to sort: 1
Done. Just use the following script in PythonScript
# -*- coding: utf-8 -*-
#########################################
#
# sort_line_blocks_and_headers
#
#########################################
# references:
#  https://community.notepad-plus-plus.org/topic/24742/need-help-with-custom-sort-operations
# HOW IT WORKS:
# 1. Divide the document into blocks of N lines, of which the top M lines are the header.
# 2. Sort the header of each block
# 3. Sort the blocks by the sorted headers.
# EXAMPLE INPUT:
# CCCCCCCCCCCCCCCCCCC
# AAAAAAAAAAAAAAAAAAA
# 3
# BBBBBBBBBBBBBBBBBBB
# AAAAAAAAAAAAAAAAAAA
# 2
# CCCCCCCCCCCCCCCCCCC
# BBBBBBBBBBBBBBBBBBB
# 1
# RESPOND TO THE 4 PROMPTS AS FOLLOWS: 3, yes, 2, yes
# RESULT:
# AAAAAAAAAAAAAAAAAAA
# BBBBBBBBBBBBBBBBBBB
# 2
# AAAAAAAAAAAAAAAAAAA
# CCCCCCCCCCCCCCCCCCC
# 3
# BBBBBBBBBBBBBBBBBBB
# CCCCCCCCCCCCCCCCCCC
# 1
from __future__ import print_function

from Npp import editor, notepad


def _strip_one_eol(text):
    """Return text without its final line break (CRLF, LF or CR), if it has one."""
    for ending in ('\r\n', '\n', '\r'):
        if text.endswith(ending):
            return text[:-len(ending)]
    return text


def main():
    """Prompt for the block layout, then sort headers and blocks of the current document.

    Four prompts are shown (lines per block, block sort direction, lines in
    header, header sort direction). Cancelling any prompt aborts without
    touching the document; invalid numbers show a message box and restart
    the prompts. The whole document text is then replaced by the sorted
    blocks, joined with the document's EOL sequence.
    """
    eol = ['\r\n', '\r', '\n'][editor.getEOLMode()]
    while True:
        lines_per_block_str = notepad.prompt('number of lines per block', 'sort blocks', '3')
        if lines_per_block_str is None:
            return
        try:
            lines_per_block = int(lines_per_block_str)
            if lines_per_block < 1:
                raise ValueError
        except ValueError:
            notepad.messageBox('lines per block must be integer >= 1')
            continue
        sort_blocks_ascending_str = notepad.prompt(
            'sort blocks least to greatest (yes/no)', 'sort blocks', 'yes')
        if sort_blocks_ascending_str is None:
            return
        reverse_blocks = sort_blocks_ascending_str != 'yes'
        lines_in_header_str = notepad.prompt('number of lines in header', 'sort blocks', '1')
        if lines_in_header_str is None:
            return
        sort_header_ascending_str = notepad.prompt(
            'sort header least to greatest (yes/no)', 'sort blocks', 'yes')
        if sort_header_ascending_str is None:
            return
        reverse_header = sort_header_ascending_str != 'yes'
        try:
            lines_in_header = int(lines_in_header_str)
            if lines_in_header < 1 or lines_in_header > lines_per_block:
                raise ValueError
        except ValueError:
            notepad.messageBox('lines in header must be integer <= lines in block and >= 1')
            continue
        break

    lines_not_in_header = lines_per_block - lines_in_header
    # Raw strings: \R and \Z are regex escapes for the editor's search engine,
    # not Python string escapes.
    if lines_not_in_header:
        find_regex = r'(?-s)((?:.*\R){%d})((?:.*(?:\R|\Z)){%d})' % (
            lines_in_header, lines_not_in_header)
    else:
        # The header is the whole block, so there is no second group, and the
        # final block may end at the end of the document without a line break.
        find_regex = r'(?-s)((?:.*(?:\R|\Z)){%d})' % lines_in_header

    header_block_list = []
    doc_len = editor.getLength()
    # Mutable holder (no `nonlocal` in Python 2): set to True when the last
    # block is complete and the document ends with a line break.
    doc_state = {'final_eol': False}

    def on_match(m):
        header_text = m.group(1)
        if not header_text:
            return  # zero-width match at the very end of the document
        body_text = m.group(2) if lines_not_in_header else ''
        matched = header_text + body_text
        sorted_header = eol.join(sorted(header_text.splitlines(), reverse=reverse_header))
        if lines_not_in_header:
            edited_block = sorted_header + eol + body_text
        else:
            edited_block = sorted_header
        at_doc_end = m.span()[1] == doc_len
        terminated = matched.endswith(('\r', '\n'))
        # A final block with fewer real lines than a full block: its trailing
        # line break belongs to the block (it precedes an empty last line).
        padded = at_doc_end and len(matched.splitlines()) < lines_per_block
        if terminated and not padded:
            # The trailing line break is only a separator; the blocks are
            # re-joined with eol below, so drop it here.
            if lines_not_in_header:
                edited_block = _strip_one_eol(edited_block)
            if at_doc_end:
                doc_state['final_eol'] = True
        header_block_list.append((sorted_header, edited_block))

    editor.research(find_regex, on_match)
    header_block_list_sorted = [x[1] for x in sorted(header_block_list, reverse=reverse_blocks)]
    new_text = eol.join(header_block_list_sorted)
    if doc_state['final_eol']:
        # Put the document's final line break back at the end, wherever the
        # originally-last block ended up after sorting.
        new_text += eol
    editor.setText(new_text)


if __name__ == '__main__':
    main()
-
-
@Mark-Olson
# My initial script had some bugs. Use this instead
# -*- coding: utf-8 -*-
#########################################
#
# sort_line_blocks_and_headers
#
#########################################
# references:
#  https://community.notepad-plus-plus.org/topic/24742/need-help-with-custom-sort-operations
# HOW IT WORKS:
# 1. Divide the document into blocks of N lines, of which the top M lines are the header.
# 2. Sort the header of each block
# 3. Sort the blocks by the sorted headers.
# EXAMPLE INPUT:
# CCCCCCCCCCCCCCCCCCC
# AAAAAAAAAAAAAAAAAAA
# 3
# BBBBBBBBBBBBBBBBBBB
# AAAAAAAAAAAAAAAAAAA
# 2
# CCCCCCCCCCCCCCCCCCC
# BBBBBBBBBBBBBBBBBBB
# 1
# RESPOND TO THE 4 PROMPTS AS FOLLOWS: 3, yes, 2, yes
# RESULT:
# AAAAAAAAAAAAAAAAAAA
# BBBBBBBBBBBBBBBBBBB
# 2
# AAAAAAAAAAAAAAAAAAA
# CCCCCCCCCCCCCCCCCCC
# 3
# BBBBBBBBBBBBBBBBBBB
# CCCCCCCCCCCCCCCCCCC
# 1
from __future__ import print_function

from Npp import editor, notepad


def _strip_one_eol(text):
    """Return text without its final line break (CRLF, LF or CR), if it has one."""
    for ending in ('\r\n', '\n', '\r'):
        if text.endswith(ending):
            return text[:-len(ending)]
    return text


def main():
    """Prompt for the block layout, then sort headers and blocks of the current document.

    Prompts for lines per block, block sort direction and lines in header;
    the header sort direction is only asked when the header has more than
    one line. Cancelling any prompt aborts without touching the document;
    invalid numbers show a message box and restart the prompts. The whole
    document text is then replaced by the sorted blocks, joined with the
    document's EOL sequence.
    """
    eol = ['\r\n', '\r', '\n'][editor.getEOLMode()]
    while True:
        lines_per_block_str = notepad.prompt('number of lines per block', 'sort blocks', '3')
        if lines_per_block_str is None:
            return
        try:
            lines_per_block = int(lines_per_block_str)
            if lines_per_block < 1:
                raise ValueError
        except ValueError:
            notepad.messageBox('lines per block must be integer >= 1')
            continue
        sort_blocks_ascending_str = notepad.prompt(
            'sort blocks least to greatest (yes/no)', 'sort blocks', 'yes')
        if sort_blocks_ascending_str is None:
            return
        reverse_blocks = sort_blocks_ascending_str != 'yes'
        lines_in_header_str = notepad.prompt('number of lines in header', 'sort blocks', '1')
        if lines_in_header_str is None:
            return
        try:
            lines_in_header = int(lines_in_header_str)
            if lines_in_header < 1 or lines_in_header > lines_per_block:
                raise ValueError
        except ValueError:
            notepad.messageBox('lines in header must be integer <= lines in block and >= 1')
            continue
        if lines_in_header > 1:
            sort_header_ascending_str = notepad.prompt(
                'sort header least to greatest (yes/no)', 'sort blocks', 'yes')
            if sort_header_ascending_str is None:
                return
            reverse_header = sort_header_ascending_str != 'yes'
        else:
            # A one-line header has nothing to sort.
            reverse_header = False
        break

    lines_not_in_header = lines_per_block - lines_in_header
    # Raw strings: \R and \Z are regex escapes for the editor's search engine,
    # not Python string escapes.
    if lines_not_in_header:
        find_regex = r'(?-s)((?:.*\R){%d})((?:.*(?:\R|\Z)){%d})' % (
            lines_in_header, lines_not_in_header)
    else:
        find_regex = r'(?-s)((?:.*(?:\R|\Z)){%d})' % lines_in_header

    header_block_list = []
    doc_len = editor.getLength()
    # Mutable holder (no `nonlocal` in Python 2): set to True when the last
    # block is complete and the document ends with a line break.
    doc_state = {'final_eol': False}

    def on_match(m):
        group1 = m.group(1)
        if not group1:
            return  # zero-width match at the very end of the document
        body_text = m.group(2) if lines_not_in_header else ''
        matched = group1 + body_text
        sorted_header = eol.join(sorted(group1.splitlines(), reverse=reverse_header))
        if lines_not_in_header:
            edited_block = sorted_header + eol + body_text
        else:
            edited_block = sorted_header
        is_last_match = m.span()[1] == doc_len
        terminated = matched.endswith(('\r', '\n'))
        # A final block with fewer real lines than a full block: its trailing
        # line break belongs to the block (it precedes an empty last line).
        padded = is_last_match and len(matched.splitlines()) < lines_per_block
        if terminated and not padded:
            # The trailing line break is only a separator; the blocks are
            # re-joined with eol below. Without a body, edited_block is the
            # re-joined header and carries no separator to remove.
            if lines_not_in_header:
                edited_block = _strip_one_eol(edited_block)
            if is_last_match:
                doc_state['final_eol'] = True
        header_block_list.append((sorted_header, edited_block))

    editor.research(find_regex, on_match)
    header_block_list_sorted = [x[1] for x in sorted(header_block_list, reverse=reverse_blocks)]
    new_text = eol.join(header_block_list_sorted)
    if doc_state['final_eol']:
        # Put the document's final line break back at the end, wherever the
        # originally-last block ended up after sorting.
        new_text += eol
    editor.setText(new_text)


if __name__ == '__main__':
    main()
-
-
Thank you guys for trying to help me (and others maybe too) to find a solution for my sorting problems. Sounds great that you was able to create a script for that task. Only problem at the moment is I don’t know yet how to use it so maybe you could give me a little crash curse what plugin to install and what parameters I have to execute (python + your script + my npp tab or file) etc. So I have python installed already on computer. My question is also whether I can execute your py script in npp for the open / focused tab itself (also when I just have written text in real time without saved file). As I said, not sure what plugin to install for py (by the way, the PyNPP plugin is not listed in plugin manager) if I have to. Would be nice if you could tell me and show one or two examples how to execute your script with an file which is open in notepad++ or if possible also using a direct tab (none file) in pipe mode etc if this is supported. Thank you.
-
@Dean-Corso said in Need help with custom sort operations:
little crash curse what plugin to install and what parameters I have to execute
-
Thank you for that link & info. Ok, I think I got it working now to execute your script @Mark-Olson and on my first tests it seems to work as I wanted! Coolio! I will do some more tests but it looks already pretty well.
Is this sort operation which works now for all complete lines also doable to for that rectangle selection (maybe for later)? Lets say I have few vertical text blocks and I want to select any of them via rectangle selection and want to sort all lines from that selection. Could that be possible too?
Just as info: I found out that if I want to sort specific rectangle selected lines (middle - end) that it’s only working when I trim all the selected lines starting at same vertical position and I also need to change all blank chars like TABs to Space before I do a sort to make it work. Just don’t understand why but I have to.
Example: 2 blocks / tabs between / sort ascending ic.
66666 qqqqqqq 55555555 zzzzzzz 777 aaaaaaa 88888888888 xxxxxxx
I just select the second block via rectangle select then I do the line sort with ingore case and I got this out.
55555555 zzzzzzz 88888888888 xxxxxxx 777 aaaaaaa 66666 qqqqqqq
=? So when I do same but before I call the blank option “Tab to Space” and select the 2. block and call then the sort function then I get this out…
777 aaaaaaa 66666 qqqqqqq 88888888888 xxxxxxx 55555555 zzzzzzz
which is correctly now. Just don’t understand why I have to change the tabs to space before the selection so normally it should play no role at this point what comes before the selection you know. Is this a bug or do I understand it wrong? You know what I mean?
Thank you very much so far guys.
-
@Dean-Corso If it helps you, my plugin Columns++ (still in alpha stage, though apparently stable) can sort column selections containing tabs.
The reason the native sorts in Notepad++ don’t work is because they determine the first and last character positions of the rectangular selection in the first line of the selection and then apply those character positions to all lines; they don’t use the line-by-line contents of the rectangular selection.
-
Thank you for that info. I did test your plugin and it seems to work to do the sort operation without conversion. I also see your plugin has also some more interesting functions I need to check out more.
I think one of the problem in npp & plugin is that in both are missing some clear methods to use specific operations just for all lines OR selection with or without to handle lines you know what I mean?
Example: Sort function.
1.) I want to handle all lines
2.) I want to handle just my selection (normal case)
3.) I want to handle rectangle selection + lines
4.) I want to handle rectangle selection itself only
Point 1 & 2 seems to work good so far in npp to do that but in special cases 3 & 4 it could be problematic.
When I sort any lines (selected or not) then empty lines getting removed. Why? How to prevent that? On first view it looks good to remove blank lines but in case when I want to keep the lines original then I need to add empty lines manually after sorting.
What is when I just want to sort the content of my rectangle selection without to move all the lines? I see its also not doable so far in npp.
What is when I want to sort my rectangle selected part / lines and ignore all blank chars till the first char begins? Not working as I can see and I need to bring all the selected lines starting at the same position point like this…(I did rectangle select the right part)
66666 Qqqqqqq 55555555 Zzzzzzz 777 Aaaaaaa 88888888888 Xxxxxxx
…and before I can sort I need to change them to this…
66666 Qqqqqqq 55555555 Zzzzzzz 777 Aaaaaaa 88888888888 Xxxxxxx
…and now I can do a sort which works and moves all lines. But what if I want just to sort that selection only and keep the left part / lines unchanged? Like this…
66666 Aaaaaaa 55555555 Qqqqqqq 777 Xxxxxxx 88888888888 Zzzzzzz
…only selection was sorted and the rest is same. So is it too specific what I want?
-
@Dean-Corso said in Need help with custom sort operations:
Example: Sort function.
1.) I want to handle all lines
2.) I want to handle just my selection (normal case)
3.) I want to handle rectangle selection + lines
4.) I want to handle rectangle selection itself only
Point 1 & 2 seems to work good so far in npp to do that but in special cases 3 & 4 it could be problematic.
Case 3 should work using Columns++. It also works with the built-in Notepad++ sorts provided you have a fixed-pitch font and no tabs.
Case 4 can be handled easily this way:
Make the rectangular selection.
Copy the selection to the clipboard.
Open a new tab and paste the selection into it.
Sort the new tab.
Make a rectangular selection encompassing all the data.
Copy the selection to the clipboard.
Switch back to the original tab and paste the data into the rectangular selection.
Doing this in one step, instead of copying and pasting into a temporary tab, is something I could include in Columns++. I can’t promise, but the next time I make changes I’ll see if it can be added without making things too clumsy or complicated.
When I sort any lines (selected or not) then empty lines getting removed. Why? How to prevent that? On first view it looks good to remove blank lines but in case when I want to keep the lines original then I need to add empty lines manually after sorting.
What is when I just want to sort the content of my rectangle selection without to move all the lines? I see its also not doable so far in npp.
That’s a bit more complicated. The blank lines aren’t being removed, they’re just sorting, the same as all the other lines; to the beginning if it’s an ascending sort or to the end if it’s a descending sort. If you want to skip sorting those lines, then it becomes confusing. Do they serve as breaks — meaning each section between blank lines is sorted, but lines aren’t moved across blank lines? Or do all the non-blank lines get sorted, but then positioned only where non-blank lines occurred before, leaving the blank lines at the same line number positions? Or do the blank lines “stick with” the preceding non-blank line? Or the following non-blank line? Or something else?
I think there are too many variations in what someone could want to make it practical to implement.
What is when I want to sort my rectangle selected part / lines and ignore all blank chars till the first char begins? Not working as I can see and I need to bring all the selected lines starting at the same position point like this…(I did rectangle select the right part)
66666 Qqqqqqq 55555555 Zzzzzzz 777 Aaaaaaa 88888888888 Xxxxxxx
…and before I can sort I need to change them to this…
66666 Qqqqqqq 55555555 Zzzzzzz 777 Aaaaaaa 88888888888 Xxxxxxx
At present, none of the sorts have an option to ignore leading blanks, but that is a logical request. For this, too, I’ll say that when I next make some changes to Columns++, I will see if something like this can work. Offhand, I think it might be handy to make a sort that applies a user-specified regular expression to each line in a rectangular selection to determine the sort key. Skipping leading blanks would be an easy special case of that.
-
Thank you for the feedback. Yes I have seen that I can copy / paste the rectangle selection using another tab or free space below etc but this is again a detour I want to prevent of course. Sounds good if you could add some more features in your plugin if possible. I already added your plugin releases into my RSS feed list.
I was trying around to handle specific tasks I got in my mind when editing different text and lines using Notepad++ and have seen that not all I wanna do is doable yet. Would be good if you could add more features into your plugin later to have more capabilities & possibilities to edit text, lines and selections etc. Just another suggestion to you @Coises, could you also add that specific sort method (block sort / move) I did requested and @Mark-Olson made that python script for that. Maybe you can adapt it and include it into your plugin sort operation with extra custom dialog to enter the user data. Thanks.
PS: Just a question about this forum style. How to reply & see the forum at the same time? Each time when I wanna reply an answer then the text field does popup as overlay and I can’t see the text of the posts anymore and I can just move this reply window a little bit down only. I would like to see all at once like in other forums too. How to do that here?
-
@Dean-Corso
These are very interesting ideas! Many of them are pretty tricky to implement. The good news is that if you are feeling adventurous and want to learn some useful skills, you can begin trying to implement such solutions yourself by learning regular expressions and Python. By the way, this is not me telling you to leave us alone, but rather a general observation that sometimes if you want something done right you just have to do it yourself.
I can’t speak for Coises, of course, but a few suggestions about feature requests:
- There are many excellent features that are simply beyond what the developer envisions as the scope of their application. For instance, I have a JSON plugin. A user might say, “hey, I’d like a CSV parser so I can use your tree view for CSV files as well!” And that would be really cool! The trouble is that a CSV parser is just outside the scope of my plugin, and it opens the door for even more “feature creep”.
- I have often created a script solution that seemed to me like it solved two or three problems simultaneously, and I would never have to do this complicated multi-step process again. Instead, I created a fragile tool that solved two or three problems badly, and I often wound up having to do the multi-step process anyway because I could use more effective specialized tools for each step.
-
So I know what you mean and learning some helpfully language +/- Regular Expression would make me more independent to handle specific tasks by myself in the best case. I tried to keep RegEx Syntax in my mind but always forget it when not using it anymore for a longer while. Also have problem with those different syntax styles I have to use here or there what makes it also more problematic for me. Will see what I can do with python script in the future. Otherwise I will ask you guys again to get some help if you don’t mind.
Your script works very good so far and didn’t found any issues yet. I also made a quick accessible icon in the toolbar to prevent the long menu choosing method. Thank you.
-
I just have a little question about your script you made (last one you did post here). So as I said it’s working but I have a problem with the sort operation which is ascending & descending what is good and as I wanted but I need it with the ignore case option. So could you add or just change it in the script etc? Thank you.
-
@Dean-Corso said in Need help with custom sort operations:
I need it with the ignore case option. So could you add or just change it in the script etc?
Done!
# -*- coding: utf-8 -*-
#########################################
#
# sort_line_blocks_and_headers
#
#########################################
# references:
#  https://community.notepad-plus-plus.org/topic/24742/need-help-with-custom-sort-operations
# HOW IT WORKS:
# 1. Divide the document into blocks of N lines, of which the top M lines are the header.
# 2. Sort the header of each block
# 3. Sort the blocks by the sorted headers.
# EXAMPLE INPUT:
# CCCCCCCCCCCCCCCCCCC
# BBBBBBBBBBBBBBBBBBB
# 3
# BBBBBBBBBBBBBBBBBBB
# aaaaaaaaaaaaaaaaaaa
# 2
# CCCCCCCCCCCCCCCCCCC
# AAAAAAAAAAAAAAAAAAA
# 1
# bbbbbbbbbbbbbbbbbbb
# bbbbbbbbbbbbbbbbbbb
# 4
# USE THE FOLLOWING OPTIONS:
# [ 3 ]Lines per block
# [ x ]Sort blocks least to greatest
# [ 2 ]Lines in header
# [ x ]Sort header least to greatest
# [ x ]Case-insensitive sort
# RESULT:
# aaaaaaaaaaaaaaaaaaa
# BBBBBBBBBBBBBBBBBBB
# 2
# AAAAAAAAAAAAAAAAAAA
# CCCCCCCCCCCCCCCCCCC
# 1
# bbbbbbbbbbbbbbbbbbb
# bbbbbbbbbbbbbbbbbbb
# 4
# BBBBBBBBBBBBBBBBBBB
# CCCCCCCCCCCCCCCCCCC
# 3
from __future__ import print_function

import re


def option_check(input_text, option_text, type_):
    """Read one ``[ value ]Label`` option from the options text.

    Finds the bracketed value directly in front of ``option_text`` (an
    optional single space may separate them), strips surrounding whitespace
    and returns ``type_(value)``. When the option is not found the value is
    the empty string, so ``bool`` gives False and ``int`` raises ValueError.

    input_text  -- the full options text entered by the user
    option_text -- the label (or the start of the label) to look for
    type_       -- callable used to convert the stripped value, e.g. int or bool
    """
    # The label is escaped so it is always matched literally.
    m = re.search(r'\[([^]]+)\] ?' + re.escape(option_text), input_text)
    valstrip = m.group(1).strip() if m else ''
    return type_(valstrip)


def _strip_one_eol(text):
    """Return text without its final line break (CRLF, LF or CR), if it has one."""
    for ending in ('\r\n', '\n', '\r'):
        if text.endswith(ending):
            return text[:-len(ending)]
    return text


def main():
    """Ask for the sort options in one dialog, then sort headers and blocks.

    An empty or cancelled dialog aborts without touching the document;
    invalid numbers show a message box and re-open the dialog. The whole
    document text is then replaced by the sorted blocks, joined with the
    document's EOL sequence.
    """
    # Imported here rather than at module level so that option_check can be
    # imported outside of Notepad++.
    from Npp import editor, notepad

    eol = ['\r\n', '\r', '\n'][editor.getEOLMode()]
    default_options = '\r\n'.join([
        '[ 3 ]Lines per block',
        '[ x ]Sort blocks least to greatest',
        '[ 1 ]Lines in header',
        '[ x ]Sort header least to greatest',
        '[ x ]Case-insensitive sort',
    ])
    while True:
        user_options = notepad.prompt(
            'Options for sorting line blocks and headers',
            'Sort line blocks and headers',
            default_options)
        if not (user_options and user_options.strip()):
            return
        try:
            lines_per_block = option_check(user_options, 'Lines per block', int)
            if lines_per_block < 1:
                raise ValueError
        except ValueError:
            notepad.messageBox('lines per block must be integer >= 1')
            continue
        reverse_blocks = not option_check(user_options, 'Sort blocks least', bool)
        try:
            lines_in_header = option_check(user_options, 'Lines in header', int)
            if not 1 <= lines_in_header <= lines_per_block:
                raise ValueError
        except ValueError:
            notepad.messageBox('lines in header must be integer <= lines in block and >= 1')
            continue
        reverse_header = not option_check(user_options, 'Sort header', bool)
        ignorecase = option_check(user_options, 'Case-insensitive', bool)
        break

    lines_not_in_header = lines_per_block - lines_in_header
    # Raw strings: \R and \Z are regex escapes for the editor's search engine,
    # not Python string escapes. The pattern contains no literal letters, so
    # no case-insensitivity flag is needed on it.
    if lines_not_in_header:
        find_regex = r'(?-s)((?:.*\R){%d})((?:.*(?:\R|\Z)){%d})' % (
            lines_in_header, lines_not_in_header)
    else:
        find_regex = r'(?-s)((?:.*(?:\R|\Z)){%d})' % lines_in_header

    def line_key(text):
        # Sort key shared by the header lines and the blocks.
        return text.upper() if ignorecase else text

    header_block_list = []
    doc_len = editor.getLength()
    # Mutable holder (no `nonlocal` in Python 2): set to True when the last
    # block is complete and the document ends with a line break.
    doc_state = {'final_eol': False}

    def on_match(m):
        group1 = m.group(1)
        if not group1:
            return  # zero-width match at the very end of the document
        body_text = m.group(2) if lines_not_in_header else ''
        matched = group1 + body_text
        header = group1.splitlines()
        header.sort(key=line_key, reverse=reverse_header)
        sorted_header = eol.join(header)
        if lines_not_in_header:
            edited_block = sorted_header + eol + body_text
        else:
            edited_block = sorted_header
        is_last_match = m.span()[1] == doc_len
        terminated = matched.endswith(('\r', '\n'))
        # A final block with fewer real lines than a full block: its trailing
        # line break belongs to the block (it precedes an empty last line).
        padded = is_last_match and len(matched.splitlines()) < lines_per_block
        if terminated and not padded:
            # The trailing line break is only a separator; the blocks are
            # re-joined with eol below. Without a body, edited_block is the
            # re-joined header and carries no separator to remove.
            if lines_not_in_header:
                edited_block = _strip_one_eol(edited_block)
            if is_last_match:
                doc_state['final_eol'] = True
        header_block_list.append((sorted_header, edited_block))

    editor.research(find_regex, on_match)
    # Blocks are ordered by their sorted header only; the sort is stable, so
    # blocks with equal headers keep their document order.
    header_block_list.sort(key=lambda pair: line_key(pair[0]), reverse=reverse_blocks)
    sorted_blocks_str = eol.join(pair[1] for pair in header_block_list)
    if doc_state['final_eol']:
        # Put the document's final line break back at the end, wherever the
        # originally-last block ended up after sorting.
        sorted_blocks_str += eol
    editor.setText(sorted_blocks_str)


if __name__ == '__main__':
    main()
-
-
Thank you very much Mark. Works.