Make normal filtering of plain ASCII lines faster
This patch adds a field, lines_not_ascii, to the MenuState structure. Its nth entry is 0 unless the nth member of MenuState.lines contains a non-ASCII codepoint. All comparison functions (of type menu_match_cb) take an additional argument, not_ascii, which tells them whether the string they are matching contains non-ASCII codepoints. They can use this to decide whether to collate and case-fold the input (for non-ASCII strings) or to use strstr/strcasestr directly (for ASCII strings). The change is not currently implemented for flex (fuzzy) matching, due to my laziness; however, it should be a simple enough matter to add. For my large input of 400,000 lines, this reduces typical filtering time to about ten microseconds from about 2 seconds.
This commit is contained in:
@@ -102,7 +102,7 @@ int find_arg ( const char * const key );
|
|||||||
*
|
*
|
||||||
* @returns 1 when matches, 0 otherwise
|
* @returns 1 when matches, 0 otherwise
|
||||||
*/
|
*/
|
||||||
int token_match ( char **tokens, const char *input, int case_sensitive,
|
int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
|
||||||
__attribute__( ( unused ) ) unsigned int index,
|
__attribute__( ( unused ) ) unsigned int index,
|
||||||
__attribute__( ( unused ) ) Switcher * data );
|
__attribute__( ( unused ) ) Switcher * data );
|
||||||
|
|
||||||
@@ -152,4 +152,11 @@ char helper_parse_char ( const char *arg );
|
|||||||
* Set the application arguments.
|
* Set the application arguments.
|
||||||
*/
|
*/
|
||||||
void cmd_set_arguments ( int argc, char **argv );
|
void cmd_set_arguments ( int argc, char **argv );
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param str a UTF8 string
|
||||||
|
* @return 1 if the string contains any non-ascii codepoints
|
||||||
|
*/
|
||||||
|
int is_not_ascii ( const char *str );
|
||||||
|
|
||||||
#endif // ROFI_HELPER_H
|
#endif // ROFI_HELPER_H
|
||||||
|
@@ -78,7 +78,7 @@ typedef enum
|
|||||||
*
|
*
|
||||||
* @returns 1 when it matches, 0 if not.
|
* @returns 1 when it matches, 0 if not.
|
||||||
*/
|
*/
|
||||||
typedef int ( *menu_match_cb )( char **tokens, const char *input, int case_sensitive, unsigned int index, Switcher *data );
|
typedef int ( *menu_match_cb )( char **tokens, const char *input, int not_ascii, int case_sensitive, unsigned int index, Switcher *data );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param sw the Switcher to show.
|
* @param sw the Switcher to show.
|
||||||
|
@@ -169,7 +169,7 @@ static SwitcherMode combi_mode_result ( int mretv, char **input, unsigned int se
|
|||||||
}
|
}
|
||||||
return MODE_EXIT;
|
return MODE_EXIT;
|
||||||
}
|
}
|
||||||
static int combi_mode_match ( char **tokens, const char *input,
|
static int combi_mode_match ( char **tokens, const char *input, int not_ascii,
|
||||||
int case_sensitive, unsigned int index, Switcher *sw )
|
int case_sensitive, unsigned int index, Switcher *sw )
|
||||||
{
|
{
|
||||||
CombiModePrivateData *pd = sw->private_data;
|
CombiModePrivateData *pd = sw->private_data;
|
||||||
@@ -178,13 +178,13 @@ static int combi_mode_match ( char **tokens, const char *input,
|
|||||||
if ( index >= pd->starts[i] && index < ( pd->starts[i] + pd->lengths[i] ) ) {
|
if ( index >= pd->starts[i] && index < ( pd->starts[i] + pd->lengths[i] ) ) {
|
||||||
if ( tokens && input[0] && tokens[0][0] == '!' ) {
|
if ( tokens && input[0] && tokens[0][0] == '!' ) {
|
||||||
if ( tokens[0][1] == pd->switchers[i]->name[0] ) {
|
if ( tokens[0][1] == pd->switchers[i]->name[0] ) {
|
||||||
return pd->switchers[i]->token_match ( &tokens[1], input, case_sensitive,
|
return pd->switchers[i]->token_match ( &tokens[1], input, not_ascii, case_sensitive,
|
||||||
index - pd->starts[i], pd->switchers[i] );
|
index - pd->starts[i], pd->switchers[i] );
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return pd->switchers[i]->token_match ( tokens, input, case_sensitive,
|
return pd->switchers[i]->token_match ( tokens, input, not_ascii, case_sensitive,
|
||||||
index - pd->starts[i], pd->switchers[i] );
|
index - pd->starts[i], pd->switchers[i] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -316,7 +316,7 @@ int dmenu_switcher_dialog ( void )
|
|||||||
char **tokens = tokenize ( select, config.case_sensitive );
|
char **tokens = tokenize ( select, config.case_sensitive );
|
||||||
unsigned int i = 0;
|
unsigned int i = 0;
|
||||||
for ( i = 0; i < cmd_list_length; i++ ) {
|
for ( i = 0; i < cmd_list_length; i++ ) {
|
||||||
if ( token_match ( tokens, cmd_list[i], config.case_sensitive, 0, NULL ) ) {
|
if ( token_match ( tokens, cmd_list[i], is_not_ascii(cmd_list[i]), config.case_sensitive, 0, NULL ) ) {
|
||||||
pd->selected_line = i;
|
pd->selected_line = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -322,6 +322,7 @@ typedef struct _SwitcherModePrivateData
|
|||||||
} SwitcherModePrivateData;
|
} SwitcherModePrivateData;
|
||||||
|
|
||||||
static int window_match ( char **tokens, __attribute__( ( unused ) ) const char *input,
|
static int window_match ( char **tokens, __attribute__( ( unused ) ) const char *input,
|
||||||
|
__attribute__( ( unused) ) int not_ascii,
|
||||||
int case_sensitive, unsigned int index, Switcher *sw )
|
int case_sensitive, unsigned int index, Switcher *sw )
|
||||||
{
|
{
|
||||||
SwitcherModePrivateData *rmpd = (SwitcherModePrivateData *) sw->private_data;
|
SwitcherModePrivateData *rmpd = (SwitcherModePrivateData *) sw->private_data;
|
||||||
@@ -338,19 +339,19 @@ static int window_match ( char **tokens, __attribute__( ( unused ) ) const char
|
|||||||
// e.g. when searching 'title element' and 'class element'
|
// e.g. when searching 'title element' and 'class element'
|
||||||
char *ftokens[2] = { tokens[j], NULL };
|
char *ftokens[2] = { tokens[j], NULL };
|
||||||
if ( !test && c->title[0] != '\0' ) {
|
if ( !test && c->title[0] != '\0' ) {
|
||||||
test = token_match ( ftokens, c->title, case_sensitive, 0, NULL );
|
test = token_match ( ftokens, c->title, is_not_ascii(c->title), case_sensitive, 0, NULL );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !test && c->class[0] != '\0' ) {
|
if ( !test && c->class[0] != '\0' ) {
|
||||||
test = token_match ( ftokens, c->class, case_sensitive, 0, NULL );
|
// Classify the string actually being matched (the class), not the title:
// an ASCII title must not send a non-ascii class down the ASCII fast path.
test = token_match ( ftokens, c->class, is_not_ascii ( c->class ), case_sensitive, 0, NULL );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !test && c->role[0] != '\0' ) {
|
if ( !test && c->role[0] != '\0' ) {
|
||||||
test = token_match ( ftokens, c->role, case_sensitive, 0, NULL );
|
// Classify the string actually being matched (the role), not the title:
// an ASCII title must not send a non-ascii role down the ASCII fast path.
test = token_match ( ftokens, c->role, is_not_ascii ( c->role ), case_sensitive, 0, NULL );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !test && c->name[0] != '\0' ) {
|
if ( !test && c->name[0] != '\0' ) {
|
||||||
test = token_match ( ftokens, c->name, case_sensitive, 0, NULL );
|
// Classify the string actually being matched (the name), not the title:
// an ASCII title must not send a non-ascii name down the ASCII fast path.
test = token_match ( ftokens, c->name, is_not_ascii ( c->name ), case_sensitive, 0, NULL );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( test == 0 ) {
|
if ( test == 0 ) {
|
||||||
|
@@ -310,11 +310,13 @@ int find_arg_char ( const char * const key, char *val )
|
|||||||
* Shared 'token_match' function.
|
* Shared 'token_match' function.
|
||||||
* Matches tokenized.
|
* Matches tokenized.
|
||||||
*/
|
*/
|
||||||
static int fuzzy_token_match ( char **tokens, const char *input, int case_sensitive )
|
static int fuzzy_token_match ( char **tokens, const char *input, __attribute__( (unused) ) int not_ascii, int case_sensitive )
|
||||||
{
|
{
|
||||||
int match = 1;
|
int match = 1;
|
||||||
char *compk = token_collate_key ( input, case_sensitive );
|
char *compk = token_collate_key ( input, case_sensitive );
|
||||||
// Do a tokenized match.
|
// Do a tokenized match.
|
||||||
|
// TODO: this doesn't work for unicode input, because it may split a codepoint which is over two bytes.
|
||||||
|
// TODO this does not use the non-ascii speed-up either.
|
||||||
if ( tokens ) {
|
if ( tokens ) {
|
||||||
for ( int j = 0; match && tokens[j]; j++ ) {
|
for ( int j = 0; match && tokens[j]; j++ ) {
|
||||||
char *t = compk;
|
char *t = compk;
|
||||||
@@ -331,28 +333,33 @@ static int fuzzy_token_match ( char **tokens, const char *input, int case_sensit
|
|||||||
g_free ( compk );
|
g_free ( compk );
|
||||||
return match;
|
return match;
|
||||||
}
|
}
|
||||||
static int normal_token_match ( char **tokens, const char *input, int case_sensitive )
|
static int normal_token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive )
|
||||||
{
|
{
|
||||||
int match = 1;
|
int match = 1;
|
||||||
char *compk = token_collate_key ( input, case_sensitive );
|
char *compk = not_ascii ? token_collate_key ( input, case_sensitive ) : (char *) input;
|
||||||
|
|
||||||
// Do a tokenized match.
|
// Do a tokenized match.
|
||||||
|
|
||||||
if ( tokens ) {
|
if ( tokens ) {
|
||||||
|
char *(*comparison)(const char *, const char *);
|
||||||
|
comparison = (case_sensitive || not_ascii) ? strstr : strcasestr;
|
||||||
for ( int j = 0; match && tokens[j]; j++ ) {
|
for ( int j = 0; match && tokens[j]; j++ ) {
|
||||||
match = ( strstr ( compk, tokens[j] ) != NULL );
|
match = (comparison( compk, tokens[j] ) != NULL );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
g_free ( compk );
|
|
||||||
|
if (not_ascii) g_free ( compk );
|
||||||
|
|
||||||
return match;
|
return match;
|
||||||
}
|
}
|
||||||
int token_match ( char **tokens, const char *input, int case_sensitive,
|
int token_match ( char **tokens, const char *input, int not_ascii, int case_sensitive,
|
||||||
__attribute__( ( unused ) ) unsigned int index,
|
__attribute__( ( unused ) ) unsigned int index,
|
||||||
__attribute__( ( unused ) ) Switcher *data )
|
__attribute__( ( unused ) ) Switcher *data )
|
||||||
{
|
{
|
||||||
if ( config.fuzzy ) {
|
if ( config.fuzzy ) {
|
||||||
return fuzzy_token_match ( tokens, input, case_sensitive );
|
return fuzzy_token_match ( tokens, input, not_ascii, case_sensitive );
|
||||||
}
|
}
|
||||||
return normal_token_match ( tokens, input, case_sensitive );
|
return normal_token_match ( tokens, input, not_ascii, case_sensitive );
|
||||||
}
|
}
|
||||||
|
|
||||||
int execute_generator ( const char * cmd )
|
int execute_generator ( const char * cmd )
|
||||||
@@ -478,3 +485,12 @@ void config_sanity_check ( )
|
|||||||
config.menu_bg_alt = config.menu_bg;
|
config.menu_bg_alt = config.menu_bg;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @param str a NUL-terminated UTF8 string (may be NULL)
 *
 * Reports whether a string needs the unicode-aware matching path.
 * In UTF8 every byte of a non-ascii codepoint has its high bit set, so the
 * scan can stop at the first byte that is 0x80 or above.
 *
 * @returns 1 if the string contains any non-ascii codepoints, 0 otherwise
 *          (including for an empty or NULL string).
 */
int is_not_ascii ( const char * str )
{
    if ( !str ) {
        return 0;
    }
    // Inspect the bytes as unsigned char: whether plain char is signed is
    // implementation-defined, so a test like "*str > 0" cannot detect high
    // bytes on platforms where char is unsigned.
    for ( const unsigned char *byte = (const unsigned char *) str; *byte != '\0'; byte++ ) {
        if ( *byte >= 0x80 ) {
            return 1;
        }
    }
    return 0;
}
|
||||||
|
@@ -244,6 +244,7 @@ typedef struct MenuState
|
|||||||
unsigned int *selected_line;
|
unsigned int *selected_line;
|
||||||
MenuReturn retv;
|
MenuReturn retv;
|
||||||
char **lines;
|
char **lines;
|
||||||
|
int *lines_not_ascii;
|
||||||
int line_height;
|
int line_height;
|
||||||
}MenuState;
|
}MenuState;
|
||||||
|
|
||||||
@@ -307,6 +308,8 @@ static void menu_free_state ( MenuState *state )
|
|||||||
g_free ( state->boxes );
|
g_free ( state->boxes );
|
||||||
g_free ( state->line_map );
|
g_free ( state->line_map );
|
||||||
g_free ( state->distance );
|
g_free ( state->distance );
|
||||||
|
|
||||||
|
g_free ( state->lines_not_ascii );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -724,7 +727,7 @@ static void menu_refilter ( MenuState *state )
|
|||||||
|
|
||||||
// input changed
|
// input changed
|
||||||
for ( unsigned int i = 0; i < state->num_lines; i++ ) {
|
for ( unsigned int i = 0; i < state->num_lines; i++ ) {
|
||||||
int match = state->sw->token_match ( tokens, state->lines[i], config.case_sensitive, i, state->sw );
|
int match = state->sw->token_match ( tokens, state->lines[i], state->lines_not_ascii[i], config.case_sensitive, i, state->sw );
|
||||||
|
|
||||||
// If each token was matched, add it to list.
|
// If each token was matched, add it to list.
|
||||||
if ( match ) {
|
if ( match ) {
|
||||||
@@ -749,6 +752,7 @@ static void menu_refilter ( MenuState *state )
|
|||||||
}
|
}
|
||||||
state->filtered_lines = state->num_lines;
|
state->filtered_lines = state->num_lines;
|
||||||
}
|
}
|
||||||
|
|
||||||
state->selected = MIN ( state->selected, state->filtered_lines - 1 );
|
state->selected = MIN ( state->selected, state->filtered_lines - 1 );
|
||||||
|
|
||||||
if ( config.auto_select == TRUE && state->filtered_lines == 1 && state->num_lines > 1 ) {
|
if ( config.auto_select == TRUE && state->filtered_lines == 1 && state->num_lines > 1 ) {
|
||||||
@@ -1013,6 +1017,14 @@ MenuReturn menu ( Switcher *sw, char **input, char *prompt, unsigned int *select
|
|||||||
};
|
};
|
||||||
// Request the lines to show.
|
// Request the lines to show.
|
||||||
state.lines = sw->get_data ( &( state.num_lines ), sw );
|
state.lines = sw->get_data ( &( state.num_lines ), sw );
|
||||||
|
state.lines_not_ascii = g_malloc0_n( state.num_lines, sizeof( int ) );
|
||||||
|
|
||||||
|
// find out which lines contain non-ascii codepoints, so we can be faster in some cases.
|
||||||
|
|
||||||
|
for (unsigned int line = 0; state.lines[line]; line++) {
|
||||||
|
state.lines_not_ascii[line] = is_not_ascii(state.lines[line]);
|
||||||
|
}
|
||||||
|
|
||||||
if ( next_pos ) {
|
if ( next_pos ) {
|
||||||
*next_pos = *selected_line;
|
*next_pos = *selected_line;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user