diff options
Diffstat (limited to '')
-rw-r--r-- | third_party/rust/rure/examples/iter.c | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/third_party/rust/rure/examples/iter.c b/third_party/rust/rure/examples/iter.c new file mode 100644 index 0000000000..47c83e806f --- /dev/null +++ b/third_party/rust/rure/examples/iter.c @@ -0,0 +1,99 @@ +/* + * This example code shows how to iterate over all regex matches in a file, + * emit the match location and print the contents of a capturing group. + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "rure.h" + +int main() { + /* Open a file and mmap it. */ + int fd = open("sherlock.txt", O_RDONLY); + if (fd == -1) { + perror("failed to open sherlock.txt"); + exit(1); + } + struct stat status; + if (fstat(fd, &status) == -1) { + perror("failed to stat sherlock.txt"); + exit(1); + } + if ((uintmax_t)status.st_size > SIZE_MAX) { + perror("file too big"); + exit(1); + } + if (status.st_size == 0) { + perror("file empty"); + exit(1); + } + size_t sherlock_len = (size_t)status.st_size; + const uint8_t *sherlock = (const uint8_t *)mmap( + NULL, status.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (sherlock == MAP_FAILED) { + perror("could not mmap file"); + exit(1); + } + + /* + * Compile the regular expression. A more convenient routine, + * rure_compile_must, is also available, which will abort the process if + * and print an error message to stderr if the regex compilation fails. + * We show the full gory details here as an example. + */ + const char *pattern = "(\\w+)\\s+Holmes"; + size_t pattern_len = strlen(pattern); + rure_error *err = rure_error_new(); + rure *re = rure_compile((const uint8_t *)pattern, pattern_len, + RURE_FLAG_UNICODE | RURE_FLAG_CASEI, NULL, err); + if (NULL == re) { + /* A null regex means compilation failed and an error exists. */ + printf("compilation of %s failed: %s\n", + pattern, rure_error_message(err)); + rure_error_free(err); + munmap((char*)sherlock, sherlock_len); + exit(1); + } + rure_error_free(err); + + /* + * Create an iterator to find all successive non-overlapping matches. + * For each match, we extract the location of the capturing group. + */ + rure_match group0 = {0}; + rure_match group1 = {0}; + rure_captures *caps = rure_captures_new(re); + rure_iter *it = rure_iter_new(re); + + while (rure_iter_next_captures(it, sherlock, sherlock_len, caps)) { + /* + * Get the location of the full match and the capturing group. + * We know that both accesses are successful since the body of the + * loop only executes if there is a match and both capture groups + * must match in order for the entire regex to match. + * + * N.B. The zeroth group corresponds to the full match of the regex. + */ + rure_captures_at(caps, 0, &group0); + rure_captures_at(caps, 1, &group1); + printf("%.*s (match at: %zu, %zu)\n", + (int)(group1.end - group1.start), + sherlock + group1.start, + group0.start, group0.end); + } + + /* Free all our resources. */ + munmap((char*)sherlock, sherlock_len); + rure_captures_free(caps); + rure_iter_free(it); + rure_free(re); + return 0; +} |