/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to you under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ #include #include #include /* Test code for JIRA Issue AVRO-984. * * AVRO-984: Avro-C schema resolution fails on nested array * * This program tests schema resolution for nested arrays. For the * purposes of this test, there are two schemas "old" and "new" which * are created by reading the same JSON schema. * * The test creates and populates a nested array, and serializes it to * memory. The raw memory is written to a file, primarily to decouple * writing and reading. Note that the schema is not written to the * file. The nested array is also printed to the screen. * * The binary file is then read using two separate readers -- the * matched reader and the resolved reader. * * In the matched reader case, the "old" and "new" schemas are known * to match, and therefore no schema resolution is done. The binary * buffer is deserialized into an avro value and the nested array * encoded in the avro value is printed to the screen. * * In the resolved reader case, the "old" and "new" schemas are not * known to match, and therefore schema resolution is performed. (Note * that the schemas *do* match, but we perform schema resolution * anyway, to test the resolution process). The schema resolution * appears to succeed. However, once the code tries to perform an * "avro_value_read()" the code fails to read the nested array into * the avro value. * * Additionally valgrind indicates that conditional jumps are being * performed based on uninitialized values. * * AVRO-C was compiled with CMAKE_INSTALL_PREFIX=avrolib * The static library (libavro.a) was copied into a subdirectory of avrolib/lib/static * * This file was compiled under Linux using: * gcc -g avro-984-test.c -o avro984 -I../../build/avrolib/include -L../../build/avrolib/lib/static -lavro * * The code was tested with valgrind using the command: * valgrind -v --leak-check=full --track-origins=yes ./avro984 * */ // Encode the following json string in NESTED_ARRAY // {"type":"array", "items": {"type": "array", "items": "long"}} // #define NESTED_ARRAY \ "{\"type\":\"array\", \"items\": {\"type\": \"array\", \"items\": \"long\"}}" avro_schema_t schema_old = NULL; avro_schema_t schema_new = NULL; /* Parse schema into a schema data structure */ void init_schema(void) { avro_schema_error_t error; if (avro_schema_from_json(NESTED_ARRAY, sizeof(NESTED_ARRAY), &schema_old, &error)) { printf( "Unable to parse old schema\n"); exit(EXIT_FAILURE); } if (avro_schema_from_json(NESTED_ARRAY, sizeof(NESTED_ARRAY), &schema_new, &error)) { printf( "Unable to parse new schema\n"); exit(EXIT_FAILURE); } } #define try(call, msg) \ do { \ if (call) { \ printf( msg ":\n %s\n", avro_strerror()); \ exit (EXIT_FAILURE); \ } \ } while (0) /* The input avro_value_t p_array should contain a nested array. * Print the fields of this nested array to the screen. */ int print_array_fields ( avro_value_t *p_array ) { size_t idx; size_t length; avro_type_t val_type; val_type = avro_value_get_type( p_array ); printf( "Main array type = %d\n", val_type ); try( avro_value_get_size( p_array, &length ), "Couldn't get array size" ); printf( "Main array length = %d\n", (int) length ); for ( idx = 0; idx < length; idx ++ ) { avro_value_t subarray; size_t sublength; size_t jdx; const char *unused; try ( avro_value_get_by_index( p_array, idx, &subarray, &unused ), "Couldn't get subarray" ); val_type = avro_value_get_type( &subarray ); printf( "Subarray type = %d\n", val_type ); try( avro_value_get_size( &subarray, &sublength ), "Couldn't get subarray size" ); printf( "Subarray length = %d\n", (int) sublength ); for ( jdx = 0; jdx < sublength; jdx++ ) { avro_value_t element; int64_t val; try ( avro_value_get_by_index( &subarray, jdx, &element, &unused ), "Couldn't get subarray element" ); val_type = avro_value_get_type( &element ); try ( avro_value_get_long( &element, &val ), "Couldn't get subarray element value" ); printf( "nested_array[%d][%d]: type = %d value = %lld\n", (int) idx, (int) jdx, (int) val_type, (long long) val ); } } return 0; } /* The input avro_value_t p_subarray should contain an array of long * integers. Add "elements" number of long integers to this array. Set * the values to be distinct based on the iteration parameter. */ int add_subarray( avro_value_t *p_subarray, int32_t elements, int32_t iteration ) { avro_value_t element; size_t index; size_t idx; for ( idx = 0; idx < (size_t) elements; idx ++ ) { // Append avro array element to subarray try ( avro_value_append( p_subarray, &element, &index ), "Error appending element in subarray" ); try ( avro_value_set_long( &element, (iteration+1)*100 + (iteration+1) ), "Error setting subarray element" ); } return 0; } /* Create a nested array using the schema NESTED_ARRAY. Populate its * elements with unique values. Serialize the nested array to the * memory buffer in avro_writer_t. The number of elements in the first * dimension of the nested array is "elements". The number of elements * in the second dimension of the nested array is hardcoded to 2. */ int add_array( avro_writer_t writer, int32_t elements ) { avro_schema_t chosen_schema; avro_value_iface_t *nested_array_class; avro_value_t nested; int32_t idx; // Select (hardcode) schema to use chosen_schema = schema_old; // Create avro class and value nested_array_class = avro_generic_class_from_schema( chosen_schema ); try ( avro_generic_value_new( nested_array_class, &nested ), "Error creating instance of record" ); for ( idx = 0; idx < elements; idx ++ ) { avro_value_t subarray; size_t index; // Append avro array element for top level array try ( avro_value_append( &nested, &subarray, &index ), "Error appending subarray" ); // Populate array element with subarray of length 2 #define SUBARRAY_LENGTH (2) try ( add_subarray( &subarray, SUBARRAY_LENGTH, idx ), "Error populating subarray" ); } // Write the value to memory try ( avro_value_write( writer, &nested ), "Unable to write nested into memory" ); print_array_fields( &nested ); // Release the record avro_value_decref( &nested ); avro_value_iface_decref( nested_array_class ); return 0; } /* Create a raw binary file containing a serialized version of a * nested array. This file will later be read by * read_nested_array_file(). */ int write_nested_array_file ( int64_t buf_len, const char *raw_binary_file_name ) { char *buf; avro_writer_t nested_writer; FILE *fid = NULL; fprintf( stdout, "Create %s\n", raw_binary_file_name ); // Allocate a buffer buf = (char *) malloc( buf_len * sizeof( char ) ); if ( buf == NULL ) { printf( "There was an error creating the nested buffer %s.\n", raw_binary_file_name); exit(EXIT_FAILURE); } /* Create a new memory writer */ nested_writer = avro_writer_memory( buf, buf_len ); if ( nested_writer == NULL ) { printf( "There was an error creating the buffer for writing %s.\n", raw_binary_file_name); exit(EXIT_FAILURE); } /* Add an array containing 4 subarrays */ printf( "before avro_writer_tell %d\n", (int) avro_writer_tell( nested_writer ) ); #define ARRAY_LENGTH (4) add_array( nested_writer, ARRAY_LENGTH ); printf( "after avro_writer_tell %d\n", (int) avro_writer_tell( nested_writer ) ); /* Serialize the nested array */ printf( "Serialize the data to a file\n"); /* Delete the nested array if it exists, and create a new one */ remove(raw_binary_file_name); fid = fopen( raw_binary_file_name, "w+"); if ( fid == NULL ) { printf( "There was an error creating the file %s.\n", raw_binary_file_name); exit(EXIT_FAILURE); } fwrite( buf, 1, avro_writer_tell( nested_writer ), fid ); fclose(fid); avro_writer_free( nested_writer ); free(buf); return 0; } /* Read the raw binary file containing a serialized version of a * nested array, written by write_nested_array_file() */ int read_nested_array_file ( int64_t buf_len, const char *raw_binary_file_name, avro_schema_t writer_schema, avro_schema_t reader_schema, int use_resolving_reader ) { char *buf; FILE *fid = NULL; avro_reader_t nested_reader; int64_t file_len; // For Matched Reader and Resolving Reader avro_value_iface_t *reader_class; avro_value_t nested; // For Resolving Reader avro_value_iface_t *resolver; avro_value_t resolved_value; fprintf( stdout, "Use %s reader\n", use_resolving_reader ? "Resolving":"Matched" ); // Allocate a buffer buf = (char *) calloc( buf_len, sizeof( char ) ); if ( buf == NULL ) { printf( "There was an error creating the buffer for reading %s.\n", raw_binary_file_name); exit(EXIT_FAILURE); } // Start with a garbage buffer memset(buf, 0xff, buf_len ); // Read the file into the buffer fid = fopen( raw_binary_file_name, "r" ); if ( fid == NULL ) { printf( "There was an error reading the file %s.\n", raw_binary_file_name); exit(EXIT_FAILURE); } file_len = fread( buf, 1, buf_len, fid ); printf( "Read %d bytes\n", (int) file_len ); fclose(fid); if ( use_resolving_reader ) { // Resolving Reader /* First resolve the writer and reader schemas */ resolver = avro_resolved_writer_new( writer_schema, reader_schema ); if ( !resolver ) { printf( "Could not create resolver\n"); free(buf); exit(EXIT_FAILURE); } /* Create a value that the resolver can write into. This is just * an interface value, that is not directly read from. */ if ( avro_resolved_writer_new_value( resolver, &resolved_value ) ) { avro_value_iface_decref( resolver ); free(buf); exit(EXIT_FAILURE); } /* Then create the value with the reader schema, that we are going * to use to read from. */ reader_class = avro_generic_class_from_schema(reader_schema); try ( avro_generic_value_new( reader_class, &nested ), "Error creating instance of nested array" ); // When we read the memory using the resolved writer, we want to // populate the instance of the value with the reader schema. This // is done by set_dest. avro_resolved_writer_set_dest(&resolved_value, &nested); // Create a memory reader nested_reader = avro_reader_memory( buf, buf_len ); if ( avro_value_read( nested_reader, &resolved_value ) ) { printf( "Avro value read failed\n" ); avro_value_decref( &nested ); avro_value_iface_decref( reader_class ); avro_value_iface_decref( resolver ); avro_value_decref( &resolved_value ); exit(EXIT_FAILURE); } } else { // Matched Reader reader_class = avro_generic_class_from_schema(reader_schema); try ( avro_generic_value_new( reader_class, &nested ), "Error creating instance of nested array" ); // Send the memory in the buffer into the reader nested_reader = avro_reader_memory( buf, buf_len ); try ( avro_value_read( nested_reader, &nested ), "Could not read value from memory" ); } /* Now the resolved record has been read into "nested" which is * a value of type reader_class */ print_array_fields( &nested ); if ( use_resolving_reader ) { // Resolving Reader avro_value_decref( &nested ); avro_value_iface_decref( reader_class ); avro_value_iface_decref( resolver ); avro_value_decref( &resolved_value ); } else { // Matched Reader avro_value_decref( &nested ); avro_value_iface_decref( reader_class ); } fprintf( stdout, "Done.\n\n"); avro_reader_free( nested_reader ); free(buf); return 0; } /* Top level function to impelement a test for the JIRA issue * AVRO-984. See detailed documentation at the top of this file. */ int main(void) { const char *raw_binary_file_name = "nested_array.bin"; int64_t buf_len = 2048; int use_resolving_reader; /* Initialize the schema structure from JSON */ init_schema(); printf( "Write the serialized nested array to %s\n", raw_binary_file_name ); write_nested_array_file( buf_len, raw_binary_file_name ); printf("\nNow read all the array back out\n\n"); for ( use_resolving_reader = 0; use_resolving_reader < 2; use_resolving_reader++ ) { read_nested_array_file( buf_len, raw_binary_file_name, schema_old, schema_new, use_resolving_reader ); } // Close out schemas avro_schema_decref(schema_old); avro_schema_decref(schema_new); // Remove the binary file remove(raw_binary_file_name); printf("\n"); return 0; }