I get a `ParseError("bad varint")` when running the following test (added to `./arrow-avro/src/reader/mod.rs`):
/// Builds the Avro reader schema used by the regression test below.
///
/// The Arrow schema has a single nullable column `int_array` of type
/// `List<Int32>` whose items are a nullable field named `element`, and
/// carries the schema-level metadata entry `avro.name = table`.
///
/// # Panics
///
/// Panics if the Arrow schema cannot be converted into an `AvroSchema`.
fn get_int_array_schema() -> AvroSchema {
    // Item field of the list; the list column wraps it.
    let element = Arc::new(Field::new("element", DataType::Int32, true));
    let int_array = Field::new("int_array", DataType::List(element), true);

    let metadata: HashMap<String, String> =
        HashMap::from([("avro.name".into(), "table".into())]);

    let arrow_schema = Schema::new(vec![int_array]).with_metadata(metadata);
    AvroSchema::try_from(&arrow_schema).unwrap()
}
/// Regression test for the "bad varint" report.
///
/// Reads `avro/bad-varint-bug.avro` from the Arrow test data directory using
/// the reader schema from `get_int_array_schema`, then checks that the first
/// list in column 0 contains exactly `[1, 2]`.
#[test]
fn test_bad_varint_bug() {
    let avro_path = arrow_test_data("avro/bad-varint-bug.avro");
    let reader_schema = get_int_array_schema();
    let batch = read_alltypes_with_reader_schema(&avro_path, reader_schema);

    // Column 0 is the `int_array` list column; take its first row.
    let first_row = batch.column(0).as_list::<i32>().value(0);

    // `as _` lets the array type be inferred from the comparison below.
    let expected = Arc::new(Int32Array::from_iter_values(vec![1i32, 2])) as _;
    assert_eq!(first_row, expected);
}
The Avro file, which Spark reads successfully: bad-varint-bug.avro.gz
Originally posted by @mzabaluev in #8930 (comment)