From a23d7c18bd321b50d12b741e744bd78bbde012f5 Mon Sep 17 00:00:00 2001 From: Aaron Niskode-Dossett Date: Mon, 29 Dec 2025 08:38:55 -0600 Subject: [PATCH 1/2] [parquet-thrift] remove pig usage --- parquet-thrift/pom.xml | 26 +-- .../thrift/AbstractThriftWriteSupport.java | 19 -- .../thrift/pig/ParquetThriftStorer.java | 91 --------- .../thrift/pig/TupleToThriftWriteSupport.java | 86 --------- .../parquet/thrift/struct/ThriftType.java | 1 + .../thrift/TestParquetWriteProtocol.java | 93 --------- .../thrift/TestThriftToPigCompatibility.java | 178 ------------------ .../thrift/pig/TestParquetThriftStorer.java | 82 -------- 8 files changed, 2 insertions(+), 574 deletions(-) delete mode 100644 parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java delete mode 100644 parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java delete mode 100644 parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java delete mode 100644 parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml index ade1ec3cdc..7fbd7cb646 100644 --- a/parquet-thrift/pom.xml +++ b/parquet-thrift/pom.xml @@ -70,7 +70,7 @@ provided + explicitly declare it as a test dependency. --> com.google.guava guava @@ -89,18 +89,6 @@ - - com.twitter.elephantbird - elephant-bird-pig - ${elephant-bird.version} - - - - com.hadoop.gplcompression - hadoop-lzo - - - org.apache.parquet parquet-jackson @@ -124,18 +112,6 @@ test-jar test - - org.apache.parquet - parquet-pig - 1.15.0 - - - org.apache.pig - pig - ${pig.version} - ${pig.classifier} - provided - org.apache.thrift libthrift diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java index dda9aadfb9..cb8a4c36b6 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/AbstractThriftWriteSupport.java @@ -15,7 +15,6 @@ */ package org.apache.parquet.hadoop.thrift; -import com.twitter.elephantbird.pig.util.ThriftToPig; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.parquet.conf.HadoopParquetConfiguration; @@ -25,13 +24,11 @@ import org.apache.parquet.io.ColumnIOFactory; import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.api.RecordConsumer; -import org.apache.parquet.pig.PigMetaData; import org.apache.parquet.schema.MessageType; import org.apache.parquet.thrift.ParquetWriteProtocol; import org.apache.parquet.thrift.ThriftMetaData; import org.apache.parquet.thrift.ThriftSchemaConverter; import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.thrift.TBase; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -99,26 +96,10 @@ protected void init(Class thriftClass) { final Map extraMetaData = new ThriftMetaData(thriftClass.getName(), thriftStruct).toExtraMetaData(); - // adding the Pig schema as it would have been mapped from thrift - // TODO: make this work for non-tbase types - if (isPigLoaded() && TBase.class.isAssignableFrom(thriftClass)) { - new PigMetaData(new ThriftToPig((Class>) thriftClass).toSchema()) - .addToMetaData(extraMetaData); - } this.writeContext = new WriteContext(schema, extraMetaData); } - protected boolean isPigLoaded() { - try { - Class.forName("org.apache.pig.impl.logicalLayer.schema.Schema"); - return true; - } catch (ClassNotFoundException e) { - LOG.info("Pig is not loaded, pig metadata will not be written"); - return false; - } - } - @Override public WriteContext init(Configuration configuration) { return init(new HadoopParquetConfiguration(configuration)); diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java deleted file mode 100644 index aec21eff4a..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/ParquetThriftStorer.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.pig; - -import java.io.IOException; -import java.util.Arrays; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.parquet.hadoop.ParquetOutputFormat; -import org.apache.parquet.io.ParquetEncodingException; -import org.apache.pig.StoreFunc; -import org.apache.pig.data.Tuple; - -/** - * To store in Pig using a thrift class - * usage: - * STORE 'foo' USING parquet.thrift.pig.ParquetThriftStorer('my.thrift.Class'); - * - * @deprecated will be removed in 1.17.0 or 2.0.0 - */ -@Deprecated -public class ParquetThriftStorer extends StoreFunc { - - private RecordWriter recordWriter; - - private String className; - - public ParquetThriftStorer(String[] params) { - if (params == null || params.length != 1) { - throw new IllegalArgumentException( - "required the thrift class name in parameter. Got " + Arrays.toString(params) + " instead"); - } - className = params[0]; - } - - /** - * {@inheritDoc} - */ - @Override - public OutputFormat getOutputFormat() throws IOException { - return new ParquetOutputFormat(new TupleToThriftWriteSupport(className)); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings({"rawtypes", "unchecked"}) // that's how the base class is defined - @Override - public void prepareToWrite(RecordWriter recordWriter) throws IOException { - this.recordWriter = recordWriter; - } - - /** - * {@inheritDoc} - */ - @Override - public void putNext(Tuple tuple) throws IOException { - try { - this.recordWriter.write(null, tuple); - } catch (InterruptedException e) { - throw new ParquetEncodingException("Interrupted while writing", e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void setStoreLocation(String location, Job job) throws IOException { - FileOutputFormat.setOutputPath(job, new Path(location)); - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java deleted file mode 100644 index 395cf70f7e..0000000000 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.pig; - -import com.twitter.elephantbird.pig.util.PigToThrift; -import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.conf.HadoopParquetConfiguration; -import org.apache.parquet.conf.ParquetConfiguration; -import org.apache.parquet.hadoop.BadConfigurationException; -import org.apache.parquet.hadoop.api.WriteSupport; -import org.apache.parquet.hadoop.thrift.ThriftWriteSupport; -import org.apache.parquet.io.api.RecordConsumer; -import org.apache.pig.data.Tuple; -import org.apache.thrift.TBase; - -/** - * Stores Pig tuples as Thrift objects - * - * @deprecated will be removed in 1.17.0 or 2.0.0 - */ -@Deprecated -public class TupleToThriftWriteSupport extends WriteSupport { - - private final String className; - private ThriftWriteSupport> thriftWriteSupport; - private PigToThrift> pigToThrift; - - /** - * @param className the thrift class name - */ - public TupleToThriftWriteSupport(String className) { - super(); - this.className = className; - } - - @Override - public String getName() { - return "thrift"; - } - - @Override - public WriteContext init(Configuration configuration) { - return init(new HadoopParquetConfiguration(configuration)); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - @Override - public WriteContext init(ParquetConfiguration configuration) { - try { - Class clazz = configuration.getClassByName(className).asSubclass(TBase.class); - thriftWriteSupport = new ThriftWriteSupport(clazz); - pigToThrift = new PigToThrift(clazz); - return thriftWriteSupport.init(configuration); - } catch (ClassNotFoundException e) { - throw new BadConfigurationException("The thrift class name was not found: " + className, e); - } catch (ClassCastException e) { - throw new BadConfigurationException("The thrift class name should extend TBase: " + className, e); - } - } - - @Override - public void prepareForWrite(RecordConsumer recordConsumer) { - thriftWriteSupport.prepareForWrite(recordConsumer); - } - - @Override - public void write(Tuple t) { - thriftWriteSupport.write(pigToThrift.getThriftObject(t)); - } -} diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java index 264790333a..c7aa12b3aa 100644 --- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java +++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java @@ -142,6 +142,7 @@ default R visit(UUIDType uuidType, S state) { /** * @deprecated will be removed in 2.0.0; use StateVisitor instead. */ + @Deprecated public interface TypeVisitor { void visit(MapType mapType); diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java index 9e562bf734..d11be4af3c 100644 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java +++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestParquetWriteProtocol.java @@ -23,7 +23,6 @@ import com.twitter.data.proto.tutorial.thrift.Person; import com.twitter.data.proto.tutorial.thrift.PhoneNumber; import com.twitter.data.proto.tutorial.thrift.PhoneType; -import com.twitter.elephantbird.pig.util.ThriftToPig; import com.twitter.elephantbird.thrift.test.TestMap; import com.twitter.elephantbird.thrift.test.TestMapInList; import com.twitter.elephantbird.thrift.test.TestMapInSet; @@ -45,20 +44,14 @@ import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; -import org.apache.parquet.conf.ParquetConfiguration; import org.apache.parquet.io.ColumnIOFactory; import org.apache.parquet.io.ExpectationValidatingRecordConsumer; import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.RecordConsumerLoggingWrapper; -import org.apache.parquet.pig.PigSchemaConverter; -import org.apache.parquet.pig.TupleWriteSupport; import org.apache.parquet.schema.MessageType; import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.thrift.TBase; import org.apache.thrift.TException; -import org.junit.ComparisonFailure; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -133,59 +126,20 @@ public void testMap() throws Exception { map.put("foo", "bar"); map.put("foo2", "bar2"); TestMap testMap = new TestMap("map_name", map); - try { - validatePig(expectations, testMap); - } catch (ComparisonFailure e) { - // This can happen despite using a stable TreeMap, since ThriftToPig#toPigMap - // in com.twitter.elephantbird.pig.util creates a HashMap. - // So we test with the map elements in reverse order - validatePig(expectationsAlt, testMap); - } validateThrift(expectations, testMap); } /** * @throws Exception - * @see TestThriftToPigCompatibility */ @Test public void testMapInSet() throws Exception { - String[] pigExpectations = { - "startMessage()", - "startField(name, 0)", - "addBinary(top)", - "endField(name, 0)", - "startField(names, 1)", // set: optional field - "startGroup()", - "startField(t, 0)", // repeated field - "startGroup()", - "startField(names_tuple, 0)", // map: optional field - "startGroup()", - "startField(key_value, 0)", // repeated field - "startGroup()", - "startField(key, 0)", // key - "addBinary(foo)", - "endField(key, 0)", - "startField(value, 1)", // value - "addBinary(bar)", - "endField(value, 1)", - "endGroup()", - "endField(key_value, 0)", - "endGroup()", - "endField(names_tuple, 0)", - "endGroup()", - "endField(t, 0)", - "endGroup()", - "endField(names, 1)", - "endMessage()" - }; final Set> set = new HashSet>(); final Map map = new HashMap(); map.put("foo", "bar"); set.add(map); TestMapInSet o = new TestMapInSet("top", set); - validatePig(pigExpectations, o); String[] expectationsThrift = { "startMessage()", @@ -217,7 +171,6 @@ public void testMapInSet() throws Exception { /** * @throws TException - * @see TestThriftToPigCompatibility */ @Test public void testNameList() throws TException { @@ -226,31 +179,6 @@ public void testNameList() throws TException { names.add("Jack"); final TestNameList o = new TestNameList("name", names); - String[] pigExpectations = { - "startMessage()", - "startField(name, 0)", - "addBinary(name)", - "endField(name, 0)", - "startField(names, 1)", - "startGroup()", - "startField(t, 0)", - "startGroup()", - "startField(names_tuple, 0)", - "addBinary(John)", - "endField(names_tuple, 0)", - "endGroup()", - "startGroup()", - "startField(names_tuple, 0)", - "addBinary(Jack)", - "endField(names_tuple, 0)", - "endGroup()", - "endField(t, 0)", - "endGroup()", - "endField(names, 1)", - "endMessage()" - }; - validatePig(pigExpectations, o); - String[] expectations = { "startMessage()", "startField(name, 0)", @@ -326,7 +254,6 @@ public void testStructInMap() throws Exception { map.put("foo", new TestPerson(new TestName("john", "johnson"), new HashMap())); final Map stringToIntMap = Collections.singletonMap("bar", 10); TestStructInMap testMap = new TestStructInMap("map_name", map, stringToIntMap); - validatePig(expectations, testMap); validateThrift(expectations, testMap); } @@ -341,7 +268,6 @@ public void testProtocolEmptyAdressBook() throws Exception { "endMessage()" }; AddressBook a = new AddressBook(new ArrayList()); - validatePig(expectations, a); validateThrift(expectations, a); } @@ -442,7 +368,6 @@ public void testProtocolAddressBook() throws Exception { "dick@richardson.com", Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555 999 9996")))); AddressBook a = new AddressBook(persons); - validatePig(expectations, a); // naming conventions are slightly different for the bag inner tuple. The reader should ignore this. String[] expectationsThrift = Arrays.copyOf(expectations, expectations.length, String[].class); expectationsThrift[3] = "startField(persons_tuple, 0)"; @@ -520,7 +445,6 @@ public void testOneOfEach() throws TException { new ArrayList(), new ArrayList(), new ArrayList()); - validatePig(expectations, a); String[] thriftExpectations = Arrays.copyOf(expectations, expectations.length, String[].class); thriftExpectations[2] = "addBoolean(true)"; // Elephant bird maps booleans to int thriftExpectations[5] = "addBoolean(false)"; @@ -709,21 +633,4 @@ private void validateThrift(Configuration configuration, String[] expectations, configuration, new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType); a.write(p); } - - private MessageType validatePig(String[] expectations, TBase a) { - ThriftToPig> thriftToPig = new ThriftToPig(a.getClass()); - ExpectationValidatingRecordConsumer recordConsumer = - new ExpectationValidatingRecordConsumer(new ArrayDeque(Arrays.asList(expectations))); - Schema pigSchema = thriftToPig.toSchema(); - LOG.info("{}", pigSchema); - MessageType schema = new PigSchemaConverter().convert(pigSchema); - LOG.info("{}", schema); - TupleWriteSupport tupleWriteSupport = new TupleWriteSupport(pigSchema); - tupleWriteSupport.init((ParquetConfiguration) null); - tupleWriteSupport.prepareForWrite(recordConsumer); - final Tuple pigTuple = thriftToPig.getPigTuple(a); - LOG.info("{}", pigTuple); - tupleWriteSupport.write(pigTuple); - return schema; - } } diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java deleted file mode 100644 index 43cc52673d..0000000000 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/TestThriftToPigCompatibility.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift; - -import static org.junit.Assert.assertEquals; - -import com.twitter.data.proto.tutorial.thrift.AddressBook; -import com.twitter.data.proto.tutorial.thrift.Name; -import com.twitter.data.proto.tutorial.thrift.Person; -import com.twitter.data.proto.tutorial.thrift.PhoneNumber; -import com.twitter.data.proto.tutorial.thrift.PhoneType; -import com.twitter.elephantbird.pig.util.ThriftToPig; -import com.twitter.elephantbird.thrift.test.TestMap; -import com.twitter.elephantbird.thrift.test.TestMapInSet; -import com.twitter.elephantbird.thrift.test.TestName; -import com.twitter.elephantbird.thrift.test.TestNameList; -import com.twitter.elephantbird.thrift.test.TestPerson; -import com.twitter.elephantbird.thrift.test.TestPhoneType; -import com.twitter.elephantbird.thrift.test.TestStructInMap; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import org.apache.parquet.io.ColumnIOFactory; -import org.apache.parquet.io.ConverterConsumer; -import org.apache.parquet.io.MessageColumnIO; -import org.apache.parquet.io.RecordConsumerLoggingWrapper; -import org.apache.parquet.io.api.RecordConsumer; -import org.apache.parquet.pig.PigSchemaConverter; -import org.apache.parquet.pig.convert.TupleRecordMaterializer; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.thrift.struct.ThriftType.StructType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.thrift.TBase; -import org.apache.thrift.TException; -import org.junit.Test; -import thrift.test.OneOfEach; - -public class TestThriftToPigCompatibility { - - public void testMap() throws Exception { - Map map = new TreeMap(); - map.put("foo", "bar"); - map.put("foo2", "bar2"); - TestMap testMap = new TestMap("map_name", map); - validateSameTupleAsEB(testMap); - } - - @Test - public void testMapInSet() throws Exception { - final Set> set = new HashSet>(); - final Map map = new HashMap(); - map.put("foo", "bar"); - set.add(map); - TestMapInSet o = new TestMapInSet("top", set); - validateSameTupleAsEB(o); - } - - @Test - public void testStructInMap() throws Exception { - - final Map map = new HashMap(); - map.put("foo", new TestPerson(new TestName("john", "johnson"), new HashMap())); - final Map stringToIntMap = Collections.singletonMap("bar", 10); - TestStructInMap testMap = new TestStructInMap("map_name", map, stringToIntMap); - validateSameTupleAsEB(testMap); - } - - @Test - public void testProtocolEmptyAdressBook() throws Exception { - - AddressBook a = new AddressBook(new ArrayList()); - validateSameTupleAsEB(a); - } - - @Test - public void testProtocolAddressBook() throws Exception { - ArrayList persons = new ArrayList(); - final PhoneNumber phoneNumber = new PhoneNumber("555 999 9998"); - phoneNumber.type = PhoneType.HOME; - persons.add(new Person( - new Name("Bob", "Roberts"), - 1, - "bob@roberts.com", - Arrays.asList(new PhoneNumber("555 999 9999"), phoneNumber))); - persons.add(new Person( - new Name("Dick", "Richardson"), - 2, - "dick@richardson.com", - Arrays.asList(new PhoneNumber("555 999 9997"), new PhoneNumber("555 999 9996")))); - AddressBook a = new AddressBook(persons); - validateSameTupleAsEB(a); - } - - @Test - public void testOneOfEach() throws Exception { - OneOfEach a = new OneOfEach( - true, - false, - (byte) 8, - (short) 16, - (int) 32, - (long) 64, - (double) 1234, - "string", - "å", - false, - ByteBuffer.wrap("a".getBytes()), - new ArrayList(), - new ArrayList(), - new ArrayList()); - validateSameTupleAsEB(a); - } - - @Test - public void testStringList() throws Exception { - final List names = new ArrayList(); - names.add("John"); - names.add("Jack"); - TestNameList o = new TestNameList("name", names); - validateSameTupleAsEB(o); - } - - /** - *
    steps: - *
  • Writes using the thrift mapping - *
  • Reads using the pig mapping - *
  • Use Elephant bird to convert from thrift to pig - *
  • Check that both transformations give the same result - * - * @param o the object to convert - * @throws TException - */ - public static > void validateSameTupleAsEB(T o) throws TException { - final ThriftSchemaConverter thriftSchemaConverter = new ThriftSchemaConverter(); - @SuppressWarnings("unchecked") - final Class class1 = (Class) o.getClass(); - final MessageType schema = thriftSchemaConverter.convert(class1); - - final StructType structType = ThriftSchemaConverter.toStructType(class1); - final ThriftToPig thriftToPig = new ThriftToPig(class1); - final Schema pigSchema = thriftToPig.toSchema(); - final TupleRecordMaterializer tupleRecordConverter = new TupleRecordMaterializer(schema, pigSchema, true); - RecordConsumer recordConsumer = new ConverterConsumer(tupleRecordConverter.getRootConverter(), schema); - final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema); - ParquetWriteProtocol p = - new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType); - o.write(p); - final Tuple t = tupleRecordConverter.getCurrentRecord(); - final Tuple expected = thriftToPig.getPigTuple(o); - assertEquals(expected.toString(), t.toString()); - final MessageType filtered = new PigSchemaConverter().filter(schema, pigSchema); - assertEquals(schema.toString(), filtered.toString()); - } -} diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java deleted file mode 100644 index c021cd2d22..0000000000 --- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/pig/TestParquetThriftStorer.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.parquet.thrift.pig; - -import static org.apache.pig.builtin.mock.Storage.tuple; -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Properties; -import org.apache.parquet.pig.ParquetLoader; -import org.apache.parquet.thrift.test.Name; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.backend.executionengine.ExecJob.JOB_STATUS; -import org.apache.pig.builtin.mock.Storage; -import org.apache.pig.builtin.mock.Storage.Data; -import org.apache.pig.data.Tuple; -import org.junit.Test; - -public class TestParquetThriftStorer { - @Test - public void testStorer() throws ExecException, Exception { - String out = "target/out"; - int rows = 1000; - Properties props = new Properties(); - props.setProperty("parquet.compression", "uncompressed"); - props.setProperty("parquet.page.size", "1000"); - PigServer pigServer = new PigServer(ExecType.LOCAL, props); - Data data = Storage.resetData(pigServer); - Collection list = new ArrayList(); - for (int i = 0; i < rows; i++) { - list.add(tuple("bob", "roberts" + i)); - } - data.set("in", "fn:chararray, ln:chararray", list); - pigServer.deleteFile(out); - pigServer.setBatchOn(); - pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();"); - pigServer.registerQuery("Store A into '" + out + "' using " + ParquetThriftStorer.class.getName() + "('" - + Name.class.getName() + "');"); - execBatch(pigServer); - - pigServer.registerQuery("B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "();"); - pigServer.registerQuery("Store B into 'out' using mock.Storage();"); - execBatch(pigServer); - - List result = data.get("out"); - - assertEquals(rows, result.size()); - int i = 0; - for (Tuple tuple : result) { - assertEquals(tuple("bob", "roberts" + i), tuple); - ++i; - } - } - - private void execBatch(PigServer pigServer) throws IOException { - if (pigServer.executeBatch().get(0).getStatus() != JOB_STATUS.COMPLETED) { - throw new RuntimeException( - "Job failed", pigServer.executeBatch().get(0).getException()); - } - } -} From 6fdfdd80db67ca1e550dba26e8a683c8d9797fb5 Mon Sep 17 00:00:00 2001 From: Aaron Niskode-Dossett Date: Mon, 29 Dec 2025 08:44:11 -0600 Subject: [PATCH 2/2] final pig cleanups --- .github/ISSUE_TEMPLATE/feature_request.yaml | 1 - README.md | 1 - pom.xml | 2 -- 3 files changed, 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml index e1f580ed32..084c3afe16 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -40,7 +40,6 @@ body: - Build - Arrow - Avro - - Pig - Protobuf - Thrift - CLI diff --git a/README.md b/README.md index ff6f162151..905229082c 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,6 @@ Parquet is an active project, and new features are being added quickly. Here are * Type-specific encoding * Hive integration (deprecated) -* Pig integration (deprecated) * Cascading integration (deprecated) * Crunch integration * Apache Arrow integration diff --git a/pom.xml b/pom.xml index a4c3701df9..fd5b443427 100644 --- a/pom.xml +++ b/pom.xml @@ -87,8 +87,6 @@ 1.16.0 thrift ${thrift.executable} - 0.16.0 - h2 0.10.0 0.22.0 ${thrift.version}