[root@bigdata-3 opt]# yum -y install gcc-c++ lzo-devel zlib-devel autoconf automake libtool
[root@bigdata-3 opt]# wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz
[root@bigdata-3 opt]# tar -zxvf lzo-2.10.tar.gz
[root@bigdata-3 opt]# cd lzo-2.10
[root@bigdata-3 lzo-2.10]# ./configure --enable-shared --prefix=/usr/local/lzo-2.10
[root@bigdata-3 lzo-2.10]# make && make install
[root@bigdata-3 opt]# wget https://github.com/twitter/hadoop-lzo/archive/master.zip
[root@bigdata-3 opt]# unzip master.zip
[root@bigdata-3 opt]# cd hadoop-lzo-master
[root@bigdata-3 hadoop-lzo-master]# vi pom.xml
<hadoop.current.version>2.8.3</hadoop.current.version>
[root@bigdata-3 opt]# vi /etc/profile
export C_INCLUDE_PATH=/usr/local/lzo-2.10/include
export LIBRARY_PATH=/usr/local/lzo-2.10/lib
[root@bigdata-3 opt]# source /etc/profile
//有点耗时,建议本地编译
[root@bigdata-3 hadoop-lzo-master]# mvn package -Dmaven.test.skip=true
[root@bigdata-3 target]# scp -r hadoop-lzo-0.4.21-SNAPSHOT.jar /opt/hadoop-2.8.3/share/hadoop/common/
- 在hadoop的core-site.xml中增加以下配置:
<configuration>
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
</configuration>
[root@bigdata-3 opt]# start-dfs.sh
- 在hive中建表log_start, 指定表格式为lzo
CREATE EXTERNAL TABLE log_start (line string) PARTITIONED BY (dt string)
STORED AS
INPUTFORMAT "com.hadoop.mapred.DeprecatedLzoTextInputFormat"
OUTPUTFORMAT "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
LOCATION '/user/hive/warehouse/game/adzl';
- 对于已经存在的表(如old_log_start), 可以直接修改其文件格式为lzo
ALTER TABLE old_log_start
SET FILEFORMAT
INPUTFORMAT "com.hadoop.mapred.DeprecatedLzoTextInputFormat"
OUTPUTFORMAT "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat";
- 当需要往lzo存储格式的表新增数据时,需要加入以下两个参数:
SET hive.exec.compress.output=true;
SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec;