flume-user mailing list archives

From: Oleksiy MapR <osayankin.maprt...@gmail.com>
Subject: Flume-1.6.0 + HiveSink on Kerberos security cluster
Date: Wed, 15 Jul 2015 12:11:34 GMT
Hi team!

I have configured Flume 1.6.0 with Kerberos and Hive 1.0 with Kerberos as well, and I want Flume to write data to a Hive table using HiveSink, but the agent throws an exception while running (it cannot connect to the metastore). See details below.

So my question is: do Flume 1.6.0 with Kerberos, HiveSink, and Hive 1.0 with Kerberos work together in the Flume 1.6.0 release?

PS: Flume 1.6.0 + HiveSink works fine on a non-secure cluster. Also, when I start beeline on the Kerberized cluster it works fine and can connect through HiveServer2:

hive --service beeline
beeline> !connect jdbc:hive2://127.0.0.1:10000/default;principal=<user>/<cluster.name>@MYCOMPANY.COM.UA
scan complete in 6ms
Connecting to jdbc:hive2://127.0.0.1:10000/default;principal=<user>/<cluster.name>@MYCOMPANY.COM.UA
Enter username for jdbc:hive2://127.0.0.1:10000/default;principal=<user>/<cluster.name>@MYCOMPANY.COM.UA:
Enter password for jdbc:hive2://127.0.0.1:10000/default;principal=<user>/<cluster.name>@MYCOMPANY.COM.UA:
Connected to: Apache Hive (***)
Driver: Hive JDBC (***)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://127.0.0.1:10000/default>
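
For completeness, this is how the Kerberos ticket for the principal can be checked from the Flume host before starting the agent (principal and keytab are the same placeholders used throughout this mail):

kinit -kt /path/to/file.keytab <user>/<cluster.name>@MYCOMPANY.COM.UA
klist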


Oleksiy

Configuration details:

flume-hivesink.conf
--------------------------

agent1.sources = source1
agent1.channels = channel1
agent1.sinks = sink1

agent1.sources.source1.type = exec
agent1.sources.source1.command = cat /path/to/flume_test.data

agent1.sinks.sink1.type = hive
agent1.sinks.sink1.channel = channel1
agent1.sinks.sink1.hive.metastore = thrift://127.0.0.1:9083
agent1.sinks.sink1.hive.database = default
agent1.sinks.sink1.hive.table = flume_test
agent1.sinks.sink1.hive.txnsPerBatchAsk = 2
agent1.sinks.sink1.batchSize = 4
agent1.sinks.sink1.useLocalTimeStamp = false
agent1.sinks.sink1.round = true
agent1.sinks.sink1.roundValue = 10
agent1.sinks.sink1.roundUnit = minute
agent1.sinks.sink1.serializer = DELIMITED
agent1.sinks.sink1.serializer.delimiter = ","
agent1.sinks.sink1.serializer.serdeSeparator = ','

agent1.sinks.sink1.serializer.fieldnames = id,message

agent1.channels.channel1.type = FILE
agent1.channels.channel1.transactionCapacity = 1000000
agent1.channels.channel1.checkpointInterval = 30000
agent1.channels.channel1.maxFileSize = 2146435071
agent1.channels.channel1.capacity = 10000000
agent1.sources.source1.channels = channel1

agent1.sinks.sink1.hdfs.kerberosPrincipal = <user>/<cluster.name>@MYCOMPANY.COM.UA
agent1.sinks.sink1.hdfs.kerberosKeytab = /path/to/file.keytab
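

For reference, I start the agent with the standard flume-ng launcher (the agent name matches agent1 above; the conf paths are illustrative):

flume-ng agent --conf $FLUME_HOME/conf --conf-file flume-hivesink.conf --name agent1 -Dflume.root.logger=INFO,console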


hive-site.xml
-----------------

<configuration>

<!-- MYSQL -->

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://localhost/metastore</value>
  <description>the URL of the MySQL database</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
</property>

<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>******</value>
</property>

<property>
  <name>datanucleus.autoCreateSchema</name>
  <value>false</value>
</property>

<property>
  <name>datanucleus.fixedDatastore</name>
  <value>true</value>
</property>

<property>
  <name>hive.metastore.uris</name>
  <value>thrift://127.0.0.1:9083</value>
  <description>IP address (or fully-qualified domain name) and port of the
metastore host</description>
</property>


<!-- Compactor configuration -->
<property>
  <name>hive.txn.manager</name>
  <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>

<property>
  <name>hive.compactor.initiator.on</name>
  <value>true</value>
</property>

<property>
  <name>hive.compactor.worker.threads</name>
  <value>5</value>
</property>

<property>
  <name>hive.compactor.check.interval</name>
  <value>10</value>
</property>

<property>
  <name>hive.compactor.delta.num.threshold</name>
  <value>2</value>
</property>

<!-- KERBEROS -->

<property>
  <name>hive.metastore.sasl.enabled</name>
  <value>true</value>
  <description>
   if true, the metastore thrift interface will be secured with SASL.
Clients must authenticate with Kerberos.
  </description>
</property>

<property>
  <name>hive.metastore.kerberos.keytab.file</name>
  <value>/path/to/file.keytab</value>
  <description>
    The path to the Kerberos Keytab file containing the metastore thrift
server's service principal.
  </description>
</property>

<property>
  <name>hive.metastore.kerberos.principal</name>
  <value><user>/<cluster.name>@MYCOMPANY.COM.UA</value>
  <description>
    The service principal for the metastore thrift server. The special
string _HOST will be replaced automatically with the correct hostname.
  </description>
</property>

<property>
  <name>hive.server2.authentication</name>
  <value>KERBEROS</value>
  <description>authentication type</description>
</property>

<property>
  <name>hive.server2.authentication.kerberos.principal</name>
  <value><user>/<cluster.name>@MYCOMPANY.COM.UA</value>
  <description>HiveServer2 principal. If _HOST is used as the FQDN portion,
it will be replaced with the actual hostname of the running
instance.</description>
</property>

<property>
  <name>hive.server2.authentication.kerberos.keytab</name>
  <value>/path/to/file.keytab</value>
  <description>Keytab file for HiveServer2 principal</description>
</property>

<property>
  <name>hive.server2.thrift.sasl.qop</name>
  <value>auth-conf</value>
  <description>Sasl QOP value; one of 'auth', 'auth-int' and
'auth-conf'</description>
</property>


</configuration>
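
The keytab referenced by hive-site.xml can be inspected to confirm it contains the expected principal (a quick sanity check; output omitted):

klist -kt /path/to/file.keytab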



Hive table flume_test:
----------------------------

hive> show create table flume_test;
OK
CREATE TABLE `flume_test`(
  `id` string,
  `message` string)
CLUSTERED BY (
  message)
INTO 5 BUCKETS
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs:/user/hive/warehouse/flume_test'
TBLPROPERTIES (
  'orc.compress'='NONE',
  'transient_lastDdlTime'='1436874059')
Time taken: 0.125 seconds, Fetched: 17 row(s)
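
Side note: as far as I understand, the Hive streaming API that HiveSink uses expects the target table to be bucketed, stored as ORC, and marked transactional. A table declared for streaming would typically look like the sketch below; note the 'transactional'='true' property, which the output above does not show:

CREATE TABLE flume_test (
  id STRING,
  message STRING)
CLUSTERED BY (message) INTO 5 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');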


Exception
-------------

15/07/15 14:57:33 INFO instrumentation.MonitoredCounterGroup: Monitored counter group for type: SOURCE, name: source1: Successfully registered new MBean.
15/07/15 14:57:33 INFO instrumentation.MonitoredCounterGroup: Component type: SOURCE, name: source1 started
15/07/15 14:57:33 INFO hive.HiveSink: sink1: Creating Writer to Hive end point : {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
15/07/15 14:57:33 INFO source.ExecSource: Command [cat /path/to/flume_test.data] exited with 0
15/07/15 14:57:34 INFO hive.metastore: Trying to connect to metastore with URI thrift://127.0.0.1:9083
15/07/15 14:57:34 INFO thrift.HadoopThriftAuthBridge25Sasl: Sasl client AuthenticationMethod: KERBEROS
15/07/15 14:57:34 WARN hive.metastore: Failed to connect to the MetaStore Server...
15/07/15 14:57:34 INFO hive.metastore: Waiting 1 seconds before next connection attempt.
15/07/15 14:57:35 INFO hive.metastore: Trying to connect to metastore with URI thrift://127.0.0.1:9083
15/07/15 14:57:35 INFO thrift.HadoopThriftAuthBridge25Sasl: Sasl client AuthenticationMethod: KERBEROS
15/07/15 14:57:35 WARN hive.metastore: Failed to connect to the MetaStore Server...
15/07/15 14:57:35 INFO hive.metastore: Waiting 1 seconds before next connection attempt.
15/07/15 14:57:36 INFO hive.metastore: Trying to connect to metastore with URI thrift://127.0.0.1:9083
15/07/15 14:57:36 INFO thrift.HadoopThriftAuthBridge25Sasl: Sasl client AuthenticationMethod: KERBEROS
15/07/15 14:57:36 WARN hive.metastore: Failed to connect to the MetaStore Server...
15/07/15 14:57:36 INFO hive.metastore: Waiting 1 seconds before next connection attempt.
15/07/15 14:57:37 WARN hive.HiveSink: sink1 : Failed connecting to EndPoint {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
org.apache.flume.sink.hive.HiveWriter$ConnectException: Failed connecting to EndPoint {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
    at org.apache.flume.sink.hive.HiveWriter.<init>(HiveWriter.java:98)
    at org.apache.flume.sink.hive.HiveSink.getOrCreateWriter(HiveSink.java:343)
    at org.apache.flume.sink.hive.HiveSink.drainOneBatch(HiveSink.java:296)
    at org.apache.flume.sink.hive.HiveSink.process(HiveSink.java:254)
    at org.apache.flume.sink.DefaultSinkProcessor.process(DefaultSinkProcessor.java:68)
    at org.apache.flume.SinkRunner$PollingRunner.run(SinkRunner.java:147)
    at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.flume.sink.hive.HiveWriter$ConnectException: Failed connecting to EndPoint {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
    at org.apache.flume.sink.hive.HiveWriter.newConnection(HiveWriter.java:320)
    at org.apache.flume.sink.hive.HiveWriter.<init>(HiveWriter.java:86)
    ... 6 more
Caused by: org.apache.hive.hcatalog.streaming.ConnectionError: Error connecting to Hive Metastore URI: thrift://127.0.0.1:9083
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.getMetaStoreClient(HiveEndPoint.java:450)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:274)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:243)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnectionImpl(HiveEndPoint.java:180)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:157)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:110)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:316)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:313)
    at org.apache.flume.sink.hive.HiveWriter$9.call(HiveWriter.java:366)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    ... 1 more
Caused by: MetaException(message:Could not connect to meta store using any of the URIs provided. Most recent failure: org.apache.thrift.transport.TTransportException: Peer indicated failure: Unsupported mechanism type GSSAPI
    at org.apache.thrift.transport.TSaslTransport.receiveSaslMessage(TSaslTransport.java:190)
    at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:258)
    at org.apache.thrift.transport.TSaslClientTransport.open(TSaslClientTransport.java:37)
    at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:52)
    at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:49)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
    at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport.open(TUGIAssumingTransport.java:49)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:373)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:221)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:167)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.getMetaStoreClient(HiveEndPoint.java:448)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:274)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:243)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnectionImpl(HiveEndPoint.java:180)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:157)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:110)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:316)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:313)
    at org.apache.flume.sink.hive.HiveWriter$9.call(HiveWriter.java:366)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:419)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:221)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:167)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.getMetaStoreClient(HiveEndPoint.java:448)
    ... 12 more
15/07/15 14:57:37 ERROR flume.SinkRunner: Unable to deliver event. Exception follows.
org.apache.flume.EventDeliveryException: org.apache.flume.sink.hive.HiveWriter$ConnectException: Failed connecting to EndPoint {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
    at org.apache.flume.sink.hive.HiveSink.process(HiveSink.java:268)
    at org.apache.flume.sink.DefaultSinkProcessor.process(DefaultSinkProcessor.java:68)
    at org.apache.flume.SinkRunner$PollingRunner.run(SinkRunner.java:147)
    at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.flume.sink.hive.HiveWriter$ConnectException: Failed connecting to EndPoint {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
    at org.apache.flume.sink.hive.HiveWriter.<init>(HiveWriter.java:98)
    at org.apache.flume.sink.hive.HiveSink.getOrCreateWriter(HiveSink.java:343)
    at org.apache.flume.sink.hive.HiveSink.drainOneBatch(HiveSink.java:296)
    at org.apache.flume.sink.hive.HiveSink.process(HiveSink.java:254)
    ... 3 more
Caused by: org.apache.flume.sink.hive.HiveWriter$ConnectException: Failed connecting to EndPoint {metaStoreUri='thrift://127.0.0.1:9083', database='default', table='flume_test', partitionVals=[] }
    at org.apache.flume.sink.hive.HiveWriter.newConnection(HiveWriter.java:320)
    at org.apache.flume.sink.hive.HiveWriter.<init>(HiveWriter.java:86)
    ... 6 more
Caused by: org.apache.hive.hcatalog.streaming.ConnectionError: Error connecting to Hive Metastore URI: thrift://127.0.0.1:9083
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.getMetaStoreClient(HiveEndPoint.java:450)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:274)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:243)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnectionImpl(HiveEndPoint.java:180)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:157)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:110)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:316)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:313)
    at org.apache.flume.sink.hive.HiveWriter$9.call(HiveWriter.java:366)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    ... 1 more
Caused by: MetaException(message:Could not connect to meta store using any of the URIs provided. Most recent failure: org.apache.thrift.transport.TTransportException: Peer indicated failure: Unsupported mechanism type GSSAPI
    at org.apache.thrift.transport.TSaslTransport.receiveSaslMessage(TSaslTransport.java:190)
    at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:258)
    at org.apache.thrift.transport.TSaslClientTransport.open(TSaslClientTransport.java:37)
    at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:52)
    at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:49)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
    at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport.open(TUGIAssumingTransport.java:49)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:373)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:221)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:167)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.getMetaStoreClient(HiveEndPoint.java:448)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:274)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.<init>(HiveEndPoint.java:243)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnectionImpl(HiveEndPoint.java:180)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:157)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint.newConnection(HiveEndPoint.java:110)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:316)
    at org.apache.flume.sink.hive.HiveWriter$6.call(HiveWriter.java:313)
    at org.apache.flume.sink.hive.HiveWriter$9.call(HiveWriter.java:366)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:419)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:221)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:167)
    at org.apache.hive.hcatalog.streaming.HiveEndPoint$ConnectionImpl.getMetaStoreClient(HiveEndPoint.java:448)
    ... 12 more
