nimbus UK [ˈnɪmbəs] US [ˈnɪmbəs] n. nimbus (a large rain cloud); halo
Storm: a distributed, real-time stream-processing framework.
As shown on the right of the figure below, Storm processes each record as it arrives; the data volume per unit time must stay moderate for it to keep up, but once started the topology runs continuously.
Batch processing is different: Spark is a micro-batch computing framework, which can also achieve near-real-time latency.
MapReduce cannot achieve real-time processing; it targets TB/PB-scale data, reads and writes disk heavily, and frequently starts and stops jobs.

[Figure: stream processing handles one record at a time as it arrives (right) vs. batch processing (left)]

ETL (data cleansing): extract, transform, load
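As a rough illustration of where ETL sits in a Storm topology, here is a minimal cleansing-bolt sketch; the class name CleanBolt, the "log" field, and the tab-separated layout are assumptions borrowed from the example code later in this post.

package com.sxt.storm.etl; // hypothetical package, for illustration only

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

// Extract the raw line, transform/validate it, and load (emit) only
// well-formed records downstream; BaseBasicBolt acks automatically.
public class CleanBolt extends BaseBasicBolt {
    private static final long serialVersionUID = 1L;

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String line = input.getStringByField("log");
        // Keep only lines with at least two tab-separated fields.
        if (line != null && line.split("\t").length >= 2) {
            collector.emit(new Values(line.trim()));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("log"));
    }
}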
spout
UK [spaʊt] US [spaʊt]
n. spout (of a kettle); nozzle; mouth of a pipe; waterspout; v. to gush
bolt
UK [bəʊlt] US [boʊlt]
n. (door or window) bolt
v. to bolt shut; to fasten (two parts) together with a bolt; (of a startled horse) to bolt
adv. suddenly; like an arrow; bolt upright
Nimbus is analogous to a master; a Supervisor is analogous to a slave.
Each Supervisor hosts Worker processes, and each Worker runs one or more Tasks.
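To make the hierarchy concrete, here is a minimal sketch (class name ParallelismDemo and all the counts are hypothetical) of how workers, executors, and tasks are requested with the same backtype.storm API used below:

package com.sxt.storm.ack; // reuses the MySpout/MyBolt defined later in this post

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;

public class ParallelismDemo {
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new MySpout(), 1);   // 1 spout executor
        builder.setBolt("bolt", new MyBolt(), 2)       // 2 bolt executors...
                .setNumTasks(4)                        // ...sharing 4 tasks
                .shuffleGrouping("spout");

        Config conf = new Config();
        conf.setNumWorkers(2); // 2 worker JVMs, scheduled across the supervisors

        new LocalCluster().submitTopology("parallelism-demo", conf, builder.createTopology());
    }
}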

The ack mechanism cannot guarantee that data is never computed twice, but it does guarantee that every tuple is correctly processed at least once. (A failure can trigger a replay in which tuples that did not themselves fail get computed a second time.) The spout, bolt, and driver below demonstrate anchored emits with ack/fail callbacks.
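Under the hood, the acker does not keep a set of pending tuple IDs; it tracks each tuple tree with a single 64-bit XOR checksum that returns to zero once every anchored tuple has been acked. A toy illustration of the idea (not Storm's actual code):

import java.util.Random;

// Toy model of Storm's acker bookkeeping: each tuple ID is XORed in when
// anchored and XORed again when acked; zero means the tree is complete.
public class AckerXorDemo {
    public static void main(String[] args) {
        Random r = new Random();
        long checksum = 0;
        long[] ids = { r.nextLong(), r.nextLong(), r.nextLong() };

        for (long id : ids) checksum ^= id; // emit: anchor three child tuples
        for (long id : ids) checksum ^= id; // ack: each child completes

        System.out.println("tree fully acked? " + (checksum == 0)); // true
    }
}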
package com.sxt.storm.ack;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
public class MySpout implements IRichSpout {

    private static final long serialVersionUID = 1L;

    int index = 0;
    FileInputStream fis;
    InputStreamReader isr;
    BufferedReader br;
    SpoutOutputCollector collector = null;
    String str = null;

    @Override
    public void nextTuple() {
        try {
            if ((str = this.br.readLine()) != null) {
                // Any filtering/cleansing of the raw line would go here.
                index++;
                // Anchored emit: passing a message ID (index) tells Storm to
                // track this tuple and call ack()/fail() when it completes.
                collector.emit(new Values(str), index);
                // collector.emit(new Values(str)); // unreliable emit: no ack/fail tracking
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void close() {
        try {
            br.close();
            isr.close();
            fis.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        try {
            this.collector = collector;
            this.fis = new FileInputStream("track.log");
            this.isr = new InputStreamReader(fis, "UTF-8");
            this.br = new BufferedReader(isr);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("log"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    @Override
    public void ack(Object msgId) {
        System.err.println(" [" + Thread.currentThread().getName() + "] spout ack: " + msgId.toString());
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    @Override
    public void fail(Object msgId) {
        System.err.println(" [" + Thread.currentThread().getName() + "] spout fail: " + msgId.toString());
    }
}
package com.sxt.storm.ack;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class MyBolt implements IRichBolt {

    private static final long serialVersionUID = 1L;

    OutputCollector collector = null;
    int num = 0;
    String valueString = null;

    @Override
    public void execute(Tuple input) {
        try {
            valueString = input.getStringByField("log");
            if (valueString != null) {
                num++;
                // A line without a second tab-separated field throws here,
                // which triggers fail(input) and a replay from the spout.
                System.err.println(Thread.currentThread().getName() + " lines: " + num
                        + " session_id: " + valueString.split("\t")[1]);
            }
            // Anchored emit: a downstream failure also fails the spout tuple.
            collector.emit(input, new Values(valueString));
            // collector.emit(new Values(valueString)); // unanchored: breaks the tuple tree
            collector.ack(input);
            Thread.sleep(2000); // slow the loop down so the ack/fail output is readable
        } catch (Exception e) {
            collector.fail(input);
            e.printStackTrace();
        }
    }

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("session_id"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
package com.sxt.storm.ack;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
public class Main {

    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new MySpout(), 1);
        builder.setBolt("bolt", new MyBolt(), 2).shuffleGrouping("spout");

        // Map conf = new HashMap();
        // conf.put(Config.TOPOLOGY_WORKERS, 4);
        Config conf = new Config();
        conf.setDebug(true);
        conf.setMessageTimeoutSecs(100); // fail any tuple not fully acked within 100 s
        conf.setNumAckers(4);

        if (args.length > 0) {
            // A topology name was given: submit to the real cluster.
            try {
                StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
            } catch (AlreadyAliveException e) {
                e.printStackTrace();
            } catch (InvalidTopologyException e) {
                e.printStackTrace();
            }
        } else {
            // Otherwise run in-process for local testing.
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("mytopology", conf, builder.createTopology());
        }
    }
}

A single point of failure calls for Flume HA; a single-point bottleneck calls for load balancing: http://flume.apache.org/FlumeUserGuide.html#scribe-source
Meituan's log collection system architecture: https://tech.meituan.com/2013/12/09/meituan-flume-log-system-architecture-and-design.html
Example: call drop rate (abnormal hang-ups: the audio goes silent, or the phone is out of the service area).
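In Flume, the load-balancing half of this is a sink group with a load_balance sink processor; a minimal properties sketch (the agent name a1 and sinks k1/k2 are hypothetical):

a1.sinkgroups = g1
a1.sinkgroups.g1.sinks = k1 k2
a1.sinkgroups.g1.processor.type = load_balance
a1.sinkgroups.g1.processor.selector = round_robin
a1.sinkgroups.g1.processor.backoff = true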
China Mobile project architecture diagram:

## Create the Kafka topic
./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --create --replication-factor 2 --partitions 3 --topic mylog_cmcc
## Start a console consumer
./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic mylog_cmcc
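To check that the topic got the expected partitions and replicas, kafka-topics.sh also has a describe mode (same ZooKeeper quorum assumed):
./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --describe --topic mylog_cmcc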
// The program below generates simulated producer data.
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.productor;
import java.util.Properties;
import java.util.Random;
import backtype.storm.utils.Utils;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import tools.DateFmt;
/**
 * Simulates sending call-detail records to Kafka.
 *
 * @author hadoop
 */
public class CellProducer extends Thread {
    // bin/kafka-topics.sh --create --zookeeper localhost:2181
    // --replication-factor 3 --partitions 5 --topic cmcccdr

    private final kafka.javaapi.producer.Producer<Integer, String> producer;
    private final String topic;
    private final Properties props = new Properties();

    public CellProducer(String topic) {
        props.put("serializer.class", "kafka.serializer.StringEncoder"); // string messages
        props.put("metadata.broker.list", KafkaProperties.broker_list);
        producer = new kafka.javaapi.producer.Producer<Integer, String>(new ProducerConfig(props));
        this.topic = topic;
    }

    // Full CDR record format (tab-separated):
    // record_time, imei, cell, ph_num, call_num, drop_num, duration, drop_rate, net_type, erl
    // e.g. 2011-06-28 14:24:59.867,356966,29448-37062,0,0,0,0,0,G,0

    public void run() {
        Random random = new Random();
        String[] cell_num = { "29448-37062", "29448-51331", "29448-51331", "29448-51333", "29448-51343" };
        // 0 = normal; 1 = dropped call (intermittent signal); 2 = cut-off call (fully disconnected)
        String[] drop_num = { "0", "1", "2" };
        int i = 0;
        while (true) {
            i++;
            String testStr = String.format("%06d", random.nextInt(10) + 1);
            // messageStr example: 2494  29448-000003  2016-01-05 10:25:17  1
            String messageStr = i + "\t" + ("29448-" + testStr) + "\t" + DateFmt.getCountDate(null, DateFmt.date_long)
                    + "\t" + drop_num[random.nextInt(drop_num.length)];
            System.out.println("product: " + messageStr);
            producer.send(new KeyedMessage<Integer, String>(topic, messageStr));
            Utils.sleep(1000);
            // if (i == 500) {
            //     break;
            // }
        }
    }

    public static void main(String[] args) {
        // Choose the topic and start the producer thread.
        CellProducer producerThread = new CellProducer(KafkaProperties.Cell_Topic);
        producerThread.start();
    }
}
package cmcc.constant;
public class Constants {
    public static final String HBASE_ZOOKEEPER_LIST = "node4:2181";
    public static final String KAFKA_ZOOKEEPER_LIST = "node2:2181,node3:2181,node4:2181";
    public static final String BROKER_LIST = "node2:9092,node3:9092,node4:9092";
    public static final String ZOOKEEPERS = "node2,node3,node4";
}
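On the consuming side, a topology would typically read this topic with storm-kafka's KafkaSpout. A minimal wiring sketch, assuming the storm-kafka 0.9.x API that matches the backtype.storm code above (the class name CmccTopologyDemo, component names, and the /kafka offset root are hypothetical):

package cmcc.constant;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

public class CmccTopologyDemo {
    public static void main(String[] args) {
        SpoutConfig spoutConfig = new SpoutConfig(
                new ZkHosts(Constants.KAFKA_ZOOKEEPER_LIST), // brokers' ZooKeeper
                "mylog_cmcc",   // topic created above
                "/kafka",       // ZK root where consumer offsets are stored
                "cmcc_spout");  // spout id (offset namespace)
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); // deserialize as UTF-8 strings

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-spout", new KafkaSpout(spoutConfig), 3); // one executor per partition
        // ... bolts that parse CDRs and write drop-rate counters to HBase ...

        new LocalCluster().submitTopology("cmcc-demo", new Config(), builder.createTopology());
    }
}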
Original post: https://www.cnblogs.com/xhzd/p/11610584.html