天天看点

Oozie开发记录

oozie开发文档

文章目录

    • oozie开发文档
    • 总结

oozie开发文档

  1. 设置时区,cdh修改 oozie-site.xml
    <property>
        <name>oozie.processing.timezone</name>
        <value>GMT+0800</value>
    </property>
               
  2. oozie通过cdh方式部署时,oozie的依赖没有上传至hdfs,需要我们手动上传
    # Upload the Oozie share lib that ships with the CDH parcel to HDFS.
    cd /opt/cloudera/parcels/CDH/lib/oozie/oozie-sharelib-yarn
    # The commands below are run as the hdfs user.
    su hdfs
    # The HDFS file-system subcommand is "hdfs dfs" ("hdfs -dfs" is not a valid invocation).
    hdfs dfs -mkdir -p /user/oozie/share/
    hdfs dfs -put lib/ /user/oozie/share/
    # Hand the uploaded tree over to the oozie user and group.
    hdfs dfs -chown -R oozie:oozie /user/oozie/
               
  3. 编写workflow.xml

    主要是用到了 fork 、 join 和 action

    • fork 使得一组action能并行执行
    • join使得等到之前的任务都结束了才执行join指向的action,类似于检查点.
    • action就是我们要进行的操作了,这里执行的是shell脚本,如果需要hive等其他的操作,需要参考文档
    <workflow-app name="数仓全流程" xmlns="uri:oozie:workflow:0.4">
        <!--
          Data-warehouse workflow.
          Flow: start, then fork into eight ODS load actions that run in parallel,
          then a join that waits for all of them, then the ods_to_dwd action, then end.
          Every action is a shell action; any action error transitions to the Kill node.
          Properties read from the job configuration: jobTracker, nameNode, EXEC, flumeDir.

          The script is shipped from HDFS by the file element and symlinked into the
          container working directory under the name given by the EXEC property, so
          the exec element refers to that symlink name, not to an HDFS path.

          NOTE(review): the first argument of several actions does not match the action
          name (org_to_ods_ga_viid_0002 passes ods_ga_viid_0001; the wa_source_fj_* and
          wa_wb_0001 actions pass ods_wa_dw_0001), and some topic directories do not match
          either. This looks like copy-paste; confirm against hdfs_to_ods.sh.
        -->
        <start to="org_to_ods_forking"/>

        <!-- Launch all ODS load actions concurrently. -->
        <fork name="org_to_ods_forking">
            <path start="org_to_ods_ga_viid_0001"/>
            <path start="org_to_ods_ga_viid_0002"/>
            <path start="org_to_ods_wa_dw_0001"/>
            <path start="org_to_ods_wa_source_fj_0001"/>
            <path start="org_to_ods_wa_source_fj_0002"/>
            <path start="org_to_ods_wa_source_fj_1001"/>
            <path start="org_to_ods_wa_source_fj_1002"/>
            <path start="org_to_ods_wa_wb_0001"/>
        </fork>

        <action name="org_to_ods_ga_viid_0001">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_ga_viid_0001</argument>
                <argument>${flumeDir}/topic_wa_dw_0001</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_ga_viid_0002">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_ga_viid_0001</argument>
                <argument>${flumeDir}/topic_wa_dw_0002</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_wa_dw_0001">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_wa_dw_0001</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_wa_source_fj_0001">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_wa_source_fj_0001</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_wa_source_fj_0002">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_wa_source_fj_0002</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_wa_source_fj_1001">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_wa_source_fj_1001</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_wa_source_fj_1002">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_wa_source_fj_1002</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <action name="org_to_ods_wa_wb_0001">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_ods_wa_wb_0001</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="ods_next_dwd_joining"/>
            <error to="Kill"/>
        </action>

        <!-- Wait until every forked path has finished before continuing. -->
        <join name="ods_next_dwd_joining" to="ods_to_dwd"/>

        <!--
          NOTE(review): this action runs the same script with the same arguments as
          org_to_ods_wa_dw_0001; presumably a placeholder for the real ODS-to-DWD
          step. Confirm which script and arguments belong here.
        -->
        <action name="ods_to_dwd">
            <shell xmlns="uri:oozie:shell-action:0.2">
                <job-tracker>${jobTracker}</job-tracker>
                <name-node>${nameNode}</name-node>
                <exec>${EXEC}</exec>
                <argument>ods_wa_dw_0001</argument>
                <argument>${flumeDir}/topic_wa_dw_0001</argument>
                <file>/user/task/shell/hdfs_to_ods.sh#${EXEC}</file>
                <capture-output/>
            </shell>
            <ok to="end"/>
            <error to="Kill"/>
        </action>

        <!-- Terminal failure node: reports the error message of the last failed node. -->
        <kill name="Kill">
            <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
        </kill>

        <end name="end"/>
    </workflow-app>
               
  4. 编写coordinator.xml

    简单点讲,coordinator就是一个定时器:指定执行规则(frequency)和需要执行的workflow即可

    <coordinator-app name="TBL数仓定时协调器"
                     frequency="30 1 * * *"
                     start="${start_date}" end="${end_date}" timezone="Asia/Shanghai"
                     xmlns="uri:oozie:coordinator:0.2">
        <!--
          Coordinator that triggers the workflow on a schedule.
          frequency is a cron expression: minute 30 of hour 1, every day.
          Properties read from the job configuration: start_date, end_date,
          wf_application_path.
        -->
        <controls>
            <!-- FIFO: pending materialized actions are executed oldest first. -->
            <execution>FIFO</execution>
        </controls>
        <action>
            <workflow>
                <!-- HDFS directory that contains workflow.xml. -->
                <app-path>${wf_application_path}</app-path>
                <configuration>
                    <property>
                        <name>oozie.use.system.libpath</name>
                        <!-- Lowercase boolean, matching the value used in job.properties. -->
                        <value>true</value>
                    </property>
                    <!-- Forward the coordinator's time window to the workflow. -->
                    <property>
                        <name>start_date</name>
                        <value>${start_date}</value>
                    </property>
                    <property>
                        <name>end_date</name>
                        <value>${end_date}</value>
                    </property>
                </configuration>
            </workflow>
        </action>
    </coordinator-app>
    
               
  5. 编写job.properties
    # Cluster endpoints. Look these up in your own CDH configuration.
    jobTracker=cdh211:8032
    nameNode=hdfs://nameservice1
    queueName=default

    # User the job is submitted and run as.
    user.name=hdfs
    mapreduce.job.user.name=hdfs

    # Make the Oozie system share lib available to the actions.
    oozie.use.system.libpath=true

    # Parameters referenced by workflow.xml.
    flumeDir=/flume/capture
    EXEC=hdfs_to_ods.sh

    # Coordinator time window (offset +0800).
    start_date=2021-09-16T22:40+0800
    end_date=2121-09-23T23:40+0800

    # HDFS locations of the coordinator and workflow applications.
    oozie.coord.application.path=${nameNode}/user/${user.name}/workspaces/coordinator
    wf_application_path=${nameNode}/user/${user.name}/workspaces/workflow/
               
  6. 上传workflow.xml和coordinator.xml至hdfs
    # Make sure both application directories exist; -put into a missing directory fails.
    hdfs dfs -mkdir -p /user/hdfs/workspaces/workflow/ /user/hdfs/workspaces/coordinator/
    # Upload the workflow and coordinator definitions to the paths used in job.properties.
    hdfs dfs -put workflow.xml /user/hdfs/workspaces/workflow/
    hdfs dfs -put coordinator.xml /user/hdfs/workspaces/coordinator/
               
  7. 推送至oozie,也可以用hue来提交,但是命令提交更方便
    # Submit the job described by job.properties to the Oozie server and start it.
    oozie job \
        -oozie http://cdh210:11000/oozie \
        -config job.properties \
        -run
               
  8. 查看任务信息,hue有很好的图形化界面可以查看详细的信息
    # Show the job's status; replace 你的任务号 ("your job id") with the id printed by -run.
    oozie job -oozie http://cdh210:11000/oozie -info 你的任务号
               

总结

为什么要选择oozie,是出于运维方便的角度去考量的,相比azkaban来讲,oozie比较复杂,但是都是任务流程调度框架,根据各自实际情况去选择就好了。

继续阅读