直接上代碼。先在 composer.json 中聲明 phpoffice/phpword 依賴,接着是 PHP 腳本:
{
"require": {
"phpoffice/phpword": "^0.18.3"
}
}
<?php
// Load Composer's autoloader from this script's own directory and stop
// immediately if it is missing: every function below relies on the PhpWord
// classes it provides, so continuing without it would only fail later with a
// less helpful "class not found" error.
require_once __DIR__ . '/vendor/autoload.php';

// Sample document to extract; a relative path is resolved against the
// current working directory.
$info = './234.docx';
index($info);
/**
 * Extract the content of a Word document and print every extracted entry.
 *
 * @param string $info Path of the Word document, handed to getWord().
 * @return void
 */
function index($info)
{
    $entries = getWord($info);

    // The extracted entries can be post-processed here before being printed.
    foreach ($entries as $entry) {
        // Each entry is emitted followed by a literal "<pre>" tag.
        echo $entry, '<pre>';
    }
}
/**
 * Load a Word document and extract its content.
 *
 * @param string $path Path of the document to load.
 * @return array Entries produced by getNodeContent() for the loaded document.
 */
function getWord($path = '')
{
    // PhpWord's IOFactory reads the file; getNodeContent() then walks the result.
    return getNodeContent(\PhpOffice\PhpWord\IOFactory::load($path));
}
/**
 * Collect the content of every section of a loaded document.
 *
 * A text run contributes one entry (the concatenation of what getTextNode()
 * returns for each of its children); a table contributes one entry per row
 * (via getTableNode()). Any other element type is skipped.
 *
 * @param mixed $word Document object exposing getSections().
 * @return array Extracted strings, in the order the elements are iterated.
 */
function getNodeContent($word)
{
    $contents = [];

    foreach ($word->getSections() as $section) {
        // Anything that is not a PhpWord section is ignored.
        if (!($section instanceof \PhpOffice\PhpWord\Element\Section)) {
            continue;
        }

        foreach ($section->getElements() as $element) {
            if ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
                // Text run: glue the text of all children into a single entry.
                $pieces = [];
                foreach ($element->getElements() as $child) {
                    $pieces[] = getTextNode($child);
                }
                $contents[] = implode('', $pieces);
                continue;
            }

            if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
                // Table: every row becomes its own entry.
                foreach ($element->getRows() as $row) {
                    $contents[] = getTableNode($row);
                }
            }
        }
    }

    return $contents;
}
/**
 * Turn a single inline node into a string.
 *
 * Text nodes yield their text, image nodes yield the markup built by
 * pic2text(), and nested text runs are flattened recursively. Every other
 * node type yields an empty string.
 *
 * @param mixed $node Node to convert.
 * @return string
 */
function getTextNode($node)
{
    // Plain text node.
    if ($node instanceof \PhpOffice\PhpWord\Element\Text) {
        return (string) $node->getText();
    }

    // Image node: represented by an inline <img> tag.
    if ($node instanceof \PhpOffice\PhpWord\Element\Image) {
        return (string) pic2text($node);
    }

    // Nested text run: concatenate the result of each child in order.
    if ($node instanceof \PhpOffice\PhpWord\Element\TextRun) {
        $buffer = '';
        foreach ($node->getElements() as $child) {
            $buffer .= getTextNode($child);
        }
        return $buffer;
    }

    // Unsupported node types contribute nothing.
    return '';
}
/**
 * Turn a table row or table cell into a string.
 *
 * A row is the concatenation of its cells; a cell is the concatenation of
 * what getTextNode() returns for each of its elements. No separator is
 * inserted between cells or elements. Any other node yields an empty string.
 *
 * @param mixed $node Row or cell to convert.
 * @return string
 */
function getTableNode($node)
{
    // Row: recurse into each cell.
    if ($node instanceof \PhpOffice\PhpWord\Element\Row) {
        $rowText = '';
        foreach ($node->getCells() as $cell) {
            $rowText .= getTableNode($cell);
        }
        return $rowText;
    }

    // Cell: collect the text of the elements it contains.
    if ($node instanceof \PhpOffice\PhpWord\Element\Cell) {
        $cellText = '';
        foreach ($node->getElements() as $child) {
            $cellText .= getTextNode($child);
        }
        return $cellText;
    }

    return '';
}
/**
 * Render an image node as an inline HTML <img> tag with a data URI source.
 *
 * @param mixed $node Image node exposing getImageStringData() and getImageType().
 * @return string The <img> markup.
 */
function pic2text($node)
{
    // Image payload, requested with the flag the data URI below labels as base64.
    $encoded = $node->getImageStringData(true);
    // Value placed in the media-type position of the data URI.
    $mimeType = $node->getImageType();

    $dataUri = 'data:' . $mimeType . ';base64,' . $encoded;

    return '<img src="' . $dataUri . '">';
}
/**
 * Save an image embedded in a Word document to a local file.
 *
 * The file is written to "images/<md5 of the image source>.<extension>",
 * relative to the current working directory. The directory is created when
 * it does not exist yet.
 *
 * @param mixed $node Image node exposing getSource(), getImageExtension()
 *                    and getImageStringData().
 * @return string Relative path of the written image file.
 * @throws \RuntimeException When the directory cannot be created, the image
 *                           data is missing or not valid base64, or the file
 *                           cannot be written.
 */
function pic2file($node)
{
    $directory = 'images';

    // Without the target directory file_put_contents() fails, and the caller
    // would receive a path to a file that was never written.
    if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
        throw new \RuntimeException('Unable to create image directory: ' . $directory);
    }

    // File name derived from the image source (typically the document path
    // plus the image's location inside the document).
    $imageSrc = $directory . '/' . md5($node->getSource()) . '.' . $node->getImageExtension();

    $encoded = $node->getImageStringData(true);
    if (!is_string($encoded) || $encoded === '') {
        throw new \RuntimeException('No image data available for: ' . $imageSrc);
    }

    // Strict decoding rejects corrupt payloads instead of writing garbage.
    $binary = base64_decode($encoded, true);
    if ($binary === false) {
        throw new \RuntimeException('Image data is not valid base64 for: ' . $imageSrc);
    }

    // Store the image locally.
    if (file_put_contents($imageSrc, $binary) === false) {
        throw new \RuntimeException('Unable to write image file: ' . $imageSrc);
    }

    return $imageSrc;
}
/**
 * Convert a Word document to HTML, store the HTML file and output it.
 *
 * The HTML file is written into the source document's directory, using the
 * same base name with an ".html" extension. After echoing the HTML the
 * script terminates.
 *
 * @param string $path Path of the Word document to convert.
 * @throws \PhpOffice\PhpWord\Exception\Exception
 * @throws \RuntimeException When the generated HTML file cannot be read back.
 */
function word2html($path)
{
    // Load the document the same way getWord() does; no other loader is
    // defined in this script.
    $phpWord = \PhpOffice\PhpWord\IOFactory::load($path);

    // Convert to HTML.
    $htmlWriter = \PhpOffice\PhpWord\IOFactory::createWriter($phpWord, "HTML");

    $parts = pathinfo($path);
    $fileName = $parts['dirname'] . '/' . $parts['filename'] . '.html';
    $htmlWriter->save($fileName);

    $html = file_get_contents($fileName);
    if ($html === false) {
        throw new \RuntimeException('Unable to read generated HTML file: ' . $fileName);
    }

    echo $html;
    // Stop here so nothing else is appended to the emitted HTML.
    exit;
}