Video Analysis Based on Azure Cognitive Services

Meeting Outline Generation System

This is a system that produces a post-meeting summary for online meetings and video courses, so attendees can skim the main content at a glance, quickly locate the key points, and avoid spending time rewatching the full recording.

The core of the project is video analysis: the audio track is parsed into a text outline and the video frames into screenshots, and the final output is a summary outline that combines text and images.
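Wiring the pieces together looks roughly like the sketch below. Every helper is a placeholder for one of the snippets in the sections that follow, and the content shape mirrors the EJS template at the end of this post; none of this is an existing API.

// Placeholders for the concrete snippets shown later in this post (assumed, not an existing API).
declare function extractAudio(video: string): Promise<string>;         // FFmpeg / symbl-media
declare function speechToText(wav: string): Promise<string>;           // Azure Speech to Text
declare function summarize(text: string): Promise<string[]>;           // Azure Summarization
declare function keyPhrasesOf(text: string): Promise<string[]>;        // Azure Key Phrases
declare function extractKeyFrames(video: string): Promise<string[]>;   // ffmpeg scene detection
declare function savePDF(data: unknown, name: string): Promise<string>; // html-pdf + ejs

// Rough end-to-end orchestration of the outline generator.
async function buildMeetingOutline(videoPath: string): Promise<string> {
  const wavPath = await extractAudio(videoPath);
  const transcript = await speechToText(wavPath);
  const summary = await summarize(transcript);
  const phrases = await keyPhrasesOf(transcript);
  const shots = await extractKeyFrames(videoPath);

  const meetingData = {
    title: 'Meeting Outline',
    // Naive 1:1 pairing purely for illustration; field names follow the EJS template below.
    content: summary.map((s, i) => ({ summary: s, keyPhases: [phrases[i]], shot: shots[i] })),
  };
  return savePDF(meetingData, 'courseOutline');
}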

Project repo: sharktank

Azure Video Indexer

https://www.videoindexer.ai/

Azure Summarization

https://docs.microsoft.com/zh-cn/azure/cognitive-services/language-service/summarization/quickstart?tabs=document-summarization&pivots=programming-language-java

import { TextAnalyticsClient, AzureKeyCredential } from '@azure/ai-text-analytics';
const client = new TextAnalyticsClient(endpoint, new AzureKeyCredential(key));

// Extractive summarization: pick up to 15 sentences, ordered by rank.
async function summarization(documents: string[]) {
  const actions = {
    extractSummaryActions: [
      { modelVersion: 'latest', orderBy: 'Rank', maxSentenceCount: 15 },
    ],
  };
  const poller = await client.beginAnalyzeActions(documents, actions, 'en');
  const resultPages = await poller.pollUntilDone();

  for await (const page of resultPages) {
    const extractSummaryAction = page.extractSummaryResults[0];
    if (!extractSummaryAction.error) {
      for (const doc of (extractSummaryAction as any).results) {
        if (!doc.error) {
          console.log('\tSummary:', doc.sentences);
        } else {
          console.error('\tError:', doc.error);
        }
      }
    }
  }
}
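
For the meeting scenario, the recognized transcript is what gets summarized; a hypothetical call (transcript is assumed to come from the speech-to-text step further below):

// transcript: text produced by the speech-to-text step (hypothetical variable).
await summarization([transcript]);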

Azure Translator

https://docs.microsoft.com/zh-cn/azure/cognitive-services/translator/quickstart-translator?tabs=nodejs

import axios from 'axios';
import { v4 as uuidv4 } from 'uuid';

// Translate text into the target language (default zh); the source is assumed to be the other half of the en/zh pair.
async function translate(text: string, lan = 'zh'): Promise<string> {
  return axios({
    baseURL: endpoint,
    url: '/translate',
    method: 'post',
    headers: {
      'Ocp-Apim-Subscription-Key': key,
      'Ocp-Apim-Subscription-Region': region,
      'Content-type': 'application/json',
      'X-ClientTraceId': uuidv4().toString(),
    },
    params: {
      'api-version': '3.0',
      from: lan === 'en' ? 'zh' : 'en',
      to: [lan],
    },
    data: [
      {
        text,
      },
    ],
    responseType: 'json',
  })
    .then(function (response) {
      return response.data
        .map((item) => item.translations[0].text)
        .reduce((d, p) => d.concat(p), '');
    })
    .catch(function (error) {
      console.error(error);
      return 'error';
    });
}

Azure KeyPhrases

https://docs.microsoft.com/zh-cn/azure/cognitive-services/language-service/key-phrase-extraction/quickstart?pivots=programming-language-javascript

import { TextAnalyticsClient, AzureKeyCredential, ExtractKeyPhrasesSuccessResult } from '@azure/ai-text-analytics';

const client = new TextAnalyticsClient(endpoint, new AzureKeyCredential(key));

// Extract key phrases from a single document.
async function keyPhrases(text: string) {
  const keyPhrasesInput = [text];
  const keyPhraseResult = await client.extractKeyPhrases(keyPhrasesInput);

  keyPhraseResult.forEach((document: ExtractKeyPhrasesSuccessResult) => {
    console.log(`ID: ${document.id}`);
    console.log(`\tDocument Key Phrases: ${document.keyPhrases}`);
  });
}

Pitfall: code: 'InvalidDocument', message: 'A document within the request was too large to be processed. Limit document size to: 5120 text elements. For additional details on the data limitations see https://aka.ms/text-analytics-data-limits'. Long transcripts therefore have to be split into smaller pieces before they are sent.
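
To stay under that limit, the transcript can be sliced into sub-5,120-character chunks and sent in small batches; a minimal sketch reusing the client above (the batch size of 10 is an assumption):

// Split a long transcript into chunks that stay under the per-document limit.
function chunkText(text: string, maxLen = 5000): string[] {
  const chunks: string[] = [];
  for (let i = 0; i < text.length; i += maxLen) {
    chunks.push(text.slice(i, i + maxLen));
  }
  return chunks;
}

// Run key phrase extraction chunk by chunk and merge the results.
async function keyPhrasesChunked(text: string): Promise<string[]> {
  const phrases: string[] = [];
  const chunks = chunkText(text);
  for (let i = 0; i < chunks.length; i += 10) {
    const results = await client.extractKeyPhrases(chunks.slice(i, i + 10));
    for (const doc of results) {
      if ('keyPhrases' in doc) {
        phrases.push(...doc.keyPhrases);
      }
    }
  }
  return phrases;
}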

Azure SpeechToText

https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/get-started-speech-to-text?tabs=terminal&pivots=programming-language-javascript

import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import { readFileSync } from 'fs';

const speechConfig = sdk.SpeechConfig.fromSubscription(key3, region2);
speechConfig.speechRecognitionLanguage = 'zh-CN';

// Single-shot recognition of a local WAV file (returns only the first recognized utterance).
async function speechRecognition() {
  const audioConfig = sdk.AudioConfig.fromWavFileInput(
    readFileSync(process.cwd() + '/wav/song.wav'),
  );
  const speechRecognizer = new sdk.SpeechRecognizer(
    speechConfig,
    audioConfig,
  );

  speechRecognizer.recognizeOnceAsync((result) => {
    switch (result.reason) {
      case sdk.ResultReason.RecognizedSpeech:
        console.log(`RECOGNIZED: Text=${result.text}`);
        break;
    }
    speechRecognizer.close();
  });
}
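
recognizeOnceAsync only returns the first recognized utterance, which is too short for a whole meeting recording. Below is a sketch of continuous recognition with the same speechConfig, collecting every recognized phrase until the WAV file has been consumed:

function recognizeWholeFile(wavPath: string): Promise<string[]> {
  return new Promise((resolve, reject) => {
    const lines: string[] = [];
    const audioConfig = sdk.AudioConfig.fromWavFileInput(readFileSync(wavPath));
    const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

    // Fires once per recognized phrase.
    recognizer.recognized = (_s, e) => {
      if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
        lines.push(e.result.text);
      }
    };
    // Fires when the audio stream has been fully consumed.
    recognizer.sessionStopped = () => {
      recognizer.stopContinuousRecognitionAsync(() => {
        recognizer.close();
        resolve(lines);
      });
    };
    recognizer.canceled = (_s, e) => {
      if (e.reason === sdk.CancellationReason.Error) {
        reject(new Error(e.errorDetails));
      }
    };

    recognizer.startContinuousRecognitionAsync();
  });
}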

Azure Speaker Identification (Conversation Transcription)

https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-use-conversation-transcription?pivots=programming-language-javascript

// Conversation transcription attributes each utterance to a speaker (Guest_0, Guest_1, ...).
// Config for the same Speech resource used above; the how-to guide uses SpeechTranslationConfig here.
const speechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(key3, region2);
speechTranslationConfig.speechRecognitionLanguage = 'zh-CN';

const recognition = new Promise<void>((resolve, reject) => {
  const conversation = sdk.Conversation.createConversationAsync(
    speechTranslationConfig,
    'myConversation',
  );
  const audioConfig = sdk.AudioConfig.fromWavFileInput(
    readFileSync(`${process.cwd()}/wav/${file}.wav`),
  );
  const transcriber = new sdk.ConversationTranscriber(audioConfig);

  transcriber.joinConversationAsync(conversation, function () {
    // Fires once per transcribed utterance, tagged with a speaker id.
    transcriber.transcribed = function (s, e) {
      const { text, speakerId } = e.result;
      if (speakerId && speakerId.includes('Guest')) {
        console.log(
          '(transcribed) text: ' + text,
          '(transcribed) speakerId: ' + speakerId,
        );
      }
    };
    // Resolve once the audio stream has been fully consumed.
    transcriber.sessionStopped = function () {
      transcriber.stopTranscribingAsync(() => resolve());
    };
    transcriber.startTranscribingAsync(
      function () {
        console.log('starting transcription');
      },
      function (err) {
        console.trace('err - starting transcription: ' + err);
        reject(err);
      },
    );
  });
});

FFmpeg: video to audio

1.ffmpeg-nodejs

https://ffmpeg.org/ffmpeg.html

https://www.npmjs.com/package/ffmpeg-nodejs

npm i cmake-js -g
brew install cmake
npm i ffmpeg-nodejs

Error: Policy CMP0042 is not set: MACOSX_RPATH is enabled by default.

npm cache ls ffmpeg-nodejs

Download https://registry.npmjs.org/ffmpeg-nodejs/-/ffmpeg-nodejs-0.4.4.tgz

npm install error: nasm/yasm not found or too old. Use --disable-x86asm for a crippled build.

Download https://tortall.net/projects/yasm/releases/yasm-1.2.0.tar.gz

cd yasm-1.2.0
./configure
make
sudo make install
yasm --version

Abandoned this approach.

2.symbl-media

brew install ffmpeg

https://www.npmjs.com/package/symbl-media

npm i symbl-media

import { transcodeMediaFile } from 'symbl-media';

const video_addr = process.cwd() + '/video/tschina.mp4';
const output_addr = process.cwd() + '/wav/tschina.wav';
// Extract the audio track as WAV; symbl-media drives the locally installed ffmpeg,
// much like running a plain conversion such as: ffmpeg -i input.mp4 output.avi
await transcodeMediaFile(video_addr, output_addr, 'wav');

3.ffmpeg-extract-frames

npm i ffmpeg-extract-frames-quality

Get the scene-change (key frame) timestamps:

ffmpeg -i ./video/dm.mp4 -filter:v "select='gt(scene,0.1)',showinfo" -f null - 2>&1

Extract screenshots at the chosen offsets (the sketch after this snippet turns the showinfo timestamps into these offsets):

await extractFrames({
  input: video_addr,
  output: output_img,
  offsets: [1000, 2000, 650000],
});
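
To wire the two steps together, the pts_time values that showinfo prints can be parsed into the millisecond offsets expected by extractFrames. A minimal sketch using Node's built-in child_process (the 0.1 scene threshold mirrors the command above):

import { exec } from 'child_process';
import { promisify } from 'util';

const execAsync = promisify(exec);

// Run the scene-change filter and collect the pts_time values printed by showinfo,
// converted to millisecond offsets for extractFrames().
async function sceneChangeOffsets(video: string): Promise<number[]> {
  const cmd = `ffmpeg -i ${video} -filter:v "select='gt(scene,0.1)',showinfo" -f null - 2>&1`;
  const { stdout } = await execAsync(cmd, { maxBuffer: 64 * 1024 * 1024 });
  const offsets: number[] = [];
  for (const match of stdout.matchAll(/pts_time:([\d.]+)/g)) {
    offsets.push(Math.round(parseFloat(match[1]) * 1000));
  }
  return offsets;
}

// Usage with the snippet above:
// await extractFrames({ input: video_addr, output: output_img, offsets: await sceneChangeOffsets(video_addr) });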

OCR

brew install tesseract
brew install tesseract-lang
tesseract -v

tesseract /Users/jyu/code/sharktank/send-pdf/img/ocr1.png /Users/jyu/code/sharktank/send-pdf/img/result -l eng

tesseract /Users/jyu/code/sharktank/send-pdf/img/test-1.jpg /Users/jyu/code/sharktank/send-pdf/img/result -l chi_sim

npm install node-tesseract-ocr

import * as tesseract from 'node-tesseract-ocr';

tesseract
  .recognize(source_img, {
    lang: 'eng',
  })
  .then((text) => {
    console.log('Result:', text);
  })
  .catch((error) => {
    console.log(error.message);
  });
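
To pull slide text into the outline, the same recognize call can be looped over the screenshots extracted earlier; a small sketch (the frame paths and the chi_sim+eng language choice are assumptions):

import * as tesseract from 'node-tesseract-ocr';

// Run OCR over each extracted frame and keep the text alongside its image path.
async function ocrFrames(framePaths: string[]): Promise<{ frame: string; text: string }[]> {
  const results: { frame: string; text: string }[] = [];
  for (const frame of framePaths) {
    const text = await tesseract.recognize(frame, { lang: 'chi_sim+eng' });
    results.push({ frame, text: text.trim() });
  }
  return results;
}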

html-pdf

npm install --save html-pdf
npm install --save ejs

Pitfall: "Error","errorMessage":"write EPIPE". Fixed on the server by installing fontconfig and a Chinese font:

yum -y install fontconfig
fc-list
fc-list :lang=zh
fc-list :lang=en

mkdir /usr/share/fonts/chinese
Download the Microsoft YaHei font (msyh.ttf), then move it into place and re-check:
mv -f /home/v-jyu/fonts/* /usr/share/fonts/chinese/
fc-list :lang=zh

attachment/exportData.ts

import * as pdf from 'html-pdf';
import * as ejs from 'ejs';

export default async function savePDF(meetingData, filename): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const template = process.cwd() + '/email-template/datamail.code.ejs';
    // renderFile is asynchronous, so the PDF has to be created inside its callback,
    // otherwise the HTML is still undefined when pdf.create() runs.
    ejs.renderFile(template, { meetingData }, (renderErr, html) => {
      if (renderErr) {
        return reject(renderErr);
      }
      const options = { format: 'Letter' };
      pdf.create(html, options).toFile(`pdf/${filename}.pdf`, (err, res) => {
        if (err) {
          return reject(err);
        }
        resolve(res.filename);
      });
    });
  });
}

email.service.ts

const outlinefile = await savePDF(meetingData, 'courseOutline');
await this.mailerService.sendMail({
  to,
  subject,
  template: 'datamail.code.ejs',
  context: { meetingData },
  attachments: [
    {
      filename: '会议大纲.pdf',
      path: outlinefile,
    },
  ],
});

email-template/datamail.code.ejs

<html>
  <body>
    <div class="container">
      <p class="title"><%=locals.meetingData.title%></p>
      <% for (var i = 0; i < locals.meetingData.content.length; i++) { %>
        <p class="sub-title"><%=locals.meetingData.content[i].keyPhases[0]%></p>
        <img src="https://xxx.cn/static/<%=locals.meetingData.content[i].shot%>.jpg" />
        <p class="content"><%=locals.meetingData.content[i].summary%></p>
      <% } %>
    </div>
  </body>
</html>