Creating an AI Chatbot That Parses Financial Information
2024-9-6 17:6:16 Author: hackernoon.com(查看原文) 阅读量:1 收藏

I have developed a COZE BOT called EDINET RABBIT and would like to introduce it to you.

Introduction

  • Financial statements are documents that allow companies to understand the current state of their business, so every company spends a considerable amount of time creating them. Or, in addition to important data, important information for the company, such as management policies, is created together with the team, and the important contents are audited by an auditing firm.
  • Securities reports are prepared in this way, but in reality they are quite voluminous (about 300 pages in PDF, about 600,000 characters) and contain many technical terms, so I think it is quite difficult for the general public to read and understand them.
  • First, we developed a BOT that briefly summarizes the contents of , which publishes securities reports of leading companies.

Basic operations

  • EDINET RABBIT is a COZE BOT that can obtain and analyze information from EDINET.

  • EDINET is a website operated by the Financial Services Agency where you can view securities reports and other information, and it contains data on all leading companies.

  • EDINET RABBIT can provide users with the latest securities report information they are looking for by simply entering the company name and necessary information.

Application Operation

  • For multiple companies for which financial data has been obtained, you can create tables and graphs that extract specific items and compare them.

How EDINET API works

Using our proprietary plugin "EDINET API" (Google App Script: edinet_new_gas_analize), we retrieve the necessary data from EDINET using the following procedure.

  1. Identify the document_id of the data to be retrieved from the company name.
  2. Retrieve the CSV of the data from the document_id and save it to Google Drive.
  3. Identify the parts of the CSV data that are frequently used by users and copy them to a spreadsheet.
  4. EDINET RABBIT retrieves the data required by the user from the spreadsheet and creates a response (ChatGPT4o).

※The list sheet that manages document_ids automatically retrieves the latest list document data every day using the GAS trigger function.

<edinet_new_gas_analize>

var ss = SpreadsheetApp.getActiveSpreadsheet();
var sh = ss.getSheetByName('list'); // 'list'にデータを入力

function getEDINET() {
  var startDate = new Date(); // 開始日
  var today = new Date(); // 今日の日付
  var dayInMillis = 24 * 60 * 60 * 1000; // 1日のミリ秒

  for (var date = startDate; date <= today; date = new Date(date.getTime() + dayInMillis)) {
    // 日付をYYYY-MM-DD形式に変換
    var formattedDate = Utilities.formatDate(date, "JST", "yyyy-MM-dd");
    var url = "https://api.edinet-fsa.go.jp/api/v2/documents.json?date=" + formattedDate + "&type=2&Subscription-Key=<APIキー>";
    var response = UrlFetchApp.fetch(url);
    var json = JSON.parse(response.getContentText());

    // 0件なら次の日に進む
    var kensu = json.metadata.resultset.count;
    if (kensu === 0) {
      continue;
    }

    var item = [
      "seqNumber",
      "docID",
      "edinetCode",
      "secCode",
      "JCN",
      "filerName",
      "fundCode",
      "ordinanceCode",
      "formCode",
      "docTypeCode",
      "periodStart",
      "periodEnd", 
      "submitDateTime",
      "docDescription",
      "issuerEdinetCode",
      "subjectEdinetCode",
      "subsidiaryEdinetCode",
      "currentReportReason",
      "parentDocID",
      "opeDateTime",
      "withdrawalStatus",
      "docInfoEditStatus",
      "disclosureStatus",
      "xbrlFlag",
      "pdfFlag",
      "attachDocFlag",
      "englishDocFlag",
      "csvFlag",
      "legalStatus"
    ];

    var ary = [];
    var ary2 = [];
    for (var i = 0; i < json.results.length; i++) {
      for (var j = 0; j < item.length; j++) {
        ary.push(json.results[i][item[j]] || "");
      }
      ary2.push(ary);
      ary = [];
    }

    // シートに反映
    if (ary2.length > 0) {
      sh.getRange(sh.getLastRow() + 1, 1, ary2.length, item.length).setValues(ary2);
    }
  }
}

function downloadUnzipRenameAndClean(docId) {
  var url = `https://api.edinet-fsa.go.jp/api/v2/documents/${docId}?type=5&Subscription-Key=<APIキー>`;
  var outputFolderId = '<非公開>';
  var tempFolderId = '<非公開>';
  var outputFolder = DriveApp.getFolderById(outputFolderId);
  var tempFolder = DriveApp.getFolderById(tempFolderId);

  try {
    // ZIPファイルのダウンロード
    var response = UrlFetchApp.fetch(url);
    var blob = response.getBlob();

    // 一時フォルダにZIPファイルを保存
    var zipFileName = `${docId}.zip`;
    var zipFile = tempFolder.createFile(blob.setName(zipFileName));

    // ZIPファイルを解凍
    var unzippedFiles = Utilities.unzip(zipFile.getBlob());
    var savedFileName;

    // 解凍したファイルとフォルダを処理
    unzippedFiles.forEach(function(file) {
      var fileName = file.getName();
      if (fileName.includes('jpcrp')) {
        // jpcrpを含むファイルの名称をdocIDに変更して保存
        savedFileName = docId + '.' + fileName.split('.').pop(); // 拡張子を保持
        outputFolder.createFile(file.setName(savedFileName));
        Logger.log('Saved: ' + savedFileName);
      }
    });

    // 一時フォルダの内容をクリア
    clearFolder(tempFolder);

    Logger.log('Process completed successfully.');
    return savedFileName; // 保存したファイル名を返す
  } catch (e) {
    Logger.log('Error: ' + e.toString());
    return null;
  }
}

// フォルダの内容をクリアする関数
function clearFolder(folder) {
  var files = folder.getFiles();
  while (files.hasNext()) {
    var file = files.next();
    file.setTrashed(true);
  }
}

function importCsvToSheet(docID) {
  const folderId = '<非公開>'; // 格納フォルダのID
  const fileName = `${docID}.csv`; // クエリパラメータから取得したdocIDを基にファイル名を設定

  // 格納フォルダとCSVファイルを取得
  const folder = DriveApp.getFolderById(folderId);
  const fileIterator = folder.getFilesByName(fileName);

  if (!fileIterator.hasNext()) {
    Logger.log('ファイルが見つかりませんでした。');
    return null;
  }

  const file = fileIterator.next();

  // ファイルをUTF-16LEとしてデコード
  const content = file.getBlob().getDataAsString('UTF-16LE');

  // スプレッドシートの操作
  const ss = SpreadsheetApp.getActiveSpreadsheet();
  const sheets = ['work1', 'work2', 'work3', 'work4', 'work5', 'work6', 'work7', 'work8', 'work9'];

  let targetSheet;
  for (let i = 0; i < sheets.length; i++) {
    const sheet = ss.getSheetByName(sheets[i]);
    if (sheet.getLastRow() === 0) { // データが無い場合
      targetSheet = sheet;
      break;
    }
  }

  if (!targetSheet) {
    targetSheet = ss.getSheetByName('work1'); // すべてのシートにデータがある場合、work1をクリアして使用
    targetSheet.clear();
  }

  // デコードされた内容をCSVとして解析し、スプレッドシートに張り付ける
  const data = Utilities.parseCsv(content, '\t'); // タブ区切りのCSVとして解析
  targetSheet.getRange(1, 1, data.length, data[0].length).setValues(data);

  // 探すべきA列の内容
  const searchKeys = [
    'jpcrp_cor:CompanyNameCoverPage',
    'jpcrp_cor:CompanyNameInEnglishCoverPage',
    'jpcrp_cor:TitleAndNameOfRepresentativeCoverPage',
    'jpcrp_cor:AddressOfRegisteredHeadquarterCoverPage',
    'jpcrp_cor:jpcrp_cor:PlaceForPublicInspectionCoverPageTextBlock',
    'jpcrp_cor:jpdei_cor:EDINETCodeDEI',
    'jpcrp_cor:jpcrp_cor:CompanyHistoryTextBlock',
    'jpcrp_cor:OverviewOfAffiliatedEntitiesTextBlock',
    'jpcrp_cor:InformationAboutEmployeesTextBlock',
    'jpcrp_cor:BusinessPolicyBusinessEnvironmentIssuesToAddressEtcTextBlock',
    'jpcrp030000-asr_E00369-000:DisclosureOfSustainabilityRelatedFinancialInformationTextBlock',
    'jpcrp030000-asr_E00369-000:GovernanceTextBlock',
    'jpcrp030000-asr_E00369-000:RiskManagementTextBlock',
    'jpcrp030000-asr_E00369-000:StrategyTextBlock',
    'jpcrp030000-asr_E00369-000:ReferenceToOtherInformationStrategy',
    'jpcrp030000-asr_E00369-000:MetricsAndTargetsTextBlock',
    'jpcrp030000-asr_E00369-000:PolicyOnHumanResourceDevelopmentAndImprovementOfInternalEnvironmentStrategyTextBlock',
    'jpcrp030000-asr_E00369-000:DescriptionOfMetricsRelatedToPolicyOnHumanResourceDevelopmentAndImprovementOfInternalEnvironmentAndTargetsAndResultsUsingSuchMetricsMetricsAndTargetsTextBlock',
    'jpcrp030000-asr_E00369-000:GovernanceClimateChangeTextBlock',
    'jpcrp030000-asr_E00369-000:StrategyClimateChangeTextBlock',
    'jpcrp030000-asr_E00369-000:RiskManagementClimateChangeTextBlock',
    'jpcrp030000-asr_E00369-000:MetricsAndTargetsClimateChangeTextBlock',
    'jpcrp_cor:BusinessRisksTextBlock',
    'jpcrp_cor:ManagementAnalysisOfFinancialPositionOperatingResultsAndCashFlowsTextBlock',
    'jpcrp_cor:CriticalContractsForOperationTextBlock',
    'jpcrp_cor:ResearchAndDevelopmentActivitiesTextBlock',
    'jpcrp_cor:OverviewOfCapitalExpendituresEtcTextBlock',
    'jpcrp_cor:MajorFacilitiesTextBlock',
    'jpcrp_cor:PlannedAdditionsRetirementsEtcOfFacilitiesTextBlock',
    'jpcrp_cor:OverviewOfCorporateGovernanceTextBlock',
    'jpcrp_cor:CorporateGovernanceCompanyWithCorporateAuditorsTextBlock',
    'jpcrp_cor:BasicPolicyRegardingControlOfCompanyTextBlock',
    'jpcrp_cor:ManagementAnalysisOfFinancialPositionOperatingResultsAndCashFlowsTextBlock',
    'jpcrp_cor:NameOfFinancialInstrumentsExchangeOnWhichSecuritiesAreListedOrAuthorizedFinancialInstrumentsBusinessAssociationToWhichSecuritiesAreRegistered',
    'jpcrp_cor:InformationAboutOfficersTextBlock',
    'jpcrp_cor:FiscalYearCoverPage',
    'jpcrp_cor:AverageAnnualSalaryInformationAboutReportingCompanyInformationAboutEmployees',
    'jpcrp_cor:NumberOfEmployees',
    'jpcrp_cor:DescriptionOfBusinessTextBlock'
  ];

  const additionalKeys = [
    'jpcrp_cor:NetSalesSummaryOfBusinessResults',
    'jpcrp_cor:OrdinaryIncomeLossSummaryOfBusinessResults',
    'jpcrp_cor:ProfitLossAttributableToOwnersOfParentSummaryOfBusinessResults',
    'jpcrp_cor:ComprehensiveIncomeSummaryOfBusinessResults',
    'jpcrp_cor:NetAssetsSummaryOfBusinessResults',
    'jpcrp_cor:TotalAssetsSummaryOfBusinessResults',
    'jpcrp_cor:NetAssetsPerShareSummaryOfBusinessResults',
    'jpcrp_cor:BasicEarningsLossPerShareSummaryOfBusinessResults',
    'jpcrp_cor:DilutedEarningsPerShareSummaryOfBusinessResults',
    'jpcrp_cor:EquityToAssetRatioSummaryOfBusinessResults',
    'jpcrp_cor:RateOfReturnOnEquitySummaryOfBusinessResults',
    'jpcrp_cor:PriceEarningsRatioSummaryOfBusinessResults'
  ];

  // A列およびC列を取得
  const columnA = targetSheet.getRange(1, 1, targetSheet.getLastRow(), 1).getValues().flat();
  const columnC = targetSheet.getRange(1, 3, targetSheet.getLastRow(), 1).getValues().flat();

  // 対象行を探し、見つけた行をリストに格納
  const rowsToInsert = [];
  for (let i = 0; i < columnA.length; i++) {
    if (searchKeys.includes(columnA[i])) {
      if (columnA[i] === 'jpcrp_cor:NumberOfEmployees') {
        if (columnC[i] === 'CurrentYearInstant') {
          // A列が'jpcrp_cor:NumberOfEmployees'でC列が'CurrentYearInstant'の行
          const rowData = targetSheet.getRange(i + 1, 1, 1, targetSheet.getLastColumn()).getValues();
          rowsToInsert.push(rowData[0]);
        }
      } else {
        // その他の行
        const rowData = targetSheet.getRange(i + 1, 1, 1, targetSheet.getLastColumn()).getValues();
        rowsToInsert.push(rowData[0]);
      }
    }
  }

  // 追加の用語がA列に含まれ、C列に「Current」を含む行をリストに追加
  for (let i = 0; i < columnA.length; i++) {
    if (additionalKeys.includes(columnA[i]) && columnC[i].includes('Current')) {
      const rowData = targetSheet.getRange(i + 1, 1, 1, targetSheet.getLastColumn()).getValues();
      rowsToInsert.push(rowData[0]);
    }
  }

  // 2行目以降のデータを削除
  const lastRow = targetSheet.getLastRow();
  if (lastRow > 1) {
    targetSheet.deleteRows(2, lastRow - 1);
  }

  // 2行目以降にデータを挿入
  if (rowsToInsert.length > 0) {
    targetSheet.insertRowsAfter(1, rowsToInsert.length); // 2行目以降に行を挿入
    targetSheet.getRange(2, 1, rowsToInsert.length, rowsToInsert[0].length).setValues(rowsToInsert);
  }

  return targetSheet.getName(); // 張り付けしたシート名を返す
}

function doGet(e) {
  var docID = e.parameter.docID; // クエリパラメータからdocIDを取得
  var keyword = e.parameter.companyName; // クエリパラメータからcompanyNameを取得
  var koumoku = e.parameter.koumoku; // クエリパラメータからkoumokuを取得

  if (keyword && keyword.toLowerCase() === 'deletesheets') {
    deleteSheetsData();
    return ContentService.createTextOutput(JSON.stringify({ status: "All sheets data deleted." }))
      .setMimeType(ContentService.MimeType.JSON);
  }

  if (docID && koumoku) {
    downloadUnzipRenameAndClean(docID); // docIDに基づいてファイルをダウンロードして処理
    var sheetName = importCsvToSheet(docID); // CSVをスプレッドシートにインポートする関数を呼び出し

    if (!sheetName) {
      return ContentService.createTextOutput(JSON.stringify({ error: "ファイルが見つかりませんでした。" }))
        .setMimeType(ContentService.MimeType.JSON);
    }

    var ss = SpreadsheetApp.getActiveSpreadsheet();
    var sheet = ss.getSheetByName(sheetName); // データが保存されているシートを取得
    var fullRange = sheet.getDataRange(); // シート全体のデータ
    var fullValues = fullRange.getValues();

    // B列をkoumokuで検索して、当該用語を含むデータを抽出
    var filteredValues = fullValues.filter(row => row[1].toString().includes(koumoku));

    return ContentService.createTextOutput(JSON.stringify(filteredValues))
      .setMimeType(ContentService.MimeType.JSON);

  } else if (docID) {
    downloadUnzipRenameAndClean(docID); // docIDに基づいてファイルをダウンロードして処理
    var sheetName = importCsvToSheet(docID); // CSVをスプレッドシートにインポートする関数を呼び出し

    if (!sheetName) {
      return ContentService.createTextOutput(JSON.stringify({ error: "ファイルが見つかりませんでした。" }))
        .setMimeType(ContentService.MimeType.JSON);
    }

    // JSONオブジェクト形式で"ok"とシート名を返す
    var jsonObject = {
      "status": "ok",
      "status_code": 200,
      "sheet_name": sheetName,
      "action_id": "<非公開>"
    };

    return ContentService.createTextOutput(JSON.stringify(jsonObject))
      .setMimeType(ContentService.MimeType.JSON);

  } else if (keyword) {
    var ss = SpreadsheetApp.openById('<非公開>');
    var sheet = ss.getSheetByName('list'); // シート名でシートを取得
    var dataRange = sheet.getDataRange();
    var values = dataRange.getValues();

    var filteredValues = values.filter(row => row[7] == 10 && row[9] == 120); // H列が10、J列が120の行を絞り込む

    var matchingRow = filteredValues.find(row => row[5] && row[5].toString().toLowerCase().includes(keyword.toLowerCase()));
    if (matchingRow) {
      var jsonObject = {
        id: matchingRow[0],
        document_id: matchingRow[1],
        company_code: matchingRow[2],
        empty_field1: matchingRow[3],
        jcn: matchingRow[4],
        company_name: matchingRow[5],
        empty_field2: matchingRow[6],
        type_code: matchingRow[7],
        value1: matchingRow[8],
        value2: matchingRow[9],
        start_date: matchingRow[10],
        end_date: matchingRow[11],
        submit_date: matchingRow[12],
        document_description: matchingRow[13]
      };
      return ContentService.createTextOutput(JSON.stringify(jsonObject))
        .setMimeType(ContentService.MimeType.JSON);
    } else {
      return ContentService.createTextOutput(JSON.stringify({ error: "キーワードにマッチする企業名が見つかりませんでした。" }))
        .setMimeType(ContentService.MimeType.JSON);
    }
  } else {
    return ContentService.createTextOutput(JSON.stringify({ error: "docIDまたはキーワードを指定してください。" }))
      .setMimeType(ContentService.MimeType.JSON);
  }
}

function deleteSheetsData() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheets = ['work1', 'work2', 'work3', 'work4', 'work5', 'work6', 'work7', 'work8', 'work9'];

  sheets.forEach(function(sheetName) {
    var sheet = ss.getSheetByName(sheetName);
    if (sheet) {
      var lastRow = sheet.getLastRow();
      if (lastRow > 0) {
        sheet.getRange(1, 1, lastRow, sheet.getLastColumn()).clearContent(); // すべてのデータをクリア
      }
    }
  });
}

function findSpreadsheetById(driveId, docID) {
  var folder = DriveApp.getFolderById(driveId);
  var files = folder.getFilesByName(docID);
  while (files.hasNext()) {
    var file = files.next();
    if (file.getMimeType() === MimeType.GOOGLE_SHEETS) {
      return file;
    }
  }
  return null;
}

Others

  • This COZE BOT uses EDINET API, Google Sheets, Code Interpriter, and Google Web Search as plugins.
  • EDINET API connects to GAS edinet_new_gas_analize and retrieves data from Document List API and Document Acquisition API.
  • There is likely to be corporate securities report data retrieved by the Document Acquisition API, but currently ChatGPT4o cannot identify the correct answer for the user.
  • For this reason, we designed a GAS that can read and interpret securities reports and copy only the parts that users frequently use. If you would like more detailed data, we recommend that you access EDINET directly on the web.
  • The spreadsheet can store announcement data for up to 9 companies.
  • By using this mechanism, EDINET RABBIT can quickly provide accurate information to companies.
  • This COZE BOT was submitted to the Coze Hackathon.

※This Bot won the Grand Prize at the Coze Hackathon on June 29th!

※This bot was selected as the most creative bot in the June Coze AI Bot Challenge winners announcement, so we would like to introduce it to you.


文章来源: https://hackernoon.com/creating-an-ai-chatbot-that-parses-financial-information?source=rss
如有侵权请联系:admin#unsafe.sh