[mlir] Make the split markers of splitAndProcessBuffer configurable. (#84765)

This allows defining custom splitters, which is interesting for non-MLIR inputs to and outputs of `mlir-translate`. For example, one may use `; -----` as a splitter of `.ll` files. The splitters are now passed as arguments into `splitAndProcessBuffer`, the input splitter defaulting to the previous default (`// -----`) and the output splitter defaulting to the empty string, which also corresponds to the previous default. The behavior of the input split marker should not change at all; however, outputs now have one newline *more* than before if there is no splitter (old: `insertMarkerInOutput = false`, new: `outputSplitMarker = ""`) and one newline *less* if there is one. The value of the input splitter is exposed as a command-line option of `mlir-translate` and other tools as an optional value to the previously existing flag `-split-input-file`, which defaults to the default splitter if not specified; the value of the output splitter is exposed with the new flag `-output-split-marker`, which defaults to the empty string in `mlir-translate` and to the default splitter in the other tools. In short, the previous usage or omission of the flags should result in the previous behavior (modulo the newlines mentioned before).
2024-03-14 13:55:50 +01:00 · 2024-03-14 13:55:50 +01:00 · 516ccce7fa
commit 516ccce7fa
parent 2e271ceff6
12 changed files with 191 additions and 59 deletions
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@ -15,6 +15,8 @@

 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
 #include <memory>

 namespace llvm {
@ -27,20 +29,24 @@ struct LogicalResult;
 using ChunkBufferHandler = function_ref<LogicalResult(
    std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;

-/// Splits the specified buffer on a marker (`// -----`), processes each chunk
-/// independently according to the normal `processChunkBuffer` logic, and writes
-/// all results to `os`.
+extern inline const char *const kDefaultSplitMarker = "// -----";
+
+/// Splits the specified buffer on a marker (`// -----` by default), processes
+/// each chunk independently according to the normal `processChunkBuffer` logic,
+/// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
-/// into a single file. `enableSplitting` can be used to toggle if splitting
-/// should be enabled, e.g. to allow for merging split and non-split code paths.
-/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
-/// between each of the processed output chunks.
+/// into a single file. The input split marker is configurable. If it is empty,
+/// splitting is disabled, which allows for merging split and non-split code
+/// paths. The output split marker (empty by default), followed by a newline
+/// character, is placed between each pair of consecutive processed output
+/// chunks. (The newline character is inserted even if the split marker is
+/// empty.)
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                      ChunkBufferHandler processChunkBuffer, raw_ostream &os,
-                      bool enableSplitting = true,
-                      bool insertMarkerInOutput = false);
+                      llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
+                      llvm::StringRef outputSplitMarker = "");
 } // namespace mlir

 #endif // MLIR_SUPPORT_TOOLUTILITIES_H
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@ -15,6 +15,7 @@

 #include "mlir/Debug/CLOptionsSetup.h"
 #include "mlir/Support/LogicalResult.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "llvm/ADT/StringRef.h"

 #include <cstdlib>
@ -136,13 +137,24 @@ public:
  }
  bool shouldShowDialects() const { return showDialectsFlag; }

-  /// Set whether to split the input file based on the `// -----` marker into
-  /// pieces and process each chunk independently.
-  MlirOptMainConfig &splitInputFile(bool split = true) {
-    splitInputFileFlag = split;
+  /// Set the marker on which to split the input into chunks and process each
+  /// chunk independently. Input is not split if empty.
+  MlirOptMainConfig &
+  splitInputFile(std::string splitMarker = kDefaultSplitMarker) {
+    splitInputFileFlag = std::move(splitMarker);
    return *this;
  }
-  bool shouldSplitInputFile() const { return splitInputFileFlag; }
+  bool shouldSplitInputFile() const { return !splitInputFileFlag.empty(); }
+  StringRef inputSplitMarker() const { return splitInputFileFlag; }
+
+  /// Set the marker that is written between consecutive output chunks when
+  /// they are merged into a single output.
+  MlirOptMainConfig &
+  outputSplitMarker(std::string splitMarker = kDefaultSplitMarker) {
+    outputSplitMarkerFlag = std::move(splitMarker);
+    return *this;
+  }
+  StringRef outputSplitMarker() const { return outputSplitMarkerFlag; }

  /// Disable implicit addition of a top-level module op during parsing.
  MlirOptMainConfig &useExplicitModule(bool useExplicitModule) {
@ -215,9 +227,12 @@ protected:
  /// Show the registered dialects before trying to load the input file.
  bool showDialectsFlag = false;

-  /// Split the input file based on the `// -----` marker into pieces and
-  /// process each chunk independently.
-  bool splitInputFileFlag = false;
+  /// Split the input file based on the given marker into chunks and process
+  /// each chunk independently. Input is not split if empty.
+  std::string splitInputFileFlag = "";
+
+  /// Merge output chunks into one file using the given marker.
+  std::string outputSplitMarkerFlag = "";

  /// Use an explicit top-level module op during parsing.
  bool useExplicitModuleFlag = false;
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@ -21,22 +21,20 @@ using namespace mlir;
 LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                            ChunkBufferHandler processChunkBuffer,
-                            raw_ostream &os, bool enableSplitting,
-                            bool insertMarkerInOutput) {
+                            raw_ostream &os, llvm::StringRef inputSplitMarker,
+                            llvm::StringRef outputSplitMarker) {
  // If splitting is disabled, we process the full input buffer.
-  if (!enableSplitting)
+  if (inputSplitMarker.empty())
    return processChunkBuffer(std::move(originalBuffer), os);

-  const char splitMarkerConst[] = "// -----";
-  StringRef splitMarker(splitMarkerConst);
-  const int splitMarkerLen = splitMarker.size();
+  const int inputSplitMarkerLen = inputSplitMarker.size();

  auto *origMemBuffer = originalBuffer.get();
  SmallVector<StringRef, 8> rawSourceBuffers;
  const int checkLen = 2;
  // Split dropping the last checkLen chars to enable flagging near misses.
  origMemBuffer->getBuffer().split(rawSourceBuffers,
-                                   splitMarker.drop_back(checkLen));
+                                   inputSplitMarker.drop_back(checkLen));
  if (rawSourceBuffers.empty())
    return success();

@ -58,8 +56,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
    }

    // Check that suffix is as expected and doesn't have any dash post.
-    bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
-                          buffer.size() > checkLen && buffer[checkLen] != '0';
+    bool expectedSuffix =
+        buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
+        buffer.size() > checkLen && buffer[checkLen] != '0';
    if (expectedSuffix) {
      sourceBuffers.push_back(prev);
      prev = buffer.drop_front(checkLen);
@ -69,8 +68,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
      fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
                                 llvm::SourceMgr::DK_Warning,
                                 "near miss with file split marker");
-      prev = StringRef(prev.data(),
-                       prev.size() + splitMarkerLen - checkLen + buffer.size());
+      prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
+                                        checkLen + buffer.size());
    }
  }
  if (!prev.empty())
@ -89,7 +88,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
      hadFailure = true;
  };
  llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput ? "\n// -----\n" : "");
+                   (llvm::Twine(outputSplitMarker) + "\n").str());

  // If any fails, then return a failure of the tool.
  return failure(hadFailure);
--- a/mlir/lib/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp
@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "mlir/Tools/lsp-server-support/Transport.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/Protocol.h"
 #include "llvm/ADT/SmallString.h"
@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
    StringRef lineRef = line.str().trim();
    if (lineRef.starts_with("//")) {
      // Found a delimiter for the message.
-      if (lineRef == "// -----")
+      if (lineRef == kDefaultSplitMarker)
        break;
      continue;
    }
--- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
+++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
@ -15,6 +15,7 @@
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Parser/Parser.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
 #include "llvm/ADT/StringExtras.h"
@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
  context.allowUnregisteredDialects();

  // Split the file into separate MLIR documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
  SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
  chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
      context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));

--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@ -127,11 +127,21 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
        cl::desc("Print the list of registered dialects and exit"),
        cl::location(showDialectsFlag), cl::init(false));

-    static cl::opt<bool, /*ExternalStorage=*/true> splitInputFile(
-        "split-input-file",
-        cl::desc("Split the input file into pieces and process each "
-                 "chunk independently"),
-        cl::location(splitInputFileFlag), cl::init(false));
+    static cl::opt<std::string, /*ExternalStorage=*/true> splitInputFile(
+        "split-input-file", llvm::cl::ValueOptional,
+        cl::callback([&](const std::string &str) {
+          // Implicit value: use default marker if flag was used without value.
+          if (str.empty())
+            splitInputFile.setValue(kDefaultSplitMarker);
+        }),
+        cl::desc("Split the input file into chunks using the given or "
+                 "default marker and process each chunk independently"),
+        cl::location(splitInputFileFlag), cl::init(""));
+
+    static cl::opt<std::string, /*ExternalStorage=*/true> outputSplitMarker(
+        "output-split-marker",
+        cl::desc("Split marker to use for merging the output"),
+        cl::location(outputSplitMarkerFlag), cl::init(kDefaultSplitMarker));

    static cl::opt<bool, /*ExternalStorage=*/true> verifyDiagnostics(
        "verify-diagnostics",
@ -533,8 +543,8 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                         threadPool);
  };
  return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
-                               config.shouldSplitInputFile(),
-                               /*insertMarkerInOutput=*/true);
+                               config.inputSplitMarker(),
+                               config.outputSplitMarker());
 }

 LogicalResult mlir::MlirOptMain(int argc, char **argv,
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@ -10,6 +10,7 @@

 #include "Protocol.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/PDLL/AST/Context.h"
 #include "mlir/Tools/PDLL/AST/Nodes.h"
 #include "mlir/Tools/PDLL/AST/Types.h"
@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
        [&](PDLTextFileChunk &chunk) {
          chunk.document.getPDLLViewOutput(outputOS, kind);
        },
-        [&] { outputOS << "\n// -----\n\n"; });
+        [&] { outputOS << "\n"
+                       << kDefaultSplitMarker << "\n\n"; });
  }
  return result;
 }
@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
  chunks.clear();

  // Split the file into separate PDL documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
  SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
  chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
      /*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
      diagnostics));
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@ -62,11 +62,16 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
      llvm::cl::desc("Allow operation with no registered dialects (discouraged: testing only!)"),
      llvm::cl::init(false));

-  static llvm::cl::opt<bool> splitInputFile(
-      "split-input-file",
-      llvm::cl::desc("Split the input file into pieces and "
-                     "process each chunk independently"),
-      llvm::cl::init(false));
+  static llvm::cl::opt<std::string> inputSplitMarker(
+      "split-input-file", llvm::cl::ValueOptional,
+      llvm::cl::callback([&](const std::string &str) {
+        // Implicit value: use default marker if flag was used without value.
+        if (str.empty())
+          inputSplitMarker.setValue(kDefaultSplitMarker);
+      }),
+      llvm::cl::desc("Split the input file into chunks using the given or "
+                     "default marker and process each chunk independently"),
+      llvm::cl::init(""));

  static llvm::cl::opt<bool> verifyDiagnostics(
      "verify-diagnostics",
@ -80,6 +85,11 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
                     "(discouraged: testing only!)"),
      llvm::cl::init(false));

+  static llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for merging the output"),
+      llvm::cl::init(""));
+
  llvm::InitLLVM y(argc, argv);

  // Add flags for all the registered translations.
@ -176,7 +186,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
  };

  if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
-                                   output->os(), splitInputFile)))
+                                   output->os(), inputSplitMarker,
+                                   outputSplitMarker)))
    return failure();

  output->keep();
--- a/mlir/test/mlir-opt/split-markers.mlir
+++ b/mlir/test/mlir-opt/split-markers.mlir
@ -1,6 +1,13 @@
-// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t &&  FileCheck --input-file %t %s
+// Check near-miss mechanics:
+// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t \
+// RUN: &&  FileCheck --input-file %t %s
 // RUN: cat %t

+// Check that (1) custom input splitter and (2) custom output splitters work.
+// RUN: mlir-opt %s -split-input-file="// CHECK: ""----" \
+// RUN:   -output-split-marker="// ---- next split ----" \
+// RUN: | FileCheck -input-file %s -check-prefix=CHECK-SPLITTERS %s
+
 func.func @main() {return}

 // -----
@ -20,3 +27,9 @@ func.func @bar2() {return }

 // No error flagged at the end for a near miss.
 // ----
+
+// CHECK-SPLITTERS: module
+// CHECK-SPLITTERS: ---- next split ----
+// CHECK-SPLITTERS: module
+// CHECK-SPLITTERS: ---- next split ----
+// CHECK-SPLITTERS: module
--- a/mlir/test/mlir-pdll/split-markers.pdll
+++ b/mlir/test/mlir-pdll/split-markers.pdll
@ -0,0 +1,36 @@
+// Check that (1) the default input split marker is used if no custom marker is
+// specified and (2) the output file is merged using the default marker.
+// RUN: mlir-pdll %s -split-input-file \
+// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s
+
+// Check that the custom (3) input and (4) output split markers are used if
+// provided.
+// RUN: mlir-pdll %s \
+// RUN:   -split-input-file="// ""=====" -output-split-marker "// #####" \
+// RUN: | FileCheck -check-prefix=CHECK-CUSTOM %s
+
+// CHECK-DEFAULT:      Module
+// CHECK-DEFAULT-NEXT: PatternDecl
+// CHECK-DEFAULT-NOT:  PatternDecl
+// CHECK-DEFAULT:      //{{ }}-----
+// CHECK-DEFAULT-NEXT: Module
+// CHECK-DEFAULT-NEXT: PatternDecl
+// CHECK-DEFAULT:      PatternDecl
+
+// CHECK-CUSTOM:      Module
+// CHECK-CUSTOM-NEXT: PatternDecl
+// CHECK-CUSTOM:      PatternDecl
+// CHECK-CUSTOM:      // #####
+// CHECK-CUSTOM-NEXT: Module
+// CHECK-CUSTOM-NEXT: PatternDecl
+// CHECK-CUSTOM-NOT:  PatternDecl
+
+Pattern => erase op<test.op>;
+
+// -----
+
+Pattern => erase op<test.op2>;
+
+// =====
+
+Pattern => erase op<test.op3>;
--- a/mlir/test/mlir-translate/split-markers.mlir
+++ b/mlir/test/mlir-translate/split-markers.mlir
@ -0,0 +1,35 @@
+// Check that (1) the output split marker is inserted and (2) the input file is
+// split using the default split marker.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
+
+// With the second command, check that (3) the input split marker is used and
+// (4) the output split marker is empty if not specified.
+// RUN: mlir-translate %s -split-input-file="// ""-----" -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | mlir-translate -split-input-file -import-llvm \
+// RUN:   -split-input-file="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
+
+// Check that (5) the input is not split if `-split-input-file` is not given.
+// RUN: mlir-translate %s -mlir-to-llvmir \
+// RUN: | FileCheck -check-prefix=CHECK-NOSPLIT %s
+
+// CHECK-OUTPUT:      ModuleID
+// CHECK-OUTPUT:      ; -----
+// CHECK-OUTPUT-NEXT: ModuleID
+
+// CHECK-ROUNDTRIP:       module {{.*}} {
+// CHECK-ROUNDTRIP-NEXT:  }
+// CHECK-ROUNDTRIP-EMPTY:
+// CHECK-ROUNDTRIP:       module
+
+// CHECK-NOSPLIT:     ModuleID
+// CHECK-NOSPLIT-NOT: ModuleID
+
+module {}
+
+// -----
+
+module {}
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@ -136,11 +136,20 @@ int main(int argc, char **argv) {
      llvm::cl::desc(
          "Print out the parsed ODS information from the input file"),
      llvm::cl::init(false));
-  llvm::cl::opt<bool> splitInputFile(
-      "split-input-file",
-      llvm::cl::desc("Split the input file into pieces and process each "
-                     "chunk independently"),
-      llvm::cl::init(false));
+  llvm::cl::opt<std::string> inputSplitMarker(
+      "split-input-file", llvm::cl::ValueOptional,
+      llvm::cl::callback([&](const std::string &str) {
+        // Implicit value: use default marker if flag was used without value.
+        if (str.empty())
+          inputSplitMarker.setValue(kDefaultSplitMarker);
+      }),
+      llvm::cl::desc("Split the input file into chunks using the given or "
+                     "default marker and process each chunk independently"),
+      llvm::cl::init(""));
+  llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for merging the output"),
+      llvm::cl::init(kDefaultSplitMarker));
  llvm::cl::opt<enum OutputType> outputType(
      "x", llvm::cl::init(OutputType::AST),
      llvm::cl::desc("The type of output desired"),
@ -187,7 +196,7 @@ int main(int argc, char **argv) {
                         dumpODS, includedFiles);
  };
  if (failed(splitAndProcessBuffer(std::move(inputFile), processFn, outputStrOS,
-                                   splitInputFile)))
+                                   inputSplitMarker, outputSplitMarker)))
    return 1;

  // Write the output.