[mlir] Make the split markers of splitAndProcessBuffer configurable. (#84765)

This allows defining custom splitters, which is interesting for
non-MLIR inputs and outputs to `mlir-translate`. For example, one may
use `; -----` as a splitter of `.ll` files. The splitters are now passed
as arguments into `splitAndProcessBuffer`, the input splitter defaulting
to the previous default (`// -----`) and the output splitter defaulting
to the empty string, which also corresponds to the previous default. The
behavior of the input split marker should not change at all; however,
outputs now have one new line *more* than before if there is no splitter
(old: `insertMarkerInOutput = false`, new: `outputSplitMarker = ""`) and
one new line *less* if there is one. The value of the input splitter is
exposed as a command-line option of `mlir-translate` and other tools as
an optional value to the previously existing flag `-split-input-file`,
which defaults to the default splitter if not specified; the value of
the output splitter is exposed with the new `-output-split-marker`,
which defaults to the empty string in `mlir-translate` and the default
splitter in the other tools. In short, the previous usage or omission of
the flags should result in previous behavior (modulo the new lines
mentioned before).
This commit is contained in:
Ingo Müller 2024-03-14 13:55:50 +01:00 committed by GitHub
parent 2e271ceff6
commit 516ccce7fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 191 additions and 59 deletions

View File

@ -15,6 +15,8 @@
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include <memory>
namespace llvm {
@ -27,20 +29,24 @@ struct LogicalResult;
using ChunkBufferHandler = function_ref<LogicalResult(
std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
/// Splits the specified buffer on a marker (`// -----`), processes each chunk
/// independently according to the normal `processChunkBuffer` logic, and writes
/// all results to `os`.
/// Default split marker (`// -----`). It is the default input split marker of
/// `splitAndProcessBuffer` and is shared by the tools that split their input
/// into chunks or separate their output chunks.
extern inline const char *const kDefaultSplitMarker = "// -----";
/// Splits the specified buffer on a marker (`// -----` by default), processes
/// each chunk independently according to the normal `processChunkBuffer` logic,
/// and writes all results to `os`.
///
/// This is used to allow a large number of small independent tests to be put
/// into a single file. `enableSplitting` can be used to toggle if splitting
/// should be enabled, e.g. to allow for merging split and non-split code paths.
/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
/// between each of the processed output chunks.
/// into a single file. The input split marker is configurable. If it is empty,
/// splitting is disabled, which allows for merging split and non-split code
/// paths. The output split marker (empty by default), followed by a new line
/// character, is placed between each of the processed output chunks. (The new
/// line character is inserted even if the split marker is empty.)
LogicalResult
splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
ChunkBufferHandler processChunkBuffer, raw_ostream &os,
bool enableSplitting = true,
bool insertMarkerInOutput = false);
llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
llvm::StringRef outputSplitMarker = "");
} // namespace mlir
#endif // MLIR_SUPPORT_TOOLUTILITIES_H

View File

@ -15,6 +15,7 @@
#include "mlir/Debug/CLOptionsSetup.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Support/ToolUtilities.h"
#include "llvm/ADT/StringRef.h"
#include <cstdlib>
@ -136,13 +137,24 @@ public:
}
bool shouldShowDialects() const { return showDialectsFlag; }
/// Set whether to split the input file based on the `// -----` marker into
/// pieces and process each chunk independently.
MlirOptMainConfig &splitInputFile(bool split = true) {
splitInputFileFlag = split;
/// Set the marker on which to split the input into chunks and process each
/// chunk independently. Input is not split if empty.
MlirOptMainConfig &
splitInputFile(std::string splitMarker = kDefaultSplitMarker) {
splitInputFileFlag = std::move(splitMarker);
return *this;
}
bool shouldSplitInputFile() const { return splitInputFileFlag; }
/// Returns true if the input should be split into chunks, i.e. if a non-empty
/// input split marker has been set. An empty marker means "do not split".
bool shouldSplitInputFile() const { return !splitInputFileFlag.empty(); }
StringRef inputSplitMarker() const { return splitInputFileFlag; }
/// Set whether to merge the output chunks into one file using the given
/// marker.
MlirOptMainConfig &
outputSplitMarker(std::string splitMarker = kDefaultSplitMarker) {
outputSplitMarkerFlag = std::move(splitMarker);
return *this;
}
StringRef outputSplitMarker() const { return outputSplitMarkerFlag; }
/// Disable implicit addition of a top-level module op during parsing.
MlirOptMainConfig &useExplicitModule(bool useExplicitModule) {
@ -215,9 +227,12 @@ protected:
/// Show the registered dialects before trying to load the input file.
bool showDialectsFlag = false;
/// Split the input file based on the `// -----` marker into pieces and
/// process each chunk independently.
bool splitInputFileFlag = false;
/// Split the input file based on the given marker into chunks and process
/// each chunk independently. Input is not split if empty.
std::string splitInputFileFlag = "";
/// Merge output chunks into one file using the given marker.
std::string outputSplitMarkerFlag = "";
/// Use an explicit top-level module op during parsing.
bool useExplicitModuleFlag = false;

View File

@ -21,22 +21,20 @@ using namespace mlir;
LogicalResult
mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
ChunkBufferHandler processChunkBuffer,
raw_ostream &os, bool enableSplitting,
bool insertMarkerInOutput) {
raw_ostream &os, llvm::StringRef inputSplitMarker,
llvm::StringRef outputSplitMarker) {
// If splitting is disabled, we process the full input buffer.
if (!enableSplitting)
if (inputSplitMarker.empty())
return processChunkBuffer(std::move(originalBuffer), os);
const char splitMarkerConst[] = "// -----";
StringRef splitMarker(splitMarkerConst);
const int splitMarkerLen = splitMarker.size();
const int inputSplitMarkerLen = inputSplitMarker.size();
auto *origMemBuffer = originalBuffer.get();
SmallVector<StringRef, 8> rawSourceBuffers;
const int checkLen = 2;
// Split dropping the last checkLen chars to enable flagging near misses.
origMemBuffer->getBuffer().split(rawSourceBuffers,
splitMarker.drop_back(checkLen));
inputSplitMarker.drop_back(checkLen));
if (rawSourceBuffers.empty())
return success();
@ -58,8 +56,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
}
// Check that suffix is as expected and doesn't have any dash post.
bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
buffer.size() > checkLen && buffer[checkLen] != '0';
bool expectedSuffix =
buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
buffer.size() > checkLen && buffer[checkLen] != '0';
if (expectedSuffix) {
sourceBuffers.push_back(prev);
prev = buffer.drop_front(checkLen);
@ -69,8 +68,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
llvm::SourceMgr::DK_Warning,
"near miss with file split marker");
prev = StringRef(prev.data(),
prev.size() + splitMarkerLen - checkLen + buffer.size());
prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
checkLen + buffer.size());
}
}
if (!prev.empty())
@ -89,7 +88,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
hadFailure = true;
};
llvm::interleave(sourceBuffers, os, interleaveFn,
insertMarkerInOutput ? "\n// -----\n" : "");
(llvm::Twine(outputSplitMarker) + "\n").str());
// If any fails, then return a failure of the tool.
return failure(hadFailure);

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Tools/lsp-server-support/Transport.h"
#include "mlir/Support/ToolUtilities.h"
#include "mlir/Tools/lsp-server-support/Logging.h"
#include "mlir/Tools/lsp-server-support/Protocol.h"
#include "llvm/ADT/SmallString.h"
@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
StringRef lineRef = line.str().trim();
if (lineRef.starts_with("//")) {
// Found a delimiter for the message.
if (lineRef == "// -----")
if (lineRef == kDefaultSplitMarker)
break;
continue;
}

View File

@ -15,6 +15,7 @@
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Support/ToolUtilities.h"
#include "mlir/Tools/lsp-server-support/Logging.h"
#include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
#include "llvm/ADT/StringExtras.h"
@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
context.allowUnregisteredDialects();
// Split the file into separate MLIR documents.
// TODO: Find a way to share the split file marker with other tools. We don't
// want to use `splitAndProcessBuffer` here, but we do want to make sure this
// marker doesn't go out of sync.
SmallVector<StringRef, 8> subContents;
StringRef(contents).split(subContents, "// -----");
StringRef(contents).split(subContents, kDefaultSplitMarker);
chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));

View File

@ -127,11 +127,21 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
cl::desc("Print the list of registered dialects and exit"),
cl::location(showDialectsFlag), cl::init(false));
static cl::opt<bool, /*ExternalStorage=*/true> splitInputFile(
"split-input-file",
cl::desc("Split the input file into pieces and process each "
"chunk independently"),
cl::location(splitInputFileFlag), cl::init(false));
static cl::opt<std::string, /*ExternalStorage=*/true> splitInputFile(
"split-input-file", llvm::cl::ValueOptional,
cl::callback([&](const std::string &str) {
// Implicit value: use default marker if flag was used without value.
if (str.empty())
splitInputFile.setValue(kDefaultSplitMarker);
}),
cl::desc("Split the input file into chunks using the given or "
"default marker and process each chunk independently"),
cl::location(splitInputFileFlag), cl::init(""));
// Marker written between the processed output chunks. The value is stored in
// `outputSplitMarkerFlag` and defaults to the default split marker.
static cl::opt<std::string, /*ExternalStorage=*/true> outputSplitMarker(
    "output-split-marker",
    cl::desc("Split marker to use for merging the output"),
    cl::location(outputSplitMarkerFlag), cl::init(kDefaultSplitMarker));
static cl::opt<bool, /*ExternalStorage=*/true> verifyDiagnostics(
"verify-diagnostics",
@ -533,8 +543,8 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
threadPool);
};
return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
config.shouldSplitInputFile(),
/*insertMarkerInOutput=*/true);
config.inputSplitMarker(),
config.outputSplitMarker());
}
LogicalResult mlir::MlirOptMain(int argc, char **argv,

View File

@ -10,6 +10,7 @@
#include "Protocol.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Support/ToolUtilities.h"
#include "mlir/Tools/PDLL/AST/Context.h"
#include "mlir/Tools/PDLL/AST/Nodes.h"
#include "mlir/Tools/PDLL/AST/Types.h"
@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
[&](PDLTextFileChunk &chunk) {
chunk.document.getPDLLViewOutput(outputOS, kind);
},
[&] { outputOS << "\n// -----\n\n"; });
[&] { outputOS << "\n"
<< kDefaultSplitMarker << "\n\n"; });
}
return result;
}
@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
chunks.clear();
// Split the file into separate PDL documents.
// TODO: Find a way to share the split file marker with other tools. We don't
// want to use `splitAndProcessBuffer` here, but we do want to make sure this
// marker doesn't go out of sync.
SmallVector<StringRef, 8> subContents;
StringRef(contents).split(subContents, "// -----");
StringRef(contents).split(subContents, kDefaultSplitMarker);
chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
/*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
diagnostics));

View File

@ -62,11 +62,16 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
llvm::cl::desc("Allow operation with no registered dialects (discouraged: testing only!)"),
llvm::cl::init(false));
static llvm::cl::opt<bool> splitInputFile(
"split-input-file",
llvm::cl::desc("Split the input file into pieces and "
"process each chunk independently"),
llvm::cl::init(false));
static llvm::cl::opt<std::string> inputSplitMarker(
"split-input-file", llvm::cl::ValueOptional,
llvm::cl::callback([&](const std::string &str) {
// Implicit value: use default marker if flag was used without value.
if (str.empty())
inputSplitMarker.setValue(kDefaultSplitMarker);
}),
llvm::cl::desc("Split the input file into chunks using the given or "
"default marker and process each chunk independently"),
llvm::cl::init(""));
static llvm::cl::opt<bool> verifyDiagnostics(
"verify-diagnostics",
@ -80,6 +85,11 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
"(discouraged: testing only!)"),
llvm::cl::init(false));
// Marker written between the processed output chunks. It defaults to the
// empty string, in which case only a new line separates the chunks.
static llvm::cl::opt<std::string> outputSplitMarker(
    "output-split-marker",
    llvm::cl::desc("Split marker to use for merging the output"),
    llvm::cl::init(""));
llvm::InitLLVM y(argc, argv);
// Add flags for all the registered translations.
@ -176,7 +186,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
};
if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
output->os(), splitInputFile)))
output->os(), inputSplitMarker,
outputSplitMarker)))
return failure();
output->keep();

View File

@ -1,6 +1,13 @@
// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t && FileCheck --input-file %t %s
// Check near-miss mechanics:
// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t \
// RUN: && FileCheck --input-file %t %s
// RUN: cat %t
// Check that (1) custom input splitter and (2) custom output splitters work.
// RUN: mlir-opt %s -split-input-file="// CHECK: ""----" \
// RUN: -output-split-marker="// ---- next split ----" \
// RUN: | FileCheck -input-file %s -check-prefix=CHECK-SPLITTERS %s
func.func @main() {return}
// -----
@ -20,3 +27,9 @@ func.func @bar2() {return }
// No error flagged at the end for a near miss.
// ----
// CHECK-SPLITTERS: module
// CHECK-SPLITTERS: ---- next split ----
// CHECK-SPLITTERS: module
// CHECK-SPLITTERS: ---- next split ----
// CHECK-SPLITTERS: module

View File

@ -0,0 +1,36 @@
// Check that (1) the default input split marker is used if no custom marker is
// specified and (2) the output file is merged using the default marker.
// RUN: mlir-pdll %s -split-input-file \
// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s
// Check that the custom (3) input and (4) output split markers are used if
// provided.
// RUN: mlir-pdll %s \
// RUN: -split-input-file="// ""=====" -output-split-marker "// #####" \
// RUN: | FileCheck -check-prefix=CHECK-CUSTOM %s
// CHECK-DEFAULT: Module
// CHECK-DEFAULT-NEXT: PatternDecl
// CHECK-DEFAULT-NOT: PatternDecl
// CHECK-DEFAULT: //{{ }}-----
// CHECK-DEFAULT-NEXT: Module
// CHECK-DEFAULT-NEXT: PatternDecl
// CHECK-DEFAULT: PatternDecl
// CHECK-CUSTOM: Module
// CHECK-CUSTOM-NEXT: PatternDecl
// CHECK-CUSTOM: PatternDecl
// CHECK-CUSTOM: // #####
// CHECK-CUSTOM-NEXT: Module
// CHECK-CUSTOM-NEXT: PatternDecl
// CHECK-CUSTOM-NOT: PatternDecl
Pattern => erase op<test.op>;
// -----
Pattern => erase op<test.op2>;
// =====
Pattern => erase op<test.op3>;

View File

@ -0,0 +1,35 @@
// Check that (1) the output split marker is inserted and (2) the input file is
// split using the default split marker.
// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
// RUN: -output-split-marker="; -----" \
// RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
// With the second command, check that (3) the input split marker is used and
// (4) the output split marker is empty if not specified.
// RUN: mlir-translate %s -split-input-file="// ""-----" -mlir-to-llvmir \
// RUN: -output-split-marker="; -----" \
// RUN: | mlir-translate -split-input-file -import-llvm \
// RUN: -split-input-file="; -----" \
// RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
// Check that (5) the input is not split if `-split-input-file` is not given.
// RUN: mlir-translate %s -mlir-to-llvmir \
// RUN: | FileCheck -check-prefix=CHECK-NOSPLIT %s
// CHECK-OUTPUT: ModuleID
// CHECK-OUTPUT: ; -----
// CHECK-OUTPUT-NEXT: ModuleID
// CHECK-ROUNDTRIP: module {{.*}} {
// CHECK-ROUNDTRIP-NEXT: }
// CHECK-ROUNDTRIP-EMPTY:
// CHECK-ROUNDTRIP: module
// CHECK-NOSPLIT: ModuleID
// CHECK-NOSPLIT-NOT: ModuleID
module {}
// -----
module {}

View File

@ -136,11 +136,20 @@ int main(int argc, char **argv) {
llvm::cl::desc(
"Print out the parsed ODS information from the input file"),
llvm::cl::init(false));
llvm::cl::opt<bool> splitInputFile(
"split-input-file",
llvm::cl::desc("Split the input file into pieces and process each "
"chunk independently"),
llvm::cl::init(false));
llvm::cl::opt<std::string> inputSplitMarker(
"split-input-file", llvm::cl::ValueOptional,
llvm::cl::callback([&](const std::string &str) {
// Implicit value: use default marker if flag was used without value.
if (str.empty())
inputSplitMarker.setValue(kDefaultSplitMarker);
}),
llvm::cl::desc("Split the input file into chunks using the given or "
"default marker and process each chunk independently"),
llvm::cl::init(""));
// Marker written between the processed output chunks. It defaults to the
// default split marker.
llvm::cl::opt<std::string> outputSplitMarker(
    "output-split-marker",
    llvm::cl::desc("Split marker to use for merging the output"),
    llvm::cl::init(kDefaultSplitMarker));
llvm::cl::opt<enum OutputType> outputType(
"x", llvm::cl::init(OutputType::AST),
llvm::cl::desc("The type of output desired"),
@ -187,7 +196,7 @@ int main(int argc, char **argv) {
dumpODS, includedFiles);
};
if (failed(splitAndProcessBuffer(std::move(inputFile), processFn, outputStrOS,
splitInputFile)))
inputSplitMarker, outputSplitMarker)))
return 1;
// Write the output.