diff --git a/hugo/content/docs/recipes/builtin-library.md b/hugo/content/docs/recipes/builtin-library.md index 974047cd..fcb3e902 100644 --- a/hugo/content/docs/recipes/builtin-library.md +++ b/hugo/content/docs/recipes/builtin-library.md @@ -24,8 +24,9 @@ import { AstNode, DefaultWorkspaceManager, LangiumDocument, - LangiumSharedServices + LangiumDocumentFactory } from "langium"; +import { LangiumSharedServices } from "langium/lsp"; import { WorkspaceFolder } from 'vscode-languageserver'; import { URI } from "vscode-uri"; import { builtinHelloWorld } from './builtins'; @@ -162,3 +163,14 @@ export function activate(context: vscode.ExtensionContext) { This registers an in-memory file system for vscode to use for the `builtin` file schema. Every time vscode is supposed to open a file with this schema, it will invoke the `stat` and `readFile` methods of the registered file system provider. + +To ensure that LSP services (such as hover, outline, go to definition, etc.) work properly inside a built-in file, make sure that LanguageClientOptions is correctly configured. The document selector used for your language should handle the `builtin` scheme. It is recommended to support all schemes, either by removing the scheme option or by setting the scheme option to `'*'`. 
+ +```ts +// Options to control the language client +clientOptions: LanguageClientOptions = { + documentSelector: [{ language: 'mydsl' }], + // Alternatively: + documentSelector: [{ scheme: '*', language: 'mydsl' }], +} +``` diff --git a/hugo/content/docs/recipes/performance/_index.md b/hugo/content/docs/recipes/performance/_index.md new file mode 100644 index 00000000..5f2a4082 --- /dev/null +++ b/hugo/content/docs/recipes/performance/_index.md @@ -0,0 +1,4 @@ +--- +title: "Performance" +weight: 175 +--- \ No newline at end of file diff --git a/hugo/content/docs/recipes/performance/caches.md b/hugo/content/docs/recipes/performance/caches.md new file mode 100644 index 00000000..f2a7a1b6 --- /dev/null +++ b/hugo/content/docs/recipes/performance/caches.md @@ -0,0 +1,215 @@ +--- +title: "Caches" +weight: 0 +--- + +## What is the problem? + +You have parsed a document and you would like to execute some computation on the AST. But you don’t want to do this every time you see a certain node. You want to do it once for the lifetime of a document. Where should the result be stored? + +## How to solve it? + +For data that depends on the lifetime of a document or even the entire workspace, Langium has several kinds of caches: + +* the document cache saves key-value pairs of given types `K` and `V` for each document. If a document gets changed or deleted, the cache entries for that single document are cleared automatically +* the workspace cache also saves key-value pairs of given types `K` and `V`, but gets cleared entirely when something in the workspace changes + +Besides those specific caches, Langium also provides: + +* a simple cache that can be used for any kind of key-value data +* a context cache that stores a simple cache for each context object. The document cache and workspace cache are implemented using the context cache + +## How to use it? + +Here we will use the `HelloWorld` example from the learning section. 
Let's keep it simple and just list persons in a document, each of whom comes from a comic book. + +We will have a computation for each person that determines which publisher that person comes from. + +### Add a database + +Let's build a "publisher inferer service". First let's create a small database of known publishers and known persons: + +```typescript +type KnownPublisher = 'DC' | 'Marvel' | 'Egmont'; +const KnownPersonNames: Record<KnownPublisher, string[]> = { + DC: ['Superman', 'Batman', 'Aquaman', 'Wonderwoman', 'Flash'], + Marvel: ['Spiderman', 'Wolverine', 'Deadpool'], + Egmont: ['Asterix', 'Obelix'] +}; +``` + +### Define the computation service + +For our service we define an interface: + +```typescript +export interface InferPublisherService { + inferPublisher(person: Person): KnownPublisher | undefined; +} +``` + +Now we implement the service: + +```typescript +class UncachedInferPublisherService implements InferPublisherService { + inferPublisher(person: Person): KnownPublisher | undefined { + for (const [publisher, persons] of Object.entries(KnownPersonNames)) { + if (persons.includes(person.name)) { + return publisher as KnownPublisher; + } + } + return undefined; + } +} +``` + +### Add a cache + +Now we want to cache the results of the `inferPublisher` method. We can use the `DocumentCache` for this. 
We will reuse the uncached service as base class and override the `inferPublisher` method: + +```typescript +export class CachedInferPublisherService extends UncachedInferPublisherService { + private readonly cache: DocumentCache; + constructor(services: HelloWorldServices) { + super(); + this.cache = new DocumentCache(services.shared); + } + override inferPublisher(person: Person): KnownPublisher | undefined { + const documentUri = AstUtils.getDocument(person).uri; + //get cache entry for the documentUri and the person + //if it does not exist, calculate the value and store it + return this.cache.get(documentUri, person, () => super.inferPublisher(person)); + } +} +``` + +### Use the service + +To use this service, let's create a validator that checks if the publisher of a person is known. Go to the `hello-world-validator.ts` file and add the following code: + +```typescript +import type { ValidationAcceptor, ValidationChecks } from 'langium'; +import type { HelloWorldAstType, Person } from './generated/ast.js'; +import type { HelloWorldServices } from './hello-world-module.js'; +import { InferPublisherService } from './infer-publisher-service.js'; + +/** + * Register custom validation checks. + */ +export function registerValidationChecks(services: HelloWorldServices) { + const registry = services.validation.ValidationRegistry; + const validator = services.validation.HelloWorldValidator; + const checks: ValidationChecks = { + Person: validator.checkPersonIsFromKnownPublisher + }; + registry.register(checks, validator); +} + +/** + * Implementation of custom validations. 
+ */ +export class HelloWorldValidator { + private readonly inferPublisherService: InferPublisherService; + + constructor(services: HelloWorldServices) { + this.inferPublisherService = services.utilities.inferPublisherService; + } + + checkPersonIsFromKnownPublisher(person: Person, accept: ValidationAcceptor): void { + if (!this.inferPublisherService.inferPublisher(person)) { + accept('warning', `"${person.name}" is not from a known publisher.`, { + node: person + }); + } + } + +} +``` + +### Register the service + +Finally, we need to register the service in the module. Go to the `hello-world-module.ts` file and add the following code: + +```typescript +export type HelloWorldAddedServices = { + utilities: { + inferPublisherService: InferPublisherService + }, + validation: { + HelloWorldValidator: HelloWorldValidator + } +} +//... +export const HelloWorldModule: Module = { + utilities: { + inferPublisherService: (services) => new CachedInferPublisherService(services) + }, + validation: { + //add `services` parameter here + HelloWorldValidator: (services) => new HelloWorldValidator(services) + } +}; +``` + +### Test the result + +Start the extension and create a `.hello` file with several persons, like this one: + +```plaintext +person Wonderwoman +person Spiderman +person Homer //warning: unknown publisher!! +person Obelix +``` + +## Last words + +Caching can improve the performance of your language server. It is especially useful for computations that are expensive to calculate. The `DocumentCache` and `WorkspaceCache` are the most common caches to use. The `ContextCache` is useful if you need to store data for a specific context object. If you only need a key-value store, you can use the `SimpleCache`. +All of these caches are disposable compared to a simple `Map`. If you dispose them by calling `dispose()` the entries will be removed and the memory will be freed. 
Plus, from the moment you have called `dispose()`, the cache will not react to changes in the workspace anymore. + +## Appendix + +
+Full implementation + +```typescript +import { AstUtils, DocumentCache } from "langium"; +import { Person } from "./generated/ast.js"; +import { HelloWorldServices } from "./hello-world-module.js"; + +type KnownPublisher = 'DC' | 'Marvel' | 'Egmont'; +const KnownPersonNames: Record = { + DC: ['Superman', 'Batman', 'Aquaman', 'Wonderwoman', 'Flash'], + Marvel: ['Spiderman', 'Wolverine', 'Deadpool'], + Egmont: ['Asterix', 'Obelix'] +}; + +export interface InferPublisherService { + inferPublisher(person: Person): KnownPublisher | undefined; +} + +class UncachedInferPublisherService implements InferPublisherService { + inferPublisher(person: Person): KnownPublisher | undefined { + for (const [publisher, persons] of Object.entries(KnownPersonNames)) { + if (persons.includes(person.name)) { + return publisher as KnownPublisher; + } + } + return undefined; + } +} + +export class CachedInferPublisherService extends UncachedInferPublisherService { + private readonly cache: DocumentCache; + constructor(services: HelloWorldServices) { + super(); + this.cache = new DocumentCache(services.shared); + } + override inferPublisher(person: Person): KnownPublisher | undefined { + const documentUri = AstUtils.getDocument(person).uri; + return this.cache.get(documentUri, person, () => super.inferPublisher(person)); + } +} +``` + +
diff --git a/hugo/content/docs/recipes/validation/_index.md b/hugo/content/docs/recipes/validation/_index.md new file mode 100644 index 00000000..9f65581c --- /dev/null +++ b/hugo/content/docs/recipes/validation/_index.md @@ -0,0 +1,4 @@ +--- +title: Validation +weight: 150 +--- \ No newline at end of file diff --git a/hugo/content/docs/recipes/validation/dependency-loops.md b/hugo/content/docs/recipes/validation/dependency-loops.md new file mode 100644 index 00000000..c02a7505 --- /dev/null +++ b/hugo/content/docs/recipes/validation/dependency-loops.md @@ -0,0 +1,336 @@ +--- +title: "Dependency Loops" +weight: 100 +--- + +## What is the problem? + +If you are building composite data structures or have some computation flow, you might want to know whether the product you are generating contains any loops back to a part that was already used. We want to implement a validation that does this detection in the background and notifies us by highlighting the lines causing the problem. +The second part of the problem is the resolution of the dependencies. If there are no loops, we want to get a resolution of the dependencies in a loop-free order. + +Examples of such dependency loops are: + +* For data structures you could think of a structure in C that references itself (without using the pointer notation). This would lead to an infinitely expanding data type, which is not feasible in practice. + + ```c + typedef struct { + int value; + LinkedList next; //error, should be "LinkedList* next;", a pointer to the next element + } LinkedList; + ``` + +* Or for control flows these loops can be interpreted as recursion detection: a function that calls itself (with any number of calls to other functions in-between). 
+ + ```c + void foo() { + bar(); + } + void bar() { + answer42(); + } + void answer42() { + bar(); //error, bar calls answer42 and answer42 calls bar again + } + ``` + +Regardless of your use case, you will probably want to detect those loops and get early feedback in the form of a validation error. + +The other point is the **resolution for loop-free dependencies**. Think of a net of package imports in +a programming language. You want to know the order in which you can import the packages without getting into trouble. + +```plaintext +A -> B -> C +A -> C +C -> D + +//resolution: A, B, C, D +``` + +Or think of a function call graph. You want to know the order in which you can build the functions such that every dependency was built before the dependent function. + +```c +void answer42() { + printf("42\n"); +} +void bar1() { + answer42(); +} +void foo() { + bar1(); + bar2(); +} +void bar2() { + answer42(); +} +``` + +Here the resolution would be: `answer42`, `bar1`, `bar2`, `foo`. + +## How to solve it? + +There are two approaches to loop detection and loop-free resolution, depending on the nature of your situation. + +### Simple nature + +#### Simple detection + +If you have a `1:n` relationship like the `super class`-to-`sub class` relation for classes, you can detect loops by simply walking along the parent route (or in this specific example the `super class`-route). Just keep track of all visited classes; if a parent is already in that list, you have detected a loop! + +```java +public class A extends B {} +public class B extends C {} +public class C extends A {} //error +``` + +#### Simple resolution + +Assuming that you have no loops back, you can resolve a list of dependencies. +You do a simple depth-first search, starting with the parent and visiting the children afterwards (recursively). 
+ +```java +public class A {} //add A +public class B extends A {} //add A -> B +public class C extends A {} //add A -> C +public class D extends C {} //add C -> D +public class E extends C {} //add C -> E + +//resolution: A, (B or (C, (D or E))) +``` + +### Complex nature + +#### Complex detection + +If you have an `n:m` relationship, as is the case for function calls (a function can be called by `m` functions and can call `n` functions), you can answer the question of loops by creating a directed graph. + +In this example the nodes are the set of all functions and function calls are stored as edges (for each call from function A to every function B). +The key algorithm is the search for the so-called strongly-connected components in the resulting graph. + +Please use an existing solution for the algorithm (keep in mind the effort you can avoid and the quality you can gain)! The algorithm is able to output every loop together with all of its members. But you are free to make your own implementation. + +#### Complex resolution + +The directed graph approach can be processed further when no loops were found: + +With a "topological sort" you can get an order that respects all dependencies. In short: you start with a node that has no dependencies, remove it, put it into your sorted list and do the same for the resulting graph again and again until all dependencies are resolved. + +The topological sort (as well as the strongly-connected component search) is a standard algorithm in every good graph library. + +## How to make it work in Langium? + +In the following example we will resolve the dependencies for data of a complex nature. + +To do so, we will take the `HelloWorld` example from the learning section and extend it with a validation that checks for greeting loops. Greeting loops are forbidden in this example. When `A` greets `B` and `B` greets `C`, then `C` must not greet `A`. 
+ +### Adapt the grammar + +We will change the `HelloWorld` grammar so that persons can greet each other. After that, we will introduce a validation in order to forbid "greeting loops". + +```langium +grammar HelloWorld + +entry Model: + (persons+=Person | greetings+=Greeting)*; + +Person: + 'person' name=ID; + +Greeting: + greeter=[Person:ID] 'greets' greeted=[Person:ID] '!'; + +hidden terminal WS: /\s+/; +terminal ID: /[_a-zA-Z][\w_]*/; +``` + +After the change, rebuild your grammar with `npm run langium:generate`. + +### Loop detection + +Now we will add the validation. Here we will use the graph library `graphology`. Please install these three packages (`graphology` contains the data structure, `graphology-components` contains the strongly-connected component search, `graphology-dag` contains the topological sort): + +```bash +npm install graphology graphology-components graphology-dag +``` + +Open the `hello-world-validator.ts` file and add another validator for `Model`. Note that we do not create a check at the `Greeting` level, because we need an overview of all greetings. Only the `Model` AST node provides this complete overview. It would be possible to just calculate cycles for a single greeting or person, but that is more complex and less performant! + +```typescript +const checks: ValidationChecks<HelloWorldAstType> = { + Model: validator.checkGreetingCycles, // new!!! 
+ Person: validator.checkPersonStartsWithCapital +}; +``` + +And here is the implementation: + +```typescript +checkGreetingCycles(model: Model, accept: ValidationAcceptor): void { + //arrange the graph + const graph = new DirectedGraph<{}, {greeting: Greeting}>(); + model.persons.forEach(person => { + graph.addNode(person.name); + }) + model.greetings.forEach(greeting => { + if(greeting.greeter.ref && greeting.greeted.ref && !graph.hasDirectedEdge(greeting.greeter.ref.name, greeting.greeted.ref.name)) { + graph.addEdge(greeting.greeter.ref.name, greeting.greeted.ref.name, { + greeting //we store the greeting for later reference in the validation message + }); + } + }); + + //compute the components + const components = stronglyConnectedComponents(graph); + + //evaluate result (filter out size-1-components) + const actualLoops = components.filter(c => c.length > 1); + for (const component of actualLoops) { + const set = new Set(component); + //for each node in the component... + for (const from of set) { + //check whether the out edges... + for (const { target: to, attributes: { greeting } } of graph.outEdgeEntries(from)) { + //are within the component + if(set.has(to)) { + //if yes, set an error on the corresponding greeting + accept("error", "Greeting loop detected!", { + node: greeting + }); + } + } + } + } +} +``` + +After finishing your validator, do not forget to build your project with `npm run build`. +So a `.hello` file like this one, would have 3 greetings with an error: + +```plaintext +person Homer +person Marge +person Pinky +person Brain + +Homer greets Marge! //error +Marge greets Brain! //error +Brain greets Homer! //error +Pinky greets Marge! +``` + +Here is the screenshot of VS Code with the error: + +![Greeting loop errors](/assets/dependency-loops.png) + +### Dependency resolution + +The topological sort can be done like this: + +```typescript +import { topologicalSort } from 'graphology-dag'; + +//resolvedOrder is an array of person names! 
+const resolvedOrder = topologicalSort(graph); +``` + +This will give you back an order of greeters. The rule is: `You can only greet once every greeting addressed to you has already been spoken.` +For a `.hello` file like this, we would get the order: `Homer`, `Brain`, `Pinky`, `Marge`. + +```plaintext +person Homer +person Marge + +person Pinky +person Brain + +Homer greets Marge! +Brain greets Pinky! +Pinky greets Marge! +``` + +* `Homer` is not greeted by anyone, so he can start by greeting `Marge`. +* `Marge` is still blocked by `Pinky`, and `Pinky` is blocked by `Brain`. +* `Brain` is next and unblocks `Pinky`. +* After `Pinky` is done, `Marge` is unblocked as well. +* But `Marge` has no one to greet. +* So, we are done. + +## Appendix + +
+Full Implementation + +```ts +import type { ValidationAcceptor, ValidationChecks } from 'langium'; +import type { Greeting, HelloWorldAstType, Model } from './generated/ast.js'; +import type { HelloWorldServices } from './hello-world-module.js'; +import { DirectedGraph } from 'graphology'; +import { stronglyConnectedComponents } from 'graphology-components'; +import { topologicalSort } from 'graphology-dag'; + +/** + * Register custom validation checks. + */ +export function registerValidationChecks(services: HelloWorldServices) { + const registry = services.validation.ValidationRegistry; + const validator = services.validation.HelloWorldValidator; + const checks: ValidationChecks = { + Model: validator.checkGreetingCycles, + //Not needed for this example + //Person: validator.checkPersonStartsWithCapital + }; + registry.register(checks, validator); +} + +/** + * Implementation of custom validations. + */ +export class HelloWorldValidator { + checkGreetingCycles(model: Model, accept: ValidationAcceptor): void { + //arrange the graph + const graph = new DirectedGraph<{}, {greeting: Greeting}>(); + model.persons.forEach(person => { + graph.addNode(person.name); + }) + model.greetings.forEach(greeting => { + if(greeting.greeter.ref && greeting.greeted.ref && !graph.hasDirectedEdge(greeting.greeter.ref.name, greeting.greeted.ref.name)) { + graph.addEdge(greeting.greeter.ref.name, greeting.greeted.ref.name, { + greeting + }); + } + }); + + //compute the components + const components = stronglyConnectedComponents(graph); + + //evaluate result (filter out size-1-components) + const actualLoops = components.filter(c => c.length > 1); + for (const component of actualLoops) { + const set = new Set(component); + //for each node in the component... + for (const from of set) { + //check whether the out edges... 
+ for (const { target: to, attributes: { greeting } } of graph.outEdgeEntries(from)) { + //are within the component + if(set.has(to)) { + //if yes, set an error on the corresponding greeting + accept("error", "Greeting loop detected!", { + node: greeting + }); + } + } + } + } + + //resolve all dependencies + if(actualLoops.length === 0) { + const resolvedOrder = topologicalSort(graph); + //this is done as a hint, just for demonstration purposes + accept('hint', "Please greet in the following greeter order: "+resolvedOrder.join(", "), { + node: model + }); + } + } +} +``` + +
diff --git a/hugo/static/assets/dependency-loops.png b/hugo/static/assets/dependency-loops.png new file mode 100644 index 00000000..f658d0dd Binary files /dev/null and b/hugo/static/assets/dependency-loops.png differ