first commit

This commit was merged in pull request #1.
This commit is contained in:
Tinglyyy
2025-12-13 16:12:41 +01:00
parent a7ccf84817
commit f07e0bc50a
52 changed files with 2657 additions and 0 deletions

View File

@@ -0,0 +1,136 @@
package org.openautonomousconnection;
import org.jetbrains.annotations.NotNull;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Temporary string helpers used by the HTML parser.
 * <p>
 * Will be removed once UnlegitLibrary.StringUtils contains these methods.
 * All separators and capsules are treated as literal text, never as regular expressions.
 */
@Deprecated(since = "1.0")
public class StringUtils_Remove_Please {

    /**
     * Compares two strings case-insensitively after removing every whitespace character.
     *
     * @param s1 first string, must not be null
     * @param s2 second string, must not be null
     * @return true when both strings are equal ignoring case and whitespace
     */
    public static boolean equalsIgnoreWhiteSpaces(String s1, String s2) {
        return s1.replaceAll("\\s", "")
                .equalsIgnoreCase(s2.replaceAll("\\s", ""));
    }

    /**
     * Splits every token at each occurrence of {@code seq}, keeping the separator attached
     * to the end of the piece that precedes it (e.g. {@code "a>b"} becomes {@code "a>"}, {@code "b"}).
     * <p>
     * After splitting, the last character of the very last resulting piece is removed when
     * that piece is not empty. NOTE(review): this trimming is inherited from the original
     * implementation and the parser depends on it; confirm the intent before changing it.
     *
     * @param tokens tokens to split; an empty array yields an empty result
     * @param seq    literal separator; an empty separator leaves every token unsplit
     * @return the resulting pieces in order
     */
    public static String[] splitSeq(String[] tokens, String seq) {
        List<String> pieces = new ArrayList<>();
        for (String token : tokens) {
            appendPieces(pieces, token, seq);
        }
        if (!pieces.isEmpty()) {
            int lastIndex = pieces.size() - 1;
            String lastToken = pieces.get(lastIndex);
            if (!lastToken.isEmpty()) {
                pieces.set(lastIndex, lastToken.substring(0, lastToken.length() - 1));
            }
        }
        return pieces.toArray(new String[0]);
    }

    /** Appends the pieces of one token to {@code out}, each piece ending with {@code seq} except possibly the last. */
    private static void appendPieces(List<String> out, String token, String seq) {
        if (seq.isEmpty() || !token.contains(seq)) {
            out.add(token);
            return;
        }
        int start = 0;
        int hit = token.indexOf(seq, start);
        while (hit >= 0) {
            int end = hit + seq.length();
            out.add(token.substring(start, end));
            start = end;
            hit = token.indexOf(seq, start);
        }
        // Whatever follows the last separator is a piece of its own.
        if (start < token.length()) {
            out.add(token.substring(start));
        }
    }

    /**
     * Counts the non-overlapping occurrences of {@code seq} in {@code string}.
     *
     * @param string text to search
     * @param seq    literal sequence to count; an empty sequence counts as zero occurrences
     * @return number of occurrences, never negative
     */
    public static int countSeq(String string, String seq) {
        if (seq.isEmpty()) {
            return 0;
        }
        int amount = 0;
        int at = string.indexOf(seq);
        while (at >= 0) {
            amount++;
            at = string.indexOf(seq, at + seq.length());
        }
        return amount;
    }

    /**
     * Separates {@code text} into the parts outside and inside of capsule pairs.
     * The capsule that occurs first in the remaining text opens a section, and the next
     * occurrence of the same capsule closes it.
     *
     * @param text     text to scan
     * @param capsules literal delimiters, e.g. {@code "\""} and {@code "'"}
     * @return two lists: index 0 holds the texts outside of capsules, index 1 the texts inside;
     *         an unterminated capsule contributes the rest of the text as its inside part
     */
    public static List<List<String>> getEncapsulatedTexts(String text, String... capsules) {
        List<String> outside = new ArrayList<>();
        List<String> inside = new ArrayList<>();
        List<List<String>> lists = new ArrayList<>();
        lists.add(outside);
        lists.add(inside);
        while (!text.isEmpty()) {
            String capsule = capsules.length == 0 ? "" : containsManySorted(text, capsules)[0];
            if (capsule.isEmpty()) {
                // No capsule left: everything remaining is outside text.
                outside.add(text);
                break;
            }
            int open = text.indexOf(capsule);
            outside.add(text.substring(0, open));
            text = text.substring(open + capsule.length());
            int close = text.indexOf(capsule);
            if (close < 0) {
                // Unterminated capsule: treat the remainder as its content instead of failing.
                inside.add(text);
                break;
            }
            inside.add(text.substring(0, close));
            text = text.substring(close + capsule.length());
        }
        return lists;
    }

    /**
     * Reports which of {@code strings} occur in {@code string}.
     *
     * @return an array parallel to {@code strings}; an entry is the candidate itself when it
     *         is contained in {@code string} and {@code null} otherwise
     */
    public static String[] containsMany(String string, String... strings) {
        String[] result = new String[strings.length];
        for (int i = 0; i < strings.length; i++) {
            if (string.contains(strings[i])) {
                result[i] = strings[i];
            }
        }
        return result;
    }

    /**
     * Lists the candidates contained in {@code string}, ordered by their first position.
     *
     * @return an array as long as {@code strings}: the contained candidates come first, sorted
     *         by first occurrence (ties keep argument order), the remaining slots are {@code null};
     *         when no candidate is contained the first slot holds the empty string
     */
    public static String[] containsManySorted(String string, String... strings) {
        String[] result = new String[strings.length];
        if (strings.length == 0) {
            return result;
        }
        List<String> found = new ArrayList<>();
        for (String candidate : strings) {
            if (string.contains(candidate)) {
                found.add(candidate);
            }
        }
        if (found.isEmpty()) {
            result[0] = "";
            return result;
        }
        // List.sort is stable, so candidates at the same position keep their argument order.
        found.sort((a, b) -> Integer.compare(string.indexOf(a), string.indexOf(b)));
        for (int i = 0; i < found.size(); i++) {
            result[i] = found.get(i);
        }
        return result;
    }
}

View File

@@ -0,0 +1,157 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser;
import dev.unlegitdqrk.unlegitlibrary.string.StringUtils;
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLComment;
import lombok.Getter;
import lombok.Setter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Temporarily replaces comments, quoted strings and text nodes of an HTML document with
 * numbered placeholders ({@code <!--Cn-->}, {@code "Sn"}, {@code >Tn<}) and restores them later.
 */
public class DocumentBuilder {
    // Compiled once; the expressions are identical to the ones previously compiled per call.
    private static final Pattern COMMENT_PATTERN = Pattern.compile("<!--(.*?)-->", Pattern.DOTALL);
    private static final Pattern STRING_PATTERN = Pattern.compile("\"(.*?)\"|'(.*?)'", Pattern.DOTALL);
    private static final Pattern TEXT_PATTERN = Pattern.compile(">([^<]+)(?=<)", Pattern.DOTALL);

    @Getter @Setter
    protected String content;
    @Getter
    protected List<HTMLComment> comments;
    @Getter
    protected List<String> attributes, texts, tags;

    /**
     * @param content the document text to work on
     */
    public DocumentBuilder(String content) {
        this.content = content; //content.replace("\n", "");
        this.comments = new ArrayList<>();
        this.attributes = new ArrayList<>();
        this.texts = new ArrayList<>();
        // Previously left null, which made getTags() return null.
        this.tags = new ArrayList<>();
    }

    /**
     * Extracts all comments, strings and texts into their lists.
     */
    public void extract() {
        this.extractComments();
        this.extractStringsAndAttributes();
        this.extractTexts();
    }

    /**
     * Inserts the extracts back into the content string, in reverse order of extraction.
     */
    public void insert() {
        this.insertTexts();
        this.insertStringsAndAttributes();
        this.insertComments();
    }

    /** Replaces every {@code <!--...-->} comment with {@code <!--Cn-->} and stores its text. */
    protected void extractComments() {
        Matcher matcher = COMMENT_PATTERN.matcher(content);
        int index = 0;
        while (matcher.find()) {
            this.content = this.content.replace("<!--" + matcher.group(1) + "-->", "<!--C" + index + "-->");
            this.comments.add(new HTMLComment(matcher.group(1)));
            index++;
        }
    }

    /** Restores the extracted comments and empties the comment list. */
    protected void insertComments() {
        for (int i = 0; i < this.comments.size(); i++) {
            // HTMLElement.toString() renders children only and would drop the comment text,
            // so the comment is rebuilt from its stored text.
            String original = "<!--" + this.comments.get(i).getText() + "-->";
            this.content = this.content.replace("<!--C" + i + "-->", original);
        }
        this.comments.clear();
    }

    /** Replaces every single- or double-quoted string with {@code "Sn"} / {@code 'Sn'} and stores its content. */
    protected void extractStringsAndAttributes() {
        Matcher matcher = STRING_PATTERN.matcher(this.content);
        int index = 0;
        while (matcher.find()) {
            if (matcher.group(1) != null) {
                this.content = this.content.replace("\"" + matcher.group(1) + "\"", "\"S" + index + "\"");
                this.attributes.add(matcher.group(1));
            } else {
                this.content = this.content.replace("'" + matcher.group(2) + "'", "'S" + index + "'");
                this.attributes.add(matcher.group(2));
            }
            index++;
        }
    }

    /** Restores the extracted strings and empties the string list. */
    protected void insertStringsAndAttributes() {
        for (int i = 0; i < this.attributes.size(); i++) {
            String value = this.attributes.get(i);
            this.content = this.content.replace("\"S" + i + "\"", "\"" + value + "\"");
            this.content = this.content.replace("'S" + i + "'", "'" + value + "'");
        }
        this.attributes.clear();
    }

    /** Replaces every non-empty text between {@code >} and {@code <} with {@code Tn} and stores it. */
    protected void extractTexts() {
        Matcher matcher = TEXT_PATTERN.matcher(content);
        int index = 0;
        while (matcher.find()) {
            if (StringUtils.isEmptyString(matcher.group(1))) {
                continue;
            }
            this.content = this.content.replace(">" + matcher.group(1) + "<", ">T" + index + "<");
            this.texts.add(matcher.group(1));
            index++;
        }
    }

    /** Restores the extracted texts and empties the text list. */
    protected void insertTexts() {
        for (int i = 0; i < this.texts.size(); i++) {
            this.content = this.content.replace(">T" + i + "<", ">" + this.texts.get(i) + "<");
        }
        this.texts.clear();
    }

    /** Not implemented yet. */
    protected void extractTags() {
    }
}

View File

@@ -0,0 +1,18 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser;
import org.jetbrains.annotations.NotNull;
/**
 * A parsed tag name together with the known tag it is assumed to be.
 *
 * @param tagname    the name as found in the document
 * @param assumption the registered tag name it may stand for
 */
public record ParseResult(String tagname, String assumption) implements Comparable<ParseResult> {

    /**
     * @return the case-insensitive comparison of {@code tagname} against {@code assumption};
     *         zero means both are equal ignoring case
     */
    public int compareSelf() {
        return this.tagname.compareToIgnoreCase(this.assumption);
    }

    /**
     * Orders results by {@link #compareSelf()} first. Ties are broken by the component values
     * so that distinct results never compare as equal; otherwise a sorted set would silently
     * drop every candidate that shares a {@code compareSelf()} value with another one.
     */
    @Override
    public int compareTo(@NotNull ParseResult o) {
        int bySelf = Integer.compare(this.compareSelf(), o.compareSelf());
        if (bySelf != 0) {
            return bySelf;
        }
        int byAssumption = this.assumption.compareTo(o.assumption);
        if (byAssumption != 0) {
            return byAssumption;
        }
        return this.tagname.compareTo(o.tagname);
    }
}

View File

@@ -0,0 +1,105 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser;
import org.openautonomousconnection.htmlparser.html.CustomHTMLElement;
import org.openautonomousconnection.htmlparser.html.HTML;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import org.openautonomousconnection.htmlparser.interpreter.HTMLInterpreter;
import lombok.Getter;
import org.openautonomousconnection.htmlparser.interpreter.script.pyscript.PyScriptInterpreter;
import java.util.Arrays;
import java.util.Objects;
import static org.openautonomousconnection.StringUtils_Remove_Please.splitSeq;
/**
 * Tokenizes an HTML document and feeds the tokens to an {@link HTMLInterpreter}.
 */
public class Parser {
    public static String DEFAULT_TITLE = "untitled";

    @Getter
    private final TagManager tagManager;
    @Getter
    private HTML html;
    private final String[] tokens;

    /**
     * Splits {@code content} after every {@code >}, space, tab and newline, then parses the
     * tokens once and prints the result to standard output.
     *
     * @param content    the HTML source
     * @param tagManager registry used to resolve tag names to element classes
     */
    public Parser(String content, TagManager tagManager) {
        this.html = new HTML();
        this.tagManager = tagManager;

        String[] split = splitSeq(new String[]{content}, ">");
        // TODO: you can do this using regex \\s in one line instead of 3
        String[] split_spaces = splitSeq(split, " ");
        String[] split_tabs = splitSeq(split_spaces, "\t");
        this.tokens = splitSeq(split_tabs, "\n");

        System.out.println();
        System.out.println(this.parse());
    }

    /**
     * Runs a fresh interpreter over all tokens.
     *
     * @return the interpreter's result
     */
    public HTML parse() {
        HTMLInterpreter interpreter = new HTMLInterpreter(this, new PyScriptInterpreter(this));
        for (String token : this.tokens) {
            interpreter.nextState(token);
        }
        return interpreter.getResult();
    }

    /**
     * Resolves a tag name to its element class, ignoring case.
     *
     * @param tagName tag name as written in the document
     * @return the registered class, or {@link CustomHTMLElement} when the tag is unknown
     */
    public Class<? extends HTMLElement> getByTagname(String tagName) {
        // Locale.ROOT keeps the lookup stable in locales with special casing rules (e.g. Turkish i).
        String key = tagName.toLowerCase(java.util.Locale.ROOT);
        Class<? extends HTMLElement> res = this.tagManager.tags.get(key);
        return Objects.requireNonNullElse(res, CustomHTMLElement.class);
    }

    /** Demo entry point: parses a small sample document; the constructor prints the result. */
    public static void main(String[] args) {
        new Parser("""
                <!DOCTYPE Html>
                <html>
                <body>
                <p>a paragraph <span color='green'> in color! </span> test </p>
                <br>
                <!-- this is a comment -->
                <!--<script> print("<hi>"); ignore pls
                </script>-->
                <script> print("<hoi>");
                </script>
                </body>
                </html>
                """, new TagManager());
    }
}

View File

@@ -0,0 +1,117 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import org.openautonomousconnection.htmlparser.html.NoContent;
import org.openautonomousconnection.htmlparser.html.body.HTMLBody;
import org.openautonomousconnection.htmlparser.html.body.buttons.HTMLButton;
import org.openautonomousconnection.htmlparser.html.body.form.HTMLForm;
import org.openautonomousconnection.htmlparser.html.body.form.HTMLInput;
import org.openautonomousconnection.htmlparser.html.body.form.HTMLLabel;
import org.openautonomousconnection.htmlparser.html.body.link.HTMLArea;
import org.openautonomousconnection.htmlparser.html.body.link.HTMLHyperlink;
import org.openautonomousconnection.htmlparser.html.body.link.HTMLImage;
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLBreak;
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLComment;
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLDiv;
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLScript;
import org.openautonomousconnection.htmlparser.html.body.texts.HTMLAbbreviation;
import org.openautonomousconnection.htmlparser.html.body.texts.heading.HTMLHeading;
import org.openautonomousconnection.htmlparser.html.body.texts.heading.HeadingType;
import org.openautonomousconnection.htmlparser.html.body.texts.text.HTMLText;
import org.openautonomousconnection.htmlparser.html.body.texts.text.TextType;
import org.openautonomousconnection.htmlparser.html.header.HTMLHeader;
import org.openautonomousconnection.htmlparser.html.header.HTMLTitle;
import java.util.*;
/**
 * Registry mapping tag names to the {@link HTMLElement} classes that represent them.
 */
public class TagManager {
    public Map<String, Class<? extends HTMLElement>> tags;

    /** Creates a manager with the default tags registered. */
    public TagManager() {
        // Map default tags
        this.tags = new HashMap<>();

        // buttons
        this.putTag(HTMLButton.class);

        // forms
        this.putTag(HTMLForm.class);
        this.putTag(HTMLInput.class);
        this.putTag(HTMLLabel.class);

        // links
        this.putTag(HTMLArea.class);
        this.putTag(HTMLHyperlink.class);
        this.putTag(HTMLImage.class);

        // misc
        this.putTag(HTMLBreak.class);
        this.putTag(HTMLDiv.class);
        this.putTag(HTMLScript.class);
        this.putTag(HTMLComment.class);

        // headings
        for (HeadingType type : HeadingType.values()) {
            this.tags.put(type.getTag(), HTMLHeading.class);
        }

        // texts
        for (TextType type : TextType.values()) {
            this.tags.put(type.getTag(), HTMLText.class);
        }
        this.putTag(HTMLAbbreviation.class);

        // headers
        this.putTag(HTMLHeader.class);
        this.putTag(HTMLTitle.class);

        // main elements (header and image are already registered above)
        this.putTag(HTMLBody.class);
    }

    /**
     * Registers an element class under the value of its static {@code TAG} field.
     *
     * @param tag element class declaring a static String field named {@code TAG}
     * @throws IllegalArgumentException if the class has no accessible {@code TAG} field
     */
    public void putTag(Class<? extends HTMLElement> tag) {
        try {
            // TAG is static, so no instance is needed to read it.
            this.tags.put((String) tag.getDeclaredField("TAG").get(null), tag);
        } catch (NoSuchFieldException | IllegalAccessException e) {
            throw new IllegalArgumentException("Cannot read static TAG field of " + tag.getName(), e);
        }
    }

    /** @return true when {@code tagName} is registered (exact, case-sensitive match) */
    public boolean isTag(String tagName) {
        return this.tags.containsKey(tagName);
    }

    /** @return true when {@code tagName} is registered after removing all whitespace from it */
    public boolean isTagSpaced(String tagName) {
        return this.isTag(tagName.replaceAll("\\s", ""));
    }

    /**
     * @return true when the tag is registered and its class is not annotated with {@link NoContent}
     */
    public boolean hasText(String tagName) {
        Class<? extends HTMLElement> elementClass = this.tags.get(tagName);
        return elementClass != null && !elementClass.isAnnotationPresent(NoContent.class);
    }

    /**
     * Collects every registered tag whose name contains {@code string}.
     *
     * @return the candidates, sorted by the natural ordering of {@link ParseResult}
     */
    public TreeSet<ParseResult> couldBe(String string) {
        TreeSet<ParseResult> result = new TreeSet<>();
        for (String tagName : tags.keySet()) {
            if (tagName.contains(string)) {
                result.add(new ParseResult(string, tagName));
            }
        }
        return result;
    }
}

View File

@@ -0,0 +1,10 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser.exception;
/**
 * Thrown when a tag that must be present is {@code null}.
 */
public class NullTagException extends NullPointerException {
    /** Fixed detail message used by every instance. */
    private static final String MESSAGE = "Tag can't be null!";

    /** Creates the exception with the fixed detail message. */
    public NullTagException() {
        super(MESSAGE);
    }
}

View File

@@ -0,0 +1,35 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html;
import lombok.Getter;
import lombok.Setter;
import java.util.Map;
import java.util.Optional;
/**
 * Element used for tags that have no dedicated class.
 */
public class CustomHTMLElement extends HTMLElement{
    public static final boolean CLOSEABLE = true;

    @Getter @Setter
    private String text;

    /**
     * @param tag        tag name of the element
     * @param text       text content
     * @param attributes attribute map, must not be null; an {@code id} entry is optional
     */
    public CustomHTMLElement(String tag, String text, Map<String, String> attributes) {
        this.tagName = tag;
        this.text = text;
        this.attributes = attributes;
        // ofNullable: elements without an id attribute must not fail with a NullPointerException.
        this.id = Optional.ofNullable(attributes.get("id"));
    }

    /** Creates an element with the placeholder tag name {@code UNKNOWN_ELEMENT}. */
    public static CustomHTMLElement instantiate(String text, Map<String, String> attributes) {
        return new CustomHTMLElement("UNKNOWN_ELEMENT", text, attributes);
    }
}

View File

@@ -0,0 +1,90 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html;
import org.openautonomousconnection.htmlparser.html.body.HTMLBody;
import org.openautonomousconnection.htmlparser.html.header.HTMLHeader;
import org.openautonomousconnection.htmlparser.html.misc.HTMLClass;
import lombok.Getter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Parent element for all HTML content. Header and body are held in dedicated fields
 * instead of the generic child list.
 */
public class HTML extends HTMLElement{
    public static final String TAG = "html";
    public static final boolean CLOSEABLE = true;

    public final List<HTMLClass> classes;

    @Getter
    private HTMLHeader header;
    @Getter
    private HTMLBody body;

    /**
     * @param header document header, may be null
     * @param body   document body, may be null
     */
    public HTML(HTMLHeader header, HTMLBody body) {
        super(null);
        this.header = header;
        this.body = body;
        // Keep the parent link consistent with setHeader/setBody.
        if (header != null) {
            header.parent = this;
        }
        if (body != null) {
            body.parent = this;
        }
        this.tagName = TAG;
        this.classes = new ArrayList<>();
    }

    /** Creates an empty document without header and body. */
    public HTML() {
        this(null, null);
    }

    /**
     * Adds an element to this document. A header or body replaces the current one;
     * every other element is appended to the child list.
     *
     * @return the element that was passed in
     */
    @Override
    public HTMLElement append(HTMLElement element) {
        if (element instanceof HTMLHeader newHeader) {
            this.setHeader(newHeader);
        } else if (element instanceof HTMLBody newBody) {
            this.setBody(newBody);
        } else {
            super.append(element);
        }
        return element;
    }

    /** Replaces the body and makes this document its parent. */
    public HTMLBody setBody(HTMLBody body) {
        this.body = body;
        this.body.parent = this;
        return this.body;
    }

    /** Replaces the header and makes this document its parent. */
    public HTMLHeader setHeader(HTMLHeader header) {
        this.header = header;
        this.header.parent = this;
        return this.header;
    }

    /** Creates an empty document carrying the given attributes; {@code text} is not used. */
    public static HTML instantiate(String text, Map<String, String> attributes) {
        HTML html = new HTML();
        html.setAttributes(attributes);
        return html;
    }
}

View File

@@ -0,0 +1,142 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html;
import org.openautonomousconnection.htmlparser.html.misc.HTMLClass;
import lombok.Getter;
import lombok.Setter;
import org.jetbrains.annotations.Nullable;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
/**
 * Base class of all elements: holds the parent link, children, tag name, id, class and attributes.
 */
public abstract class HTMLElement {
    @Getter
    protected HTMLElement parent;
    @Getter
    protected List<HTMLElement> children;
    @Getter
    protected String tagName;
    @Getter @Setter
    protected Optional<String> id;
    @Getter @Setter
    protected Optional<HTMLClass> htmlClass;
    @Getter @Setter
    protected Map<String, String> attributes;

    protected HTMLElement(@Nullable HTMLElement parent) {
        this.parent = parent;
        this.attributes = new HashMap<>();
        this.id = Optional.empty();
        this.htmlClass = Optional.empty();
        this.children = new ArrayList<>();
    }

    protected HTMLElement() {
        this(null);
    }

    /**
     * Adds {@code element} as the last child and makes this element its parent.
     *
     * @return the element that was passed in
     */
    public HTMLElement append(HTMLElement element) {
        element.parent = this;
        this.children.add(element);
        return element;
    }

    /**
     * Renders the opening tag, every child and the closing tag. Classes annotated with
     * {@link NoContent} render the opening tag only.
     */
    @Override
    public final String toString() {
        if (this.getClass().isAnnotationPresent(NoContent.class)) {
            return otag();
        }
        StringBuilder sb = new StringBuilder(otag()).append("\n\t");
        for (HTMLElement child : this.children) {
            sb.append(child).append("\n\t");
        }
        return sb.append(ctag()).toString();
    }

    /**
     * Reload ID from attributes map: moves an {@code id} entry out of the map into {@link #id}.
     */
    public void reloadId() {
        if (this.attributes.containsKey("id")) {
            // ofNullable: a key mapped to null must not raise a NullPointerException.
            this.id = Optional.ofNullable(this.attributes.remove("id"));
        }
    }

    protected String getIdString() {
        return this.id.map(string -> "id='" + string + "' ").orElse("");
    }

    protected String getClassString() {
        return this.htmlClass.map(htmlClass -> "class='" + htmlClass.getClassName() + "' ").orElse("");
    }

    /** @return all attributes as {@code " key='value' key2='value2'"}, or an empty string when there are none */
    protected String getAttributesString() {
        StringBuilder sb = new StringBuilder(" ");
        for (Map.Entry<String, String> entry : this.attributes.entrySet()) {
            sb.append(entry.getKey())
                    .append("='")
                    .append(entry.getValue())
                    .append("' ");
        }
        if (sb.length() > 1) {
            return sb.substring(0, sb.length() - 1);
        }
        return "";
    }

    /** @return the opening tag including id, class and attributes */
    protected String otag() {
        return openTag("");
    }

    /** @return the opening tag with {@code _attributes} placed between the class and the attribute map */
    protected String otag(String _attributes) {
        return openTag(_attributes);
    }

    /**
     * Joins tag name, id, class, extra text and attributes with exactly one space between the
     * non-empty parts. Plain concatenation used to glue the id directly to the tag name.
     */
    private String openTag(String extra) {
        StringBuilder sb = new StringBuilder("<").append(this.tagName);
        String[] parts = {getIdString(), getClassString(), extra, getAttributesString()};
        for (String part : parts) {
            String trimmed = part == null ? "" : part.trim();
            if (!trimmed.isEmpty()) {
                sb.append(' ').append(trimmed);
            }
        }
        return sb.append('>').toString();
    }

    protected String ctag() {
        return "</" + this.tagName + ">";
    }

    /**
     * Removes the first opening tag (with the given attribute text) and the first closing tag
     * of this element from {@code string}, then trims the result. Tag name and attributes are
     * matched literally.
     */
    protected String cutTag(String string, String _attributes) {
        String spaced = "<" + tagName + " " + _attributes + ">";
        String result;
        if (_attributes.isEmpty() && string.contains("<" + tagName + ">")) {
            // Without attributes the usual spelling has no space before '>'.
            result = removeFirst(string, "<" + tagName + ">");
        } else {
            result = removeFirst(string, spaced);
        }
        return removeFirst(result, "</" + tagName + ">").trim();
    }

    protected String cutTag(String string) {
        return cutTag(string, "");
    }

    /** Removes the first literal occurrence of {@code part} from {@code string}, if any. */
    private static String removeFirst(String string, String part) {
        int at = string.indexOf(part);
        if (at < 0) {
            return string;
        }
        return string.substring(0, at) + string.substring(at + part.length());
    }

    /**
     * Creates an element by reflectively calling the public static
     * {@code instantiate(String, Map)} method of {@code elementClass}.
     *
     * @param elementClass class declaring the static factory
     * @param text         text passed to the factory
     * @param attributes   attributes passed to the factory
     * @return the element returned by the factory
     * @throws NoSuchMethodException     if the class declares no such method
     * @throws IllegalAccessException    if the method is not accessible
     * @throws InvocationTargetException if the factory itself throws
     */
    // TODO: 1. handle comments 2. somehow handle non-tag &gts & &lts
    public static HTMLElement instantiate(Class<? extends HTMLElement> elementClass, String text, Map<String, String> attributes) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException {
        return (HTMLElement) elementClass.getMethod("instantiate", String.class, Map.class).invoke(null, text, attributes);
    }
}

View File

@@ -0,0 +1,11 @@
package org.openautonomousconnection.htmlparser.html;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marker for element classes that carry no content; it is kept at runtime so it can be
 * queried reflectively with {@code isAnnotationPresent}.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
public @interface NoContent {
}

View File

@@ -0,0 +1,22 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import lombok.Getter;
import lombok.Setter;
import org.jetbrains.annotations.Nullable;
/**
 * Base class for elements that carry a text value.
 */
public abstract class BodyElement extends HTMLElement {
    /** Text value of the element, exposed through a generated getter and setter. */
    @Getter
    @Setter
    protected String text;

    /** Creates an element without a parent. */
    protected BodyElement() {
        super();
    }

    /**
     * @param parent the parent element, may be null
     */
    protected BodyElement(@Nullable HTMLElement parent) {
        super(parent);
    }
}

View File

@@ -0,0 +1,42 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
/**
 * The {@code <body>} element.
 */
public class HTMLBody extends HTMLElement {
    public static final String TAG = "body";
    public static final boolean CLOSEABLE = true;

    /**
     * Creates a body containing the given elements. The elements are copied into this
     * body's own child list and get this body as their parent, so an immutable or shared
     * input list cannot break later {@code append} calls.
     *
     * @param elements initial children, must not be null
     */
    public HTMLBody(List<HTMLElement> elements) {
        this.tagName = TAG;
        for (HTMLElement element : elements) {
            // super.append: do not dispatch to a subclass override from the constructor.
            super.append(element);
        }
    }

    /**
     * @param elements initial children; may be empty
     */
    public HTMLBody(HTMLElement... elements) {
        this(Arrays.asList(elements));
    }

    /** Creates an empty body with the given attributes; {@code text} is not used. */
    public static HTMLBody instantiate(String text, Map<String, String> attributes) {
        HTMLBody body = new HTMLBody();
        body.setAttributes(attributes);
        body.reloadId();
        return body;
    }
}

View File

@@ -0,0 +1,16 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.buttons;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
/**
 * Base class for button-like elements; the script is stored in the {@code onclick} attribute.
 */
public abstract class ButtonElement extends BodyElement {
    /** @return the value of the {@code onclick} attribute, or null when it is not set */
    public String getScript() {
        return this.attributes.get("onclick");
    }

    /**
     * Sets the {@code onclick} attribute. {@code Map.replace} only updates existing keys,
     * so the value was silently dropped on elements that had no script yet.
     *
     * @param script the script to run on click; null removes the attribute
     */
    public void setScript(String script) {
        if (script == null) {
            this.attributes.remove("onclick");
        } else {
            this.attributes.put("onclick", script);
        }
    }
}

View File

@@ -0,0 +1,34 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.buttons;
import java.util.Map;
/**
 * The {@code <button>} element.
 */
public class HTMLButton extends ButtonElement {
    public static final String TAG = "button";
    public static final boolean CLOSEABLE = false;

    /**
     * @param text the button's text
     */
    public HTMLButton(String text) {
        this.text = text;
        this.tagName = TAG;
    }

    /**
     * @param text   the button's text
     * @param script passed to {@code setScript}
     */
    public HTMLButton(String text, String script) {
        this(text);
        this.setScript(script);
    }

    /** Creates a button with the given text and attributes and reloads its id from them. */
    public static HTMLButton instantiate(String text, Map<String, String> attributes) {
        HTMLButton button = new HTMLButton(text);
        button.setAttributes(attributes);
        button.reloadId();
        return button;
    }
}

View File

@@ -0,0 +1,10 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.form;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
/**
 * Common base type for form-related body elements; it adds no members of its own.
 */
public abstract class FormElement extends BodyElement {
}

View File

@@ -0,0 +1,34 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.form;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import java.util.Map;
/**
 * The {@code <form>} element.
 */
public class HTMLForm extends HTMLElement {
    public static final String TAG = "form";
    public static final boolean CLOSEABLE = true;

    /** Creates a form without attributes. */
    public HTMLForm() {
        this.tagName = TAG;
    }

    /**
     * @param action stored as the {@code action} attribute
     */
    public HTMLForm(String action) {
        this();
        this.attributes.put("action", action);
    }

    /** Creates a form with the given attributes and reloads its id from them; {@code text} is not used. */
    public static HTMLForm instantiate(String text, Map<String, String> attributes) {
        HTMLForm result = new HTMLForm();
        result.setAttributes(attributes);
        result.reloadId();
        return result;
    }
}

View File

@@ -0,0 +1,59 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser.html.body.form;
import org.openautonomousconnection.htmlparser.html.NoContent;
import java.util.Map;
/**
 * The {@code <input>} element; type and name live in the attribute map.
 */
@NoContent
public class HTMLInput extends FormElement {
    public static final String TAG = "input";
    public static final boolean CLOSEABLE = false;

    /** Creates an input without type and name. */
    public HTMLInput() {
        // Was missing: without it the element rendered with a null tag name.
        this.tagName = TAG;
    }

    /**
     * @param type value of the {@code type} attribute, null leaves it unset
     * @param name value of the {@code name} attribute, null leaves it unset
     */
    public HTMLInput(String type, String name) {
        this.tagName = TAG;
        this.setType(type);
        this.setName(name);
    }

    /** @return the {@code type} attribute, or null when it is not set */
    public String getType() {
        return this.attributes.get("type");
    }

    /** @return the {@code name} attribute, or null when it is not set */
    public String getName() {
        return this.attributes.get("name");
    }

    /** Sets the {@code type} attribute; null removes it. */
    public void setType(String type) {
        this.putOrRemove("type", type);
    }

    /** Sets the {@code name} attribute; null removes it. */
    public void setName(String name) {
        this.putOrRemove("name", name);
    }

    /** Stores the attribute even when the key is new ({@code Map.replace} would ignore it). */
    private void putOrRemove(String key, String value) {
        if (value == null) {
            this.attributes.remove(key);
        } else {
            this.attributes.put(key, value);
        }
    }

    /**
     * Creates an input from parsed data. The attribute map is used as-is, so {@code type}
     * and {@code name} stay available through their getters instead of being discarded.
     */
    public static HTMLInput instantiate(String text, Map<String, String> attributes) {
        HTMLInput input = new HTMLInput();
        input.setText(text);
        input.setAttributes(attributes);
        input.reloadId();
        return input;
    }
}

View File

@@ -0,0 +1,37 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.form;
import java.util.Map;
/**
 * The {@code <label>} element.
 */
public class HTMLLabel extends FormElement {
    public static final String TAG = "label";
    public static final boolean CLOSEABLE = true;

    /**
     * @param text the label text
     */
    public HTMLLabel(String text) {
        this.text = text;
        this.tagName = TAG;
    }

    /** @return the {@code for} attribute, or null when it is not set */
    public String get_for() {
        return this.attributes.get("for");
    }

    /**
     * Sets the {@code for} attribute. {@code Map.replace} only updates existing keys, so the
     * value was silently dropped on labels that had no {@code for} attribute yet.
     *
     * @param _for the new value; null removes the attribute
     */
    public void set_for(String _for) {
        if (_for == null) {
            this.attributes.remove("for");
        } else {
            this.attributes.put("for", _for);
        }
    }

    /** Creates a label with the given text and attributes and reloads its id from them. */
    public static HTMLLabel instantiate(String text, Map<String, String> attributes) {
        HTMLLabel label = new HTMLLabel(text);
        label.setAttributes(attributes);
        label.reloadId();
        return label;
    }
}

View File

@@ -0,0 +1,53 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.link;
import java.util.Map;
/**
 * The {@code <area>} element; shape and coordinates live in the attribute map.
 */
public class HTMLArea extends LinkElement {
    public static final String TAG = "area";
    public static final boolean CLOSEABLE = true;

    /** Creates an area without attributes. */
    public HTMLArea() {
        this.tagName = TAG;
    }

    /**
     * @param src    passed to {@code setSource}
     * @param shape  value of the {@code shape} attribute
     * @param coords value of the {@code coords} attribute
     */
    public HTMLArea(String src, String shape, String coords) {
        this.setSource(src);
        this.setShape(shape);
        this.setCoords(coords);
        this.tagName = TAG;
    }

    /** @return the {@code shape} attribute, or null when it is not set */
    public String getShape() {
        return this.attributes.get("shape");
    }

    /** @return the {@code coords} attribute, or null when it is not set */
    public String getCoords() {
        return this.attributes.get("coords");
    }

    /** Sets the {@code shape} attribute; null removes it. */
    public void setShape(String shape) {
        this.putOrRemove("shape", shape);
    }

    /** Sets the {@code coords} attribute; null removes it. */
    public void setCoords(String coords) {
        this.putOrRemove("coords", coords);
    }

    /** Stores the attribute even when the key is new ({@code Map.replace} would ignore it). */
    private void putOrRemove(String key, String value) {
        if (value == null) {
            this.attributes.remove(key);
        } else {
            this.attributes.put(key, value);
        }
    }

    /** Creates an area with the given text and attributes and reloads its id from them. */
    public static HTMLArea instantiate(String text, Map<String, String> attributes) {
        HTMLArea area = new HTMLArea();
        area.setText(text);
        area.setAttributes(attributes);
        area.reloadId();
        return area;
    }
}

View File

@@ -0,0 +1,36 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.link;
import java.util.Map;
/**
 * The {@code <a>} element.
 */
public class HTMLHyperlink extends LinkElement {
    public static final String TAG = "a";
    public static final boolean CLOSEABLE = true;

    /**
     * @param text the link text
     */
    public HTMLHyperlink(String text) {
        this.text = text;
        this.tagName = TAG;
    }

    /**
     * @param text the link text
     * @param src  passed to {@code setSource}
     */
    public HTMLHyperlink(String text, String src) {
        this(text);
        this.setSource(src);
    }

    /** Creates a hyperlink with the given text and attributes and reloads its id from them. */
    public static HTMLHyperlink instantiate(String text, Map<String, String> attributes) {
        HTMLHyperlink link = new HTMLHyperlink(text);
        link.setAttributes(attributes);
        link.reloadId();
        return link;
    }
}

View File

@@ -0,0 +1,34 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.link;
import java.util.Map;
/**
 * The {@code <img>} element.
 */
public class HTMLImage extends LinkElement {
    public static final String TAG = "img";
    public static final boolean CLOSEABLE = false;

    /** Creates an image without attributes. */
    public HTMLImage() {
        this.tagName = TAG;
    }

    /**
     * @param src passed to {@code setSource}
     */
    public HTMLImage(String src) {
        this();
        this.setSource(src);
    }

    /** Creates an image with the given text and attributes and reloads its id from them. */
    public static HTMLImage instantiate(String text, Map<String, String> attributes) {
        HTMLImage result = new HTMLImage();
        result.setText(text);
        result.setAttributes(attributes);
        result.reloadId();
        return result;
    }
}

View File

@@ -0,0 +1,20 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.link;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
/**
 * Base class for elements that reference a source through the {@code src} attribute.
 */
public abstract class LinkElement extends BodyElement {
    protected LinkElement() {
    }

    /** @return the {@code src} attribute, or null when it is not set */
    public String getSource() {
        return this.attributes.get("src");
    }

    /**
     * Sets the {@code src} attribute. {@code Map.replace} only updates existing keys, so the
     * source was silently dropped on elements that had no {@code src} attribute yet.
     *
     * @param source the new source; null removes the attribute
     */
    public void setSource(String source) {
        if (source == null) {
            this.attributes.remove("src");
        } else {
            this.attributes.put("src", source);
        }
    }
}

View File

@@ -0,0 +1,34 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.misc;
import org.openautonomousconnection.htmlparser.html.NoContent;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import java.util.Map;
/**
 * The {@code <br>} element.
 */
@NoContent
public class HTMLBreak extends BodyElement {
    public static final String TAG = "br";
    public static final boolean CLOSEABLE = false;

    /** Creates a line break. */
    public HTMLBreak() {
        this.tagName = TAG;
    }

    /** Creates a line break with the given text and attributes and reloads its id from them. */
    public static HTMLBreak instantiate(String text, Map<String, String> attributes) {
        HTMLBreak lineBreak = new HTMLBreak();
        lineBreak.setText(text);
        lineBreak.setAttributes(attributes);
        lineBreak.reloadId();
        return lineBreak;
    }
}

View File

@@ -0,0 +1,51 @@
package org.openautonomousconnection.htmlparser.html.body.misc;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import lombok.Getter;
import lombok.Setter;
import java.util.Map;
/**
 * An HTML comment. It has no id, class or attributes and is delimited by
 * {@code <!--} and {@code -->}.
 */
@Getter @Setter
public class HTMLComment extends HTMLElement {
    // final like the TAG constant of every other element; it is only read when tags are registered.
    public static final String TAG = "--";
    public static final boolean CLOSEABLE = true;

    /** The comment text between the delimiters. */
    private String text;

    /**
     * @param text the comment text
     */
    public HTMLComment(String text) {
        this.text = text;
        this.tagName = "";
    }

    /** Creates a comment with the given text; {@code attributes} are not used. */
    public static HTMLComment instantiate(String text, Map<String, String> attributes) {
        return new HTMLComment(text);
    }

    @Override
    protected String getIdString() {
        return "";
    }

    @Override
    protected String getClassString() {
        return "";
    }

    @Override
    protected String getAttributesString() {
        return "";
    }

    @Override
    protected String otag() {
        return "<!--";
    }

    @Override
    protected String ctag() {
        return "-->";
    }
}

View File

@@ -0,0 +1,29 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.misc;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import java.util.Map;
/**
 * The {@code <div>} element.
 */
public class HTMLDiv extends HTMLElement {
    public static final String TAG = "div";
    public static final boolean CLOSEABLE = true;

    /** Creates a div without attributes. */
    public HTMLDiv() {
        this.tagName = TAG;
    }

    /** Creates a div with the given attributes and reloads its id from them; {@code text} is not used. */
    public static HTMLDiv instantiate(String text, Map<String, String> attributes) {
        HTMLDiv result = new HTMLDiv();
        result.setAttributes(attributes);
        result.reloadId();
        return result;
    }
}

View File

@@ -0,0 +1,30 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.misc;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import java.util.Map;
/**
 * The {@code <script>} element; the script source is kept as the element text.
 */
public class HTMLScript extends BodyElement {
    public static final String TAG = "script";
    public static final boolean CLOSEABLE = true;

    /**
     * @param text the script source
     */
    public HTMLScript(String text) {
        this.tagName = TAG;
        this.text = text;
    }

    /** Creates a script with the given text and attributes and reloads its id from them. */
    public static HTMLScript instantiate(String text, Map<String, String> attributes) {
        HTMLScript result = new HTMLScript(text);
        result.setAttributes(attributes);
        result.reloadId();
        return result;
    }
}

View File

@@ -0,0 +1,42 @@
package org.openautonomousconnection.htmlparser.html.body.texts;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import java.util.Map;
/**
 * The {@code <abbr>} element; the expansion is stored in the {@code title} attribute.
 */
public class HTMLAbbreviation extends BodyElement {
    public static final String TAG = "abbr";
    public static final boolean CLOSEABLE = true;

    /**
     * @param text the abbreviation text
     */
    public HTMLAbbreviation(String text) {
        this.text = text;
        this.tagName = TAG;
    }

    /**
     * @param text  the abbreviation text
     * @param title value of the {@code title} attribute
     */
    public HTMLAbbreviation(String text, String title) {
        this.text = text;
        this.setTitle(title);
        this.tagName = TAG;
    }

    /** @return the {@code title} attribute, or null when it is not set */
    public String getTitle() {
        return this.attributes.get("title");
    }

    /**
     * Sets the {@code title} attribute. {@code Map.replace} only updates existing keys, so
     * the title passed to the constructor was silently dropped.
     *
     * @param title the new title; null removes the attribute
     */
    public void setTitle(String title) {
        if (title == null) {
            this.attributes.remove("title");
        } else {
            this.attributes.put("title", title);
        }
    }

    /** Creates an abbreviation with the given text and attributes and reloads its id from them. */
    public static HTMLAbbreviation instantiate(String text, Map<String, String> attributes) {
        HTMLAbbreviation abbreviation = new HTMLAbbreviation(text);
        abbreviation.setAttributes(attributes);
        abbreviation.reloadId();
        return abbreviation;
    }
}

View File

@@ -0,0 +1,39 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.texts.heading;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import lombok.Getter;
import java.util.Map;
/**
 * A heading element; its tag name follows its {@link HeadingType}.
 */
public class HTMLHeading extends BodyElement {
    public static final boolean CLOSEABLE = true;

    /** Heading level; determines the tag name. */
    @Getter
    protected HeadingType type;

    /**
     * @param text the heading text
     * @param type the heading level, must not be null
     */
    public HTMLHeading(String text, HeadingType type) {
        this.text = text;
        this.type = type;
        this.tagName = type.getTag();
    }

    /** Changes the heading level and updates the tag name to match. */
    public void setType(HeadingType type) {
        this.type = type;
        this.tagName = type.getTag();
    }

    /**
     * Creates a heading of type {@link HeadingType#H1} with the given text and attributes
     * and reloads its id from them.
     */
    public static HTMLHeading instantiate(String text, Map<String, String> attributes) {
        HTMLHeading result = new HTMLHeading(text, HeadingType.H1);
        result.setAttributes(attributes);
        result.reloadId();
        return result;
    }
}

View File

@@ -0,0 +1,23 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.body.texts.heading;
import lombok.Getter;
/**
 * The six heading levels and their tag names.
 */
public enum HeadingType {
    H1("h1"),
    H2("h2"),
    H3("h3"),
    H4("h4"),
    H5("h5"),
    H6("h6");

    /** Tag name of this heading level; final because enum constants are shared and must not change. */
    @Getter
    private final String tag;

    HeadingType(String tag) {
        this.tag = tag;
    }
}

View File

@@ -0,0 +1,36 @@
package org.openautonomousconnection.htmlparser.html.body.texts.text;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import lombok.Getter;
import java.util.Map;
/**
 * A text element; its tag name follows its {@link TextType}.
 */
public class HTMLText extends BodyElement {
    public static final boolean CLOSEABLE = true;

    /** Kind of text; determines the tag name. */
    @Getter
    private TextType type;

    /**
     * @param text the text content
     * @param type the kind of text, must not be null
     */
    protected HTMLText(String text, TextType type) {
        this.text = text;
        this.type = type;
        this.tagName = type.getTag();
    }

    /** Changes the kind of text and updates the tag name to match. */
    public void setType(TextType type) {
        this.type = type;
        this.tagName = type.getTag();
    }

    /**
     * Creates a text element of type {@link TextType#PARAGRAPH} with the given text and
     * attributes and reloads its id from them.
     */
    public static HTMLText instantiate(String text, Map<String, String> attributes) {
        HTMLText result = new HTMLText(text, TextType.PARAGRAPH);
        result.setAttributes(attributes);
        result.reloadId();
        return result;
    }
}

View File

@@ -0,0 +1,26 @@
package org.openautonomousconnection.htmlparser.html.body.texts.text;
import lombok.Getter;
/**
 * Kinds of text element, each carrying the tag name it is written with.
 */
public enum TextType {
    PARAGRAPH("p"),
    BOLD("b"),
    STRONG("strong"),
    ITALIC("i"),
    EMPHASIZED("em"),
    MARKED("mark"),
    SMALL("small"),
    DELETED("del"),
    INSERTED("ins"),
    SUBSCRIPT("sub"),
    SUPERSCRIPT("sup"),
    UNDERLINED("u"),
    SPAN("span");

    /** Tag name of this text type; final because enum constants are shared. */
    @Getter
    private final String tag;

    TextType(String tag) {
        this.tag = tag;
    }
}

View File

@@ -0,0 +1,49 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.header;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import lombok.Getter;
import java.util.*;
/**
 * The {@code head} element. The title is kept in a dedicated field; every other header
 * element is passed to {@code append}.
 */
public class HTMLHeader extends HTMLElement {
    public static final String TAG = "head";
    public static final boolean CLOSEABLE = true;

    /** Title of the document; never left {@code null} by the constructors. */
    @Getter
    private HTMLTitle title;

    /**
     * Builds a header from the given elements. If several titles are present the last one
     * wins; if none is present a default {@link HTMLTitle} is created.
     *
     * @param elements header elements to distribute
     */
    public HTMLHeader(List<HeaderElement> elements) {
        for (HeaderElement element : elements) {
            if (element instanceof HTMLTitle found) {
                this.title = found;
                continue;
            }
            this.append(element);
        }
        if (this.title == null) {
            this.title = new HTMLTitle();
        }
        this.tagName = TAG;
    }

    /**
     * Varargs convenience constructor; copies the arguments into a list and delegates.
     *
     * @param element header elements to distribute
     */
    public HTMLHeader(HeaderElement... element) {
        this(new ArrayList<>(Arrays.stream(element).toList()));
    }

    /**
     * Factory taking element text plus an attribute map. The {@code text} argument is not
     * used; the header is created empty.
     *
     * @param text       ignored
     * @param attributes attributes passed to {@code setAttributes}
     * @return the new header, after {@code reloadId()} has been called on it
     */
    public static HTMLHeader instantiate(String text, Map<String, String> attributes) {
        final HTMLHeader created = new HTMLHeader();
        created.setAttributes(attributes);
        created.reloadId();
        return created;
    }
}

View File

@@ -0,0 +1,35 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.header;
import org.openautonomousconnection.htmlparser.Parser;
import java.util.Map;
/**
 * The {@code title} element of a document header.
 */
public class HTMLTitle extends HeaderElement {
    public static final String TAG = "title";
    public static final boolean CLOSEABLE = true;

    /** Creates a title carrying {@link Parser#DEFAULT_TITLE}. */
    public HTMLTitle() {
        this(Parser.DEFAULT_TITLE);
    }

    /**
     * @param text title text
     */
    public HTMLTitle(String text) {
        this.text = text;
        this.tagName = TAG;
    }

    /**
     * Factory taking element text plus an attribute map.
     *
     * @param text       title text
     * @param attributes attributes passed to {@code setAttributes}
     * @return the new title, after {@code reloadId()} has been called on it
     */
    public static HTMLTitle instantiate(String text, Map<String, String> attributes) {
        final HTMLTitle created = new HTMLTitle(text);
        created.setAttributes(attributes);
        created.reloadId();
        return created;
    }
}

View File

@@ -0,0 +1,24 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.header;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import lombok.Getter;
import lombok.Setter;
import org.jetbrains.annotations.Nullable;
/**
 * Base class for elements that live inside the document header.
 */
public abstract class HeaderElement extends BodyElement {
    // NOTE(review): other BodyElement subclasses assign this.text without declaring it, so a
    // superclass presumably already has a `text` field that this one shadows; confirm.
    @Getter
    @Setter
    protected String text;

    /** Creates a header element without a parent. */
    protected HeaderElement() {
    }

    /**
     * @param parent parent element, may be {@code null}; forwarded to the superclass
     */
    protected HeaderElement(@Nullable HTMLElement parent) {
        super(parent);
    }
}

View File

@@ -0,0 +1,30 @@
// Author: maple
// date: 9/20/25
package org.openautonomousconnection.htmlparser.html.misc;
import org.openautonomousconnection.htmlparser.html.HTML;
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
import lombok.Getter;
import lombok.Setter;
import java.util.ArrayList;
import java.util.List;
/**
 * A named class together with the body elements that carry it.
 */
public class HTMLClass {
    /** Elements belonging to this class; starts empty. */
    public List<BodyElement> elements;

    @Getter
    @Setter
    protected String className;

    /**
     * Creates the class and adds it to {@code document.classes}.
     *
     * @param className name of the class
     * @param document  document whose class list receives this instance
     */
    public HTMLClass(String className, HTML document) {
        this.elements = new ArrayList<>();
        this.className = className;
        document.classes.add(this);
    }
}

View File

@@ -0,0 +1,64 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser.interpreter;
import org.openautonomousconnection.htmlparser.Parser;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import lombok.Getter;
import lombok.Setter;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Map;
/**
 * Collects the tag name, attributes and text of an element while it is being parsed and
 * creates the matching {@link HTMLElement} on {@link #build()}.
 */
public class ElementBuilder {
    /** Element class resolved from the tag name; whatever the parser lookup returned. */
    private final Class<? extends HTMLElement> clazz;
    @Getter @Setter
    private Map<String, String> attributes;
    @Getter @Setter
    private String text, tagName;

    /**
     * build a html Element
     * @param parser needed to retrieve element class (tagnames are relative)
     * @param tagName name of the tag
     */
    public ElementBuilder(Parser parser, String tagName) {
        this.clazz = parser.getByTagname(tagName);
        this.attributes = new HashMap<>();
        this.tagName = tagName;
    }

    /**
     * Instantiates the element from the collected text and attributes.
     *
     * @return the created element
     * @throws RuntimeException wrapping any reflective failure reported by
     *                          {@code HTMLElement.instantiate}
     */
    public HTMLElement build() {
        try {
            // Debug output kept as in the original implementation.
            System.out.println(clazz.getSimpleName());
            return HTMLElement.instantiate(clazz, text, attributes);
        } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Appends to the collected text, starting a new text if none has been set yet.
     *
     * @param text text to append
     */
    public void appendText(String text) {
        this.text = (this.text == null) ? text : this.text + text;
    }

    /**
     * Describes this builder. Safe to call even when no element class was resolved, so that
     * diagnostic printing of a builder never throws.
     */
    @Override
    public String toString() {
        final String className = (clazz == null) ? "null" : clazz.getSimpleName();
        return "ElementBuilder{" +
                "clazz=" + className +
                ", attributes=" + attributes +
                ", text='" + text + '\'' +
                ", tagName='" + tagName + '\'' +
                '}';
    }
}

View File

@@ -0,0 +1,447 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser.interpreter;
import org.openautonomousconnection.StringUtils_Remove_Please;
import org.openautonomousconnection.htmlparser.Parser;
import org.openautonomousconnection.htmlparser.TagManager;
import org.openautonomousconnection.htmlparser.html.HTML;
import org.openautonomousconnection.htmlparser.html.HTMLElement;
import org.openautonomousconnection.htmlparser.interpreter.html.exception.ExpectStringException;
import org.openautonomousconnection.htmlparser.interpreter.html.exception.UnexpectedTokenException;
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
import lombok.Getter;
import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
import java.util.Arrays;
import java.util.Map;
import java.util.Stack;
public class HTMLInterpreter implements Interpreter {
// State the next token is dispatched on in nextState(); parsing starts in TAG.
@Getter
private HTMLState currentState = HTMLState.TAG;
// State to resume after a comment: saved by returnCommentState() and handed back
// (then reset to COMMENT) by commentResetInbetween().
private HTMLState inbetweenState = HTMLState.COMMENT;
// Parser given to the constructor; passed on to every ElementBuilder created here.
@Getter
private Parser parser;
// Obtained from parser.getTagManager(); queried via hasText(tagName).
private TagManager tagManager;
// Builders of the elements that are currently open, innermost on top.
private Stack<ElementBuilder> elementBuilders;
// Scratch buffers for the attribute name, attribute value, text/comment content and the
// closing tag that are currently being assembled from tokens.
private StringBuilder currentAttribute, currentValue, currentText, currentClosingTag;
// Line number reported in exceptions; incremented in nextState() for tokens ending in "\n".
public int currentLine = 1;
// Element that closed tags are appended to; cast to HTML by getResult().
private HTMLElement currentElement;
// Receives the tokens seen while in the SCRIPT state.
private ScriptInterpreter scriptInterpreter;
/**
 * Creates an interpreter with empty buffers and an empty builder stack.
 *
 * @param parser            parser that supplies the tag manager and element classes
 * @param scriptInterpreter interpreter that handles tokens while in the SCRIPT state
 */
public HTMLInterpreter(Parser parser, ScriptInterpreter scriptInterpreter) {
    // collaborators
    this.parser = parser;
    this.scriptInterpreter = scriptInterpreter;
    this.tagManager = parser.getTagManager();

    // working state
    this.elementBuilders = new Stack<>();
    this.currentAttribute = new StringBuilder();
    this.currentValue = new StringBuilder();
    this.currentText = new StringBuilder();
    this.currentClosingTag = new StringBuilder();
}
/**
 * Feeds one token to the handler of the current state and stores the state it returns.
 * Blank tokens are not dispatched. A token ending in a line feed advances
 * {@code currentLine} by one, after the handler has run.
 *
 * @param token next token of the document
 */
@Override
public void nextState(String token) {
    final boolean endsLine = token.endsWith("\n");

    if (!token.isBlank()) {
        this.currentState = switch (this.currentState) {
            case TAG -> tag(token.strip());
            case CLOSE_TAG -> close_tag(token.strip());
            case TEXT -> text(token);
            case DOCTYPE -> doctype(token.strip());
            case ATTRIBUTE -> attribute(token.strip());
            case ATTRIBUTE_EQUALS -> attribute_equals(token.strip());
            case COMMENT -> comment(token);
            case VALUE -> value(token);
            case SCRIPT -> script(token);
            default -> this.currentState;
        };
    }

    if (endsLine) {
        this.currentLine++;
    }
}
/**
 * Completion flag required by {@link Interpreter}.
 *
 * @return always {@code false}; this interpreter does not track completion
 */
@Override
public boolean finished() {
    return false;
}
/**
 * Returns the element the interpreter currently points at, cast to {@link HTML}.
 *
 * @return the current element as a document
 * @throws ClassCastException if the current element is not an {@link HTML} instance
 */
public HTML getResult() {
    final HTMLElement root = this.currentElement;
    return (HTML) root;
}
/**
 * Removes every {@code '<'} and {@code '>'} character from the token.
 * Only public at the moment because of JavaScriptInterpreter.
 *
 * @param token token to clean
 * @return the token without angle brackets
 */
public static String stripTag(String token) {
    final String withoutOpening = token.replace("<", "");
    return withoutOpening.replace(">", "");
}
/**
 * Forwards a token to the script interpreter.
 *
 * <p>While the script interpreter is not finished the state stays SCRIPT. Once it reports
 * being finished, the builder on top of the stack is replaced by the script interpreter's
 * own builder and the token is handed to {@code close_tag}.
 *
 * @param token script token
 * @return next state
 */
private HTMLState script(String token) {
    this.scriptInterpreter.currentLine = this.currentLine;
    this.scriptInterpreter.nextState(token);

    // TODO: Change for release. This is debug code
    if (!this.scriptInterpreter.finished())
        return HTMLState.SCRIPT;

    // the ScriptInterpreter already has its own ElementBuilder
    this.elementBuilders.pop();
    this.elementBuilders.push(this.scriptInterpreter.getElementBuilder());
    return this.close_tag(token);
}
/**
 * Open a tag.
 *
 * <p>The tag name is the token with every '<' and '>' removed. "!DOCTYPE" switches to
 * the DOCTYPE state and a name starting with "!--" opens a comment; in both cases no
 * builder is pushed. Any other name pushes a new ElementBuilder for it.
 *
 * @param token tag token
 * @return next state
 */
private HTMLState tag(String token) {
String tagName = stripTag(token);
// Queried before the doctype/comment checks; only used for the final TEXT/TAG decision.
boolean hasText = this.tagManager.hasText(tagName);
if(tagName.equalsIgnoreCase("!DOCTYPE"))
return HTMLState.DOCTYPE;
else if(tagName.stripLeading().startsWith("!--"))
return returnCommentState();
this.elementBuilders.push(new ElementBuilder(this.parser, tagName));
String[] split = new String[] {token};
// NOTE(review): the substring handed to splitSeq ends before the first '>', so it cannot
// contain the separator; split.length therefore looks to be 1 on every path - confirm
// whether the attribute(...) branch at the end is reachable.
if(token.contains(">"))
split = StringUtils_Remove_Please.splitSeq(new String[]{
token.substring(0, token.indexOf('>'))
}, ">");
// TODO: Change for release. This is debug code
// A "script" tag switches to the SCRIPT state, or feeds the text after '>' to script().
if(this.elementBuilders.peek().getTagName().equals("script"))
return split.length == 1 ? HTMLState.SCRIPT : script(token.substring(token.indexOf(">")+1));
// Tag not closed within this token: attributes are expected next.
if(!token.contains(">"))
return HTMLState.ATTRIBUTE;
if(split.length == 1)
return hasText ? HTMLState.TEXT : HTMLState.TAG;
else
return attribute(token.substring(token.indexOf('>')+1));
}
/**
 * Close a tag.
 *
 * <p>The lower-cased, stripped token is appended to the closing-tag buffer, which is then
 * compared with the closing tag expected for the builder on top of the stack.
 *
 * @param token closing tag, or a fragment of it
 * @return next state (always TEXT when no exception is thrown)
 * @throws UnexpectedTokenException if the buffered text is neither the expected closing
 *         tag nor a prefix of it
 */
private HTMLState close_tag(String token) {
// Debug output: prints the whole builder stack.
System.out.println(Arrays.toString(this.elementBuilders.toArray()));
this.currentClosingTag.append(token.toLowerCase().strip());
String ct = this.currentClosingTag.toString();
String tagName = this.elementBuilders.peek().getTagName();
// Tags for which hasText is false are popped without being built or appended.
// NOTE(review): the closing-tag buffer is not cleared on this path - confirm intended.
if(!this.tagManager.hasText(tagName)) {
this.elementBuilders.pop();
return HTMLState.TEXT;
}
// Comments are special: a builder named "--" is closed by "-->", anything else by "</name>".
String should = tagName.equals("--") ? tagName + '>' : "</" + tagName + ">";
System.out.println("should: " + should + " token: " + token);
if(should.equals(ct)) {
// Build the element; append it to the current element if there is one.
if(this.currentElement != null)
this.currentElement = this.currentElement.append(this.elementBuilders.pop().build());
else
this.currentElement = this.elementBuilders.pop().build();
// Anything other than the HTML root steps to its parent afterwards.
if(!(this.currentElement instanceof HTML))
this.currentElement = this.currentElement.getParent();
this.currentClosingTag = new StringBuilder();
return HTMLState.TEXT;
}
// </should> not reached yet: keep the partial closing tag buffered
else if(should.startsWith(ct))
return HTMLState.TEXT;
// token not the same as </should>; the buffer is left as is when throwing
else
throw new UnexpectedTokenException(token, this.currentLine, this.currentState);
}
/**
 * Handles a token while in the TEXT state.
 *
 * <p>With an empty text buffer, a token whose first non-whitespace character is
 * {@code '<'} is treated as an opening tag; anything else starts the buffer. With a
 * non-empty buffer, a token starting with {@code '<'} ends the text: the buffer becomes
 * the text of the builder on top of the stack and the token is handled as a closing or
 * opening tag. Every other token is appended to the buffer.
 *
 * @param token text token (not stripped)
 * @return next state
 */
private HTMLState text(String token) {
    final boolean buffering = !this.currentText.isEmpty();

    // handle string begin
    if (!buffering) {
        // NOTE(review): a "</..." token arriving here is also routed to tag(); confirm.
        final String leadingStripped = token.stripLeading();
        if (leadingStripped.startsWith("<"))
            return tag(leadingStripped);
    }

    // start or continue as text
    if (!buffering || !token.startsWith("<")) {
        this.currentText.append(token);
        return HTMLState.TEXT;
    }

    // handle string end or nested elements
    this.elementBuilders.peek().setText(this.currentText.toString());
    // always reset
    this.currentText = new StringBuilder();

    final String trailingStripped = token.stripTrailing();
    return token.startsWith("</") ? close_tag(trailingStripped) : tag(trailingStripped);
}
/**
 * Declare an attribute.
 *
 * <p>A token that starts or ends with {@code '>'} is handed to {@code text}. Otherwise the
 * part before the first {@code '='} (or the whole token if there is none) becomes the
 * current attribute name.
 *
 * @param token attribute token
 * @return next state
 */
private HTMLState attribute(String token) {
    if (token.startsWith(">") || token.endsWith(">"))
        return text(token);

    final int equalsAt = token.indexOf('=');
    if (equalsAt < 0) {
        // name only; the equals sign is expected in the next token
        this.currentAttribute = new StringBuilder(token);
        return HTMLState.ATTRIBUTE_EQUALS;
    }

    // declaration and equals sign share one token: continue with the part from '=' on
    this.currentAttribute = new StringBuilder(token.substring(0, equalsAt));
    return attribute_equals(token.substring(equalsAt));
}
/**
 * Handle the equals operator between attribute declaration and definition.
 *
 * <p>A lone {@code "="} switches to the VALUE state. A token that also contains a quote is
 * passed on to {@code value}, starting one character before the first quote. A token that
 * starts with a quote has no preceding character and no equals sign; it is rejected with
 * {@link UnexpectedTokenException} instead of failing with an index error.
 *
 * @param token equals operator, optionally followed by a quoted value
 * @return next state
 * @throws UnexpectedTokenException if the token is neither {@code "="} nor contains a
 *         quote preceded by at least one character
 */
private HTMLState attribute_equals(String token) {
    boolean dq = token.contains("\""), sq = token.contains("'");
    if (dq || sq) {
        char quot = dq ? '"' : '\'';
        int quoteIndex = token.indexOf(quot);
        // quoteIndex - 1 would be -1 here and make substring throw
        if (quoteIndex == 0)
            throw new UnexpectedTokenException(token, this.currentLine, this.currentState);
        // Recursion if equals and value are in the same token
        return value(token.substring(quoteIndex - 1), quot);
    }
    if (token.equals("="))
        return HTMLState.VALUE;
    throw new UnexpectedTokenException(token, this.currentLine, this.currentState);
}
/**
 * Define an attribute from a token of its own. Delegates to
 * {@link #value(String, char)} with a space as the quotation sign, which that overload
 * treats as "take the quotation sign from the first character of the token".
 *
 * @param token attribute value
 * @return next state
 */
private HTMLState value(String token) {
    final char noExplicitQuote = ' ';
    return value(token, noExplicitQuote);
}
/**
 * Define a string attribute.
 *
 * <p>When {@code quot} is a space, no quotation sign was determined by the caller: the
 * token must then start with {@code '} or {@code "}, and that first character is used as
 * the quotation sign. The previous guard had the second condition inverted, so it rejected
 * every double-quoted value and accepted unquoted ones.
 *
 * @param token attribute value
 * @param quot quotation sign, or a space to take it from the first character of the token
 * @return next state
 * @throws ExpectStringException if no quotation sign was given and the token does not
 *         start with one
 */
private HTMLState value(String token, char quot) {
    // expected string, got other
    if (quot == ' ' && !token.startsWith("'") && !token.startsWith("\""))
        throw new ExpectStringException(token, this.currentLine, this.currentState);
    this.currentValue = new StringBuilder();
    final char delimiter = quot != ' ' ? quot : token.charAt(0);
    // split by quote character
    String[] split = token.split(String.valueOf(delimiter));
    for (int i = 0; i < split.length; i++) {
        // handle escaped quote character
        if (split[i].endsWith("\\")) {
            this.currentValue.append(split[i]).append(delimiter);
            split[i] = "";
        }
    }
    // delete first quotation character
    if (!this.currentValue.isEmpty())
        this.currentValue.deleteCharAt(0);
    StringBuilder rebuilt = new StringBuilder();
    // TODO possible error source
    for (String s : split) {
        if (!s.isEmpty())
            rebuilt.append(s);
    }
    if (!rebuilt.isEmpty() && this.tagManager.hasText(stripTag(token)))
        return text(token);
    return HTMLState.ATTRIBUTE;
}
/**
 * Comment on code.
 *
 * <p>Tokens are collected in the text buffer until one contains "-->". That token ends the
 * comment: a builder named "--" receives the collected text and is pushed on the stack.
 * Text preceding "-->" inside the final token is not added to the comment text.
 *
 * @param token comment token
 * @return next state
 */
private HTMLState comment(String token) {
// Replaces an empty buffer with a new empty one; a non-empty buffer is kept.
if(this.currentText.isEmpty())
this.currentText = new StringBuilder();
// append comment
if(!token.contains("-->")) {
this.currentText.append(token);
return HTMLState.COMMENT;
}
// end comment
ElementBuilder elementBuilder = new ElementBuilder(this.parser, "--");
elementBuilder.setText(this.currentText.toString());
// always reset
this.currentText = new StringBuilder();
this.elementBuilders.push(elementBuilder);
// Splitting yields exactly one part e.g. for "abc-->": the saved state is resumed right
// away and close_tag is not called, so the "--" builder stays on the stack.
// NOTE(review): confirm that skipping close_tag here is intended.
if(token.split("-->").length == 1)
return commentResetInbetween();
// Otherwise restore the saved state first, then close using the token from "-->" onwards.
this.currentState = commentResetInbetween();
return close_tag(
token.substring(token.indexOf("-->"))
);
}
/**
 * Define the doctype.
 *
 * <p>The declared document type is not evaluated: handling of types other than "HTML" is
 * not implemented (might be in the future, might not). The state stays DOCTYPE until a
 * token ends with {@code '>'}.
 *
 * @param token document type
 * @return next state
 */
private HTMLState doctype(String token) {
    return token.endsWith(">") ? HTMLState.TEXT : HTMLState.DOCTYPE;
}
// Helper methods
/**
 * Hands back the saved inbetween state and resets the stored one to COMMENT.
 *
 * @return the inbetween state as it was before the reset
 */
private HTMLState commentResetInbetween() {
    final HTMLState resume = this.inbetweenState;
    this.inbetweenState = HTMLState.COMMENT;
    return resume;
}
/**
 * Enters the comment state, remembering the current state as the inbetween state so it
 * can be resumed when the comment ends. Never forget to set the inbetween state!
 *
 * @return HTMLState.COMMENT
 */
private HTMLState returnCommentState() {
    final HTMLState stateBeforeComment = this.currentState;
    this.inbetweenState = stateBeforeComment;
    return HTMLState.COMMENT;
}
/**
 * Processes the token through {@link #nextState(String)} and reports whatever state the
 * interpreter is in afterwards, instead of a state of the caller's own.
 *
 * @param token next token
 * @return this.currentState after the token has been processed
 */
private HTMLState nextTokenDontReturn(String token) {
    nextState(token);
    final HTMLState resulting = this.currentState;
    return resulting;
}
/**
 * Runs {@code close_tag} on the token, discards the state it returns and reports the
 * interpreter's current state instead.
 *
 * @param token next token
 * @return this.currentState
 */
private HTMLState closeTagDontReturn(String token) {
    close_tag(token);
    final HTMLState unchanged = this.currentState;
    return unchanged;
}
}

View File

@@ -0,0 +1,7 @@
package org.openautonomousconnection.htmlparser.interpreter;
/**
 * A token-driven interpreter: tokens are fed one at a time and the implementation keeps
 * whatever state it needs between calls.
 */
public interface Interpreter {
/**
 * Processes the next token.
 * @param token token to process
 */
void nextState(String token);
/**
 * @return whether the implementation considers its input complete
 */
boolean finished();
}

View File

@@ -0,0 +1,9 @@
package org.openautonomousconnection.htmlparser.interpreter.html.exception;
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
/**
 * Raised when a quoted string was expected but something else was found.
 */
public class ExpectStringException extends HTMLException {
    private static final String PREFIX = "Expected string, got: ";

    /**
     * @param value        the offending input
     * @param currentLine  line the interpreter was on
     * @param currentState state the interpreter was in
     */
    public ExpectStringException(String value, int currentLine, HTMLState currentState) {
        super(PREFIX + value, currentLine, currentState);
    }
}

View File

@@ -0,0 +1,9 @@
package org.openautonomousconnection.htmlparser.interpreter.html.exception;
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
/**
 * Base class of the interpreter's errors; extends the message with the line number and
 * the interpreter state.
 */
public class HTMLException extends RuntimeException {
    /**
     * @param message      description of the problem
     * @param currentLine  line the interpreter was on
     * @param currentState state the interpreter was in; must not be {@code null}
     */
    public HTMLException(String message, int currentLine, HTMLState currentState) {
        super(describe(message, currentLine, currentState));
    }

    /** Assembles the three-line message: problem, line, state. */
    private static String describe(String message, int currentLine, HTMLState currentState) {
        return new StringBuilder()
                .append(message)
                .append("\nat line: ").append(currentLine)
                .append("\nwith state: ").append(currentState.toString())
                .toString();
    }
}

View File

@@ -0,0 +1,9 @@
package org.openautonomousconnection.htmlparser.interpreter.html.exception;
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
/**
 * Raised when a token does not fit the state the interpreter is in.
 */
public class UnexpectedTokenException extends HTMLException {
    private static final String PREFIX = "Unexpected token: ";

    /**
     * @param token        the offending token
     * @param currentLine  line the interpreter was on
     * @param currentState state the interpreter was in
     */
    public UnexpectedTokenException(String token, int currentLine, HTMLState currentState) {
        super(PREFIX + token, currentLine, currentState);
    }
}

View File

@@ -0,0 +1,6 @@
package org.openautonomousconnection.htmlparser.interpreter.html.state;
/**
 * Two attribute-related states.
 * NOTE(review): presumably sub-states of attribute parsing (name declared / equals sign
 * seen); confirm usage, the interpreter code reviewed alongside this enum does not use it.
 */
public enum HTMLAttributeState {
DECLARATION,
EQUALS
}

View File

@@ -0,0 +1,16 @@
// Author: maple
// date: 9/24/25
package org.openautonomousconnection.htmlparser.interpreter.html.state;
/**
 * States of the HTML interpreter. Each token is dispatched on the current state.
 */
public enum HTMLState {
// an opening tag is expected
TAG,
// a closing tag is expected
CLOSE_TAG,
// an attribute name is expected
ATTRIBUTE,
// the '=' between attribute name and value is expected
ATTRIBUTE_EQUALS,
// an attribute value is expected
VALUE,
// text content, or the start of the next tag
TEXT,
// tokens are forwarded to the script interpreter
SCRIPT,
// inside a comment, until a token contains "-->"
COMMENT,
// inside the doctype declaration, until a token ends with '>'
DOCTYPE
}

View File

@@ -0,0 +1,144 @@
// Author: maple
// date: 9/28/25
package org.openautonomousconnection.htmlparser.interpreter.script;
import lombok.Getter;
import org.openautonomousconnection.StringUtils_Remove_Please;
import org.openautonomousconnection.htmlparser.Parser;
import org.openautonomousconnection.htmlparser.TagManager;
import org.openautonomousconnection.htmlparser.interpreter.ElementBuilder;
import org.openautonomousconnection.htmlparser.interpreter.Interpreter;
/**
 * Base class for interpreters that consume the content of a script element. Provides
 * {@link #parseScript(String, int[])}, which collects script text chunk by chunk until the
 * closing script tag is found outside of a string literal.
 */
public abstract class ScriptInterpreter implements Interpreter {
    @Getter
    protected Parser parser;
    @Getter
    protected ElementBuilder elementBuilder;
    protected TagManager tagManager;
    /** Script text collected so far; {@code null} while no script is in progress. */
    StringBuilder currentText = null;
    public int currentLine;

    public ScriptInterpreter(Parser parser) {
        this.parser = parser;
        this.tagManager = parser.getTagManager();
    }

    /**
     * Scans {@code html} from {@code indexHolder[0]} for the end of the script.
     *
     * <p>Single-, double- and triple-quoted string literals are skipped, including
     * backslash escapes inside them. Outside of a literal, each position is checked for
     * {@code </script} (ignoring whitespace and case) followed by {@code '>'}. Every line
     * feed that is scanned increments {@code currentLine}. Quote state is local to one
     * call and is not carried over to the next chunk.
     *
     * <p>Fixes compared to the previous version: the search for {@code '>'} now starts at
     * the current position (searching from 0 made {@code substring} throw once the scan
     * had passed the first {@code '>'}); a chunk without any {@code '>'} no longer loops
     * forever; and when the closing tag is found, the text collected by earlier calls is
     * included in the result instead of being dropped.
     *
     * @param html        chunk of input to scan
     * @param indexHolder one-element array; on entry the start index, on return the index
     *                    where scanning stopped (the {@code '>'} of the closing tag, or the
     *                    end of the chunk)
     * @return the script text collected so far, across all calls since the script started
     */
    public String parseScript(String html, int[] indexHolder) {
        if (this.currentText == null)
            this.currentText = new StringBuilder();
        int i = indexHolder[0];
        StringBuilder script = new StringBuilder();
        boolean inString = false;
        boolean inTriple = false;
        char stringChar = 0; // ' or "
        while (i < html.length()) {
            char c = html.charAt(i);
            if (c == '\n')
                this.currentLine++;
            if (!inString) {
                if (c == '\'' || c == '"') {
                    // opening quote: three in a row start a triple-quoted literal
                    inString = true;
                    stringChar = c;
                    inTriple = countSameQuotes(html, i, c) >= 3;
                    if (inTriple) {
                        i += 3;
                        script.append(stringChar).append(stringChar).append(stringChar);
                        continue;
                    }
                }
            } else {
                if (c == '\\') {
                    // copy the backslash and the escaped character verbatim
                    script.append(c);
                    i++;
                    if (i < html.length())
                        script.append(html.charAt(i));
                    i++;
                    continue;
                }
                if (inTriple) {
                    if (countSameQuotes(html, i, stringChar) >= 3) {
                        script.append(stringChar).append(stringChar).append(stringChar);
                        i += 3;
                        inString = false;
                        inTriple = false;
                        continue;
                    }
                } else if (c == stringChar) {
                    inString = false;
                }
            }
            if (!inString) {
                // look for '>' from the current position only
                int index = html.indexOf('>', i);
                if (index != -1) {
                    String closingTag = html.substring(i, index);
                    if (StringUtils_Remove_Please.equalsIgnoreWhiteSpaces("</script", closingTag)) {
                        indexHolder[0] = i + closingTag.length();
                        String result = this.currentText.append(script).toString();
                        this.currentText = null;
                        return result;
                    }
                }
            }
            script.append(c);
            i++;
        }
        indexHolder[0] = i;
        this.currentText.append(script);
        return this.currentText.toString();
    }

    /** Counts how many consecutive {@code quote} characters start at {@code index}. */
    private int countSameQuotes(String s, int index, char quote) {
        int count = 0;
        int i = index;
        while (i < s.length() && s.charAt(i) == quote) {
            count++;
            i++;
        }
        return count;
    }

    /**
     * @return {@code true} while no script text is being collected, i.e. before the first
     *         call to {@code parseScript} and after the closing tag has been found
     */
    @Override
    public boolean finished() {
        return this.currentText == null;
    }
}

View File

@@ -0,0 +1,115 @@
// Author: maple
// date: 9/28/25
package org.openautonomousconnection.htmlparser.interpreter.script.javascript;
import lombok.Getter;
import org.openautonomousconnection.StringUtils_Remove_Please;
import org.openautonomousconnection.htmlparser.Parser;
import org.openautonomousconnection.htmlparser.TagManager;
import org.openautonomousconnection.htmlparser.interpreter.HTMLInterpreter;
import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
import java.util.ArrayList;
import java.util.List;
/**
 * Placeholder interpreter for JavaScript content: collects the raw tokens and tracks
 * whether the input is currently inside a single- or double-quoted string.
 *
 * <p>The parser and tag manager are inherited from {@link ScriptInterpreter}; the
 * duplicate private fields that shadowed them have been removed.
 */
public class JavaScriptInterpreter extends ScriptInterpreter {
    // TODO: replace with actual interpreter

    /** Never set to true by the current code, so {@link #finished()} is always false. */
    private boolean scriptFinished = false;
    private StringBuilder text = new StringBuilder();
    boolean inSQ, inDQ;

    public JavaScriptInterpreter(Parser parser) {
        super(parser);
    }

    /**
     * @return all tokens received so far, concatenated
     */
    public String getText() {
        return this.text.toString();
    }

    /**
     * Appends the token to the collected text and toggles the quote flags according to the
     * first entry returned by {@code containsManySorted} for the two quote characters.
     *
     * <p>The former unused {@code token.split(sorted[0])} was dropped: it sat outside the
     * length guard and could fail with an index error on an empty result.
     *
     * @param token next script token
     */
    @Override
    public void nextState(String token) {
        String[] sorted = StringUtils_Remove_Please.containsManySorted(token, "\"", "'");
        this.text.append(token);
        if (sorted.length > 0 && !sorted[0].isEmpty()) {
            // xor since this toggles the string case
            inSQ = sorted[0].equals("'") ^ inSQ;
            inDQ = sorted[0].equals("\"") ^ inDQ;
        }
    }

    @Override
    public boolean finished() {
        return this.scriptFinished;
    }

    /** @return whether the input is currently inside a quoted string */
    private boolean inQuotes() {
        return this.inDQ || this.inSQ;
    }

    /**
     * Decides whether {@code c} opens or closes a string and toggles the matching flag.
     * A quote is ignored when it is escaped or when a string of the other kind is open.
     * Previously any unescaped character toggled the double-quote flag because the
     * check for {@code '"'} was missing.
     *
     * @param c        character to examine
     * @param previous character preceding {@code c}
     * @return {@code true} if {@code c} toggled a quote flag
     */
    private boolean isStringEncapsulator(char c, char previous) {
        boolean escaped = previous == '\\';
        if (c == '\'' && !escaped && !this.inDQ) {
            this.inSQ = !this.inSQ;
            return true;
        }
        else if (c == '"' && !escaped && !this.inSQ) {
            this.inDQ = !this.inDQ;
            return true;
        }
        return false;
    }
}

View File

@@ -0,0 +1,37 @@
package org.openautonomousconnection.htmlparser.interpreter.script.pyscript;
import org.openautonomousconnection.htmlparser.Parser;
import org.openautonomousconnection.htmlparser.interpreter.ElementBuilder;
import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
/**
 * Script interpreter that collects script text through {@code parseScript} and stores it
 * in a builder for the "script" tag.
 */
public class PyScriptInterpreter extends ScriptInterpreter {
    public PyScriptInterpreter(Parser parser) {
        super(parser);
    }

    /**
     * Parses the token from its start. The builder is created on first use; once
     * {@code finished()} reports true, the text returned by {@code parseScript} is set on
     * the builder and printed.
     *
     * @param token next script token
     */
    @Override
    public void nextState(String token) {
        if (this.elementBuilder == null) {
            this.elementBuilder = new ElementBuilder(this.parser, "script");
        }

        final int[] cursor = {0};
        final String parsed = this.parseScript(token, cursor);

        if (!this.finished()) {
            return;
        }
        this.elementBuilder.setText(parsed);
        System.out.println(parsed);
    }
}