first commit
This commit was merged in pull request #1.
This commit is contained in:
@@ -0,0 +1,136 @@
|
||||
package org.openautonomousconnection;
|
||||
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Temporary collection of string helpers.
 * Will be removed once UnlegitLibrary.StringUtils contains these methods.
 */
@Deprecated(since = "1.0")
public class StringUtils_Remove_Please {

    /**
     * Compares two strings while ignoring case and every whitespace character.
     *
     * @param s1 first string, must not be {@code null}
     * @param s2 second string, must not be {@code null}
     * @return {@code true} if both strings are equal once whitespace is removed and case is ignored
     */
    public static boolean equalsIgnoreWhiteSpaces(String s1, String s2) {
        return s1.replaceAll("\\s", "")
                .equalsIgnoreCase(s2.replaceAll("\\s", ""));
    }

    /**
     * Splits every token at each occurrence of {@code seq}, keeping the separator attached to the
     * end of the piece that precedes it. Tokens that do not contain {@code seq} are copied unchanged.
     * <p>
     * {@code seq} is matched literally (it is not interpreted as a regular expression).
     * Consecutive separators at the end of a token collapse into a single one, and a token that
     * consists only of separators yields a single {@code seq} piece.
     * <p>
     * After splitting, the last character of the final resulting token is removed when that token is
     * not empty. NOTE(review): this trimming is kept from the original implementation; its purpose is
     * not visible from this class, confirm with the callers before changing it.
     *
     * @param tokens tokens to split, must not be {@code null}
     * @param seq    literal separator sequence
     * @return the resulting tokens; an empty array if {@code tokens} is empty
     */
    public static String[] splitSeq(String[] tokens, String seq) {
        List<String> pieces = new ArrayList<>();

        // Quote the separator so characters such as '.', '|' or '(' are taken literally.
        String literalSeq = java.util.regex.Pattern.quote(seq);

        for (String token : tokens) {
            if (!token.contains(seq)) {
                pieces.add(token);
                continue;
            }

            String[] parts = token.split(literalSeq);

            if (parts.length == 0) {
                // The token was made of separators only: split() discarded every (empty) part.
                pieces.add(seq);
                continue;
            }

            int lastPart = parts.length - 1;

            for (int j = 0; j < lastPart; j++) {
                pieces.add(parts[j] + seq);
            }

            pieces.add(token.endsWith(seq) ? parts[lastPart] + seq : parts[lastPart]);
        }

        if (pieces.isEmpty()) {
            return new String[0];
        }

        int lastIndex = pieces.size() - 1;
        String lastToken = pieces.get(lastIndex);

        if (!lastToken.isEmpty()) {
            pieces.set(lastIndex, lastToken.substring(0, lastToken.length() - 1));
        }

        return pieces.toArray(new String[0]);
    }

    /**
     * Counts the non-overlapping, literal occurrences of {@code seq} inside {@code string}.
     *
     * @param string text to search, must not be {@code null}
     * @param seq    literal sequence to count, must not be {@code null}
     * @return number of occurrences; {@code 0} if {@code seq} is empty
     */
    public static int countSeq(String string, String seq) {
        if (seq.isEmpty()) {
            return 0;
        }

        int amount = 0;
        int position = string.indexOf(seq);

        while (position != -1) {
            amount++;
            position = string.indexOf(seq, position + seq.length());
        }

        return amount;
    }

    /**
     * Separates a text into the parts outside and inside of capsule pairs (for example quotes).
     * <p>
     * The capsule that occurs first in the remaining text opens a section, its next occurrence
     * closes it. Element 0 of the result holds the texts outside of capsules, element 1 the texts
     * between an opening and a closing capsule. If a capsule is opened but never closed, the rest of
     * the text is treated as the enclosed part.
     *
     * @param text     text to analyse, must not be {@code null}
     * @param capsules capsule sequences, e.g. {@code "\""} and {@code "'"}
     * @return a list with exactly two lists: outside texts and enclosed texts
     */
    public static List<List<String>> getEncapsulatedTexts(String text, String... capsules) {
        List<String> outside = new ArrayList<>();
        List<String> inside = new ArrayList<>();

        List<List<String>> lists = new ArrayList<>();
        lists.add(outside);
        lists.add(inside);

        while (!text.isEmpty()) {
            String[] sorted = containsManySorted(text, capsules);
            String capsule = sorted.length == 0 ? "" : sorted[0];

            if (capsule.isEmpty()) {
                // No capsule left: everything remaining is plain outside text.
                outside.add(text);
                break;
            }

            int open = text.indexOf(capsule);
            String out = text.substring(0, open);
            text = text.substring(open + capsule.length());

            int close = text.indexOf(capsule);

            if (close < 0) {
                // Unterminated capsule: keep the remainder as enclosed text instead of failing.
                outside.add(out);
                inside.add(text);
                break;
            }

            String in = text.substring(0, close);
            text = text.substring(close + capsule.length());

            outside.add(out);
            inside.add(in);
        }

        return lists;
    }

    /**
     * Checks which of the given strings are contained in {@code string}.
     *
     * @param string  text to search, must not be {@code null}
     * @param strings candidates
     * @return an array parallel to {@code strings}; an entry is the candidate itself if it is
     *         contained in {@code string} and {@code null} otherwise
     */
    public static String[] containsMany(String string, String... strings) {
        String[] result = new String[strings.length];

        for (int i = 0; i < strings.length; i++) {
            if (string.contains(strings[i])) {
                result[i] = strings[i];
            }
        }

        return result;
    }

    /**
     * Returns the candidates contained in {@code string}, ordered by their first position in it.
     * <p>
     * The result always has the length of {@code strings}: contained candidates come first, the
     * remaining entries are {@code null}. If no candidate is contained (and at least one was given),
     * the first entry is the empty string.
     *
     * @param string  text to search, must not be {@code null}
     * @param strings candidates
     * @return the contained candidates sorted by first occurrence, padded as described above
     */
    public static String[] containsManySorted(String string, String... strings) {
        String[] result = new String[strings.length];

        List<StringPositionSorted> found = new ArrayList<>();

        for (String candidate : strings) {
            int position = string.indexOf(candidate);

            if (position >= 0) {
                found.add(new StringPositionSorted(candidate, position));
            }
        }

        if (found.isEmpty()) {
            // Callers test the first entry with isEmpty(), so signal "nothing found" with "".
            if (result.length > 0) {
                result[0] = "";
            }
            return result;
        }

        StringPositionSorted[] records = found.toArray(new StringPositionSorted[0]);

        // Stable sort: candidates found at the same position keep their argument order.
        Arrays.sort(records);

        for (int i = 0; i < records.length; i++) {
            result[i] = records[i].string();
        }

        return result;
    }

    /**
     * A candidate string together with the index of its first occurrence; ordered by that index.
     */
    private record StringPositionSorted(String string, int position) implements Comparable<StringPositionSorted> {
        @Override
        public int compareTo(StringPositionSorted o) {
            return Integer.compare(this.position, o.position);
        }
    }
}
|
||||
@@ -0,0 +1,157 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser;
|
||||
|
||||
import dev.unlegitdqrk.unlegitlibrary.string.StringUtils;
|
||||
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLComment;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class DocumentBuilder {
|
||||
@Getter @Setter
|
||||
protected String content;
|
||||
|
||||
@Getter
|
||||
protected List<HTMLComment> comments;
|
||||
|
||||
@Getter
|
||||
protected List<String> attributes, texts, tags;
|
||||
|
||||
public DocumentBuilder(String content) {
|
||||
this.content = content; //content.replace("\n", "");
|
||||
this.comments = new ArrayList<>();
|
||||
this.attributes = new ArrayList<>();
|
||||
this.texts = new ArrayList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts all comments and strings into lists
|
||||
*/
|
||||
public void extract() {
|
||||
this.extractComments();
|
||||
this.extractStringsAndAttributes();
|
||||
this.extractTexts();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* inserts the extracts back into the content string
|
||||
*/
|
||||
public void insert() {
|
||||
this.insertTexts();
|
||||
this.insertStringsAndAttributes();
|
||||
this.insertComments();
|
||||
}
|
||||
|
||||
protected void extractComments() {
|
||||
Pattern pattern = Pattern.compile("<!--(.*?)-->", Pattern.DOTALL);
|
||||
|
||||
Matcher matcher = pattern.matcher(content);
|
||||
|
||||
|
||||
int index = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
this.content = this.content.replace("<!--" + matcher.group(1) + "-->", "<!--C" + index + "-->");
|
||||
|
||||
this.comments.add(new HTMLComment(matcher.group(1)));
|
||||
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
protected void insertComments() {
|
||||
if(this.comments.isEmpty())
|
||||
return;
|
||||
|
||||
int i = 0;
|
||||
for(; i < this.comments.size(); i++)
|
||||
this.content = this.content.replace("<!--C" + i + "-->", this.comments.get(i).toString());
|
||||
|
||||
for(; i > 0; i--)
|
||||
this.comments.removeFirst();
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected void extractStringsAndAttributes() {
|
||||
Pattern pattern = Pattern.compile("\"(.*?)\"|'(.*?)'", Pattern.DOTALL);
|
||||
|
||||
Matcher matcher = pattern.matcher(this.content);
|
||||
|
||||
|
||||
int index = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
|
||||
if(matcher.group(1) != null) {
|
||||
this.content = this.content.replace("\"" + matcher.group(1) + "\"", "\"S" + index + "\"");
|
||||
|
||||
this.attributes.add(matcher.group(1));
|
||||
}
|
||||
|
||||
else {
|
||||
this.content = this.content.replace("'" + matcher.group(2) + "'", "'S" + index + "'");
|
||||
|
||||
this.attributes.add(matcher.group(2));
|
||||
}
|
||||
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
protected void insertStringsAndAttributes() {
|
||||
if(this.attributes.isEmpty())
|
||||
return;
|
||||
int i = 0;
|
||||
for(; i < this.attributes.size(); i++) {
|
||||
this.content = this.content.replace("\"S" + i + "\"", "\"" + attributes.get(i) + "\"");
|
||||
this.content = this.content.replace("'S" + i + "'", "'" + attributes.get(i) + "'");
|
||||
}
|
||||
|
||||
for(; i > 0; i--)
|
||||
this.attributes.removeFirst();
|
||||
}
|
||||
|
||||
protected void extractTexts() {
|
||||
Pattern pattern = Pattern.compile(">([^<]+)(?=<)", Pattern.DOTALL);
|
||||
|
||||
Matcher matcher = pattern.matcher(content);
|
||||
|
||||
|
||||
int index = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
if(StringUtils.isEmptyString(matcher.group(1)))
|
||||
continue;
|
||||
|
||||
this.content = this.content.replace(">" + matcher.group(1) + "<", ">T" + index + "<");
|
||||
|
||||
this.texts.add(matcher.group(1));
|
||||
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
protected void insertTexts() {
|
||||
if(this.texts.isEmpty())
|
||||
return;
|
||||
|
||||
int i = 0;
|
||||
for(; i < this.texts.size(); i++)
|
||||
this.content = this.content.replace(">T" + i + "<", ">" + this.texts.get(i) + "<");
|
||||
|
||||
for(; i > 0; i--)
|
||||
this.texts.removeFirst();
|
||||
}
|
||||
|
||||
protected void extractTags() {
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser;
|
||||
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
public record ParseResult(String tagname, String assumption) implements Comparable<ParseResult> {
|
||||
|
||||
public int compareSelf() {
|
||||
return this.tagname.compareToIgnoreCase(this.assumption);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(@NotNull ParseResult o) {
|
||||
return this.compareSelf() - o.compareSelf();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.CustomHTMLElement;
|
||||
import org.openautonomousconnection.htmlparser.html.HTML;
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.HTMLInterpreter;
|
||||
import lombok.Getter;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.script.pyscript.PyScriptInterpreter;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.openautonomousconnection.StringUtils_Remove_Please.splitSeq;
|
||||
|
||||
|
||||
public class Parser {
|
||||
|
||||
public static String DEFAULT_TITLE = "untitled";
|
||||
|
||||
@Getter
|
||||
private final TagManager tagManager;
|
||||
|
||||
@Getter
|
||||
private HTML html;
|
||||
|
||||
private final String[] tokens;
|
||||
|
||||
public Parser(String content, TagManager tagManager) {
|
||||
|
||||
this.html = new HTML();
|
||||
this.tagManager = tagManager;
|
||||
|
||||
String[] split = splitSeq(new String[]{content}, ">");
|
||||
|
||||
// TODO: you can do this using regex \\s in one line instead of 3
|
||||
|
||||
String[] split_spaces = splitSeq(split, " ");
|
||||
String[] split_tabs = splitSeq(split_spaces, "\t");
|
||||
|
||||
this.tokens = splitSeq(split_tabs, "\n");
|
||||
|
||||
// for(String s : tokens)
|
||||
// System.out.print(s);
|
||||
// System.out.println();
|
||||
|
||||
// List<List<String>> l = StringUtils_Remove_Please.getEncapsulatedTexts("""
|
||||
//
|
||||
// part UNO"part dos'stillpartdos'" 'PART TRES YAYAYYA' and gone bye.
|
||||
//
|
||||
// """, "\"", "'");
|
||||
//
|
||||
// for(List<String> list : l)
|
||||
// for(String s : list)
|
||||
// System.out.println("s: " + s);
|
||||
|
||||
System.out.println();
|
||||
System.out.println(this.parse());
|
||||
|
||||
}
|
||||
|
||||
public HTML parse() {
|
||||
HTMLInterpreter interpreter = new HTMLInterpreter(this, new PyScriptInterpreter(this));
|
||||
|
||||
for(String s : this.tokens)
|
||||
interpreter.nextState(s);
|
||||
|
||||
|
||||
return interpreter.getResult();
|
||||
}
|
||||
|
||||
public Class<? extends HTMLElement> getByTagname(String tagName) {
|
||||
tagName = tagName.toLowerCase();
|
||||
|
||||
Class<? extends HTMLElement> res = this.tagManager.tags.get(tagName);
|
||||
|
||||
return Objects.requireNonNullElse(res, CustomHTMLElement.class);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
Parser parser = new Parser("""
|
||||
<!DOCTYPE Html>
|
||||
<html>
|
||||
<body>
|
||||
<p>a paragraph <span color='green'> in color! </span> test </p>
|
||||
<br>
|
||||
<!-- this is a comment -->
|
||||
|
||||
<!--<script> print("<hi>"); ignore pls
|
||||
</script>-->
|
||||
<script> print("<hoi>");
|
||||
</script>
|
||||
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
||||
""", new TagManager());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import org.openautonomousconnection.htmlparser.html.NoContent;
|
||||
import org.openautonomousconnection.htmlparser.html.body.HTMLBody;
|
||||
import org.openautonomousconnection.htmlparser.html.body.buttons.HTMLButton;
|
||||
import org.openautonomousconnection.htmlparser.html.body.form.HTMLForm;
|
||||
import org.openautonomousconnection.htmlparser.html.body.form.HTMLInput;
|
||||
import org.openautonomousconnection.htmlparser.html.body.form.HTMLLabel;
|
||||
import org.openautonomousconnection.htmlparser.html.body.link.HTMLArea;
|
||||
import org.openautonomousconnection.htmlparser.html.body.link.HTMLHyperlink;
|
||||
import org.openautonomousconnection.htmlparser.html.body.link.HTMLImage;
|
||||
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLBreak;
|
||||
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLComment;
|
||||
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLDiv;
|
||||
import org.openautonomousconnection.htmlparser.html.body.misc.HTMLScript;
|
||||
import org.openautonomousconnection.htmlparser.html.body.texts.HTMLAbbreviation;
|
||||
import org.openautonomousconnection.htmlparser.html.body.texts.heading.HTMLHeading;
|
||||
import org.openautonomousconnection.htmlparser.html.body.texts.heading.HeadingType;
|
||||
import org.openautonomousconnection.htmlparser.html.body.texts.text.HTMLText;
|
||||
import org.openautonomousconnection.htmlparser.html.body.texts.text.TextType;
|
||||
import org.openautonomousconnection.htmlparser.html.header.HTMLHeader;
|
||||
import org.openautonomousconnection.htmlparser.html.header.HTMLTitle;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class TagManager {
|
||||
public Map<String, Class<? extends HTMLElement>> tags;
|
||||
|
||||
public void putTag(Class<? extends HTMLElement> tag) {
|
||||
try {
|
||||
this.tags.put((String) tag.getDeclaredField("TAG").get(tag), tag);
|
||||
} catch (NoSuchFieldException | IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isTag(String tagName) {
|
||||
return this.tags.containsKey(tagName);
|
||||
}
|
||||
|
||||
public boolean isTagSpaced(String tagName) {
|
||||
return this.isTag(tagName.replaceAll("\\s", ""));
|
||||
}
|
||||
|
||||
// public boolean hasClosingTag(String tagName) {
|
||||
// try {
|
||||
// return (boolean) this.tags.get(tagName).getField("CLOSEABLE").get(null);
|
||||
// } catch (NoSuchFieldException | IllegalAccessException e) {
|
||||
// throw new RuntimeException(e);
|
||||
// }
|
||||
// }
|
||||
|
||||
public boolean hasText(String tagName) {
|
||||
if(!this.isTag(tagName))
|
||||
return false;
|
||||
|
||||
return !this.tags.get(tagName).isAnnotationPresent(NoContent.class);
|
||||
}
|
||||
|
||||
public TreeSet<ParseResult> couldBe(String string) {
|
||||
TreeSet<ParseResult> result = new TreeSet<>();
|
||||
|
||||
for(String tagName : tags.keySet())
|
||||
if(tagName.contains(string))
|
||||
result.add(new ParseResult(string, tagName));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public TagManager() {
|
||||
// Map default tags
|
||||
|
||||
this.tags = new HashMap<>();
|
||||
|
||||
// buttons
|
||||
this.putTag(HTMLButton.class);
|
||||
|
||||
// forms
|
||||
this.putTag(HTMLForm.class);
|
||||
this.putTag(HTMLInput.class);
|
||||
this.putTag(HTMLLabel.class);
|
||||
|
||||
// links
|
||||
this.putTag(HTMLArea.class);
|
||||
this.putTag(HTMLHyperlink.class);
|
||||
this.putTag(HTMLImage.class);
|
||||
|
||||
// misc
|
||||
this.putTag(HTMLBreak.class);
|
||||
this.putTag(HTMLDiv.class);
|
||||
this.putTag(HTMLScript.class);
|
||||
this.putTag(HTMLComment.class);
|
||||
|
||||
// headings
|
||||
for(HeadingType type : HeadingType.values())
|
||||
this.tags.put(type.getTag(), HTMLHeading.class);
|
||||
|
||||
// texts
|
||||
for(TextType type : TextType.values())
|
||||
this.tags.put(type.getTag(), HTMLText.class);
|
||||
|
||||
this.putTag(HTMLAbbreviation.class);
|
||||
|
||||
// headers
|
||||
this.putTag(HTMLHeader.class);
|
||||
this.putTag(HTMLTitle.class);
|
||||
|
||||
// main elements
|
||||
this.putTag(HTMLBody.class);
|
||||
this.putTag(HTMLHeader.class);
|
||||
this.putTag(HTMLImage.class);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.exception;
|
||||
|
||||
/**
 * {@link NullPointerException} with the fixed message {@code "Tag can't be null!"}.
 */
public class NullTagException extends NullPointerException {

    /** Message passed to the superclass by the only constructor. */
    private static final String MESSAGE = "Tag can't be null!";

    /** Creates the exception with its fixed message. */
    public NullTagException() {
        super(MESSAGE);
    }
}
|
||||
@@ -0,0 +1,35 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
public class CustomHTMLElement extends HTMLElement{
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
@Getter @Setter
|
||||
private String text;
|
||||
|
||||
public CustomHTMLElement(String tag, String text, Map<String, String> attributes) {
|
||||
this.tagName = tag;
|
||||
this.text = text;
|
||||
this.attributes = attributes;
|
||||
|
||||
this.id = Optional.of(attributes.get("id"));
|
||||
}
|
||||
|
||||
// @Override
|
||||
// public String toString() {
|
||||
// return otag() + this.text + ctag();
|
||||
// }
|
||||
|
||||
public static CustomHTMLElement instantiate(String text, Map<String, String> attributes) {
|
||||
return new CustomHTMLElement("UNKNOWN_ELEMENT", text, attributes);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.HTMLBody;
|
||||
import org.openautonomousconnection.htmlparser.html.header.HTMLHeader;
|
||||
import org.openautonomousconnection.htmlparser.html.misc.HTMLClass;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Parent element for all HTML content
|
||||
*/
|
||||
public class HTML extends HTMLElement{
|
||||
public static final String TAG = "html";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public final List<HTMLClass> classes;
|
||||
|
||||
public HTML(HTMLHeader header, HTMLBody body) {
|
||||
super(null);
|
||||
this.header = header;
|
||||
this.body = body;
|
||||
|
||||
this.tagName = TAG;
|
||||
|
||||
this.classes = new ArrayList<>();
|
||||
}
|
||||
|
||||
public HTML() {
|
||||
this(null, null);
|
||||
}
|
||||
|
||||
@Getter
|
||||
private HTMLHeader header;
|
||||
|
||||
@Getter
|
||||
private HTMLBody body;
|
||||
|
||||
@Override
|
||||
public HTMLElement append(HTMLElement element) {
|
||||
if(element instanceof HTMLHeader)
|
||||
this.header = (HTMLHeader) element;
|
||||
else if(element instanceof HTMLBody)
|
||||
this.body = (HTMLBody) element;
|
||||
else
|
||||
super.append(element);
|
||||
|
||||
return element;
|
||||
|
||||
}
|
||||
|
||||
public HTMLBody setBody(HTMLBody body) {
|
||||
this.body = body;
|
||||
|
||||
this.body.parent = this;
|
||||
|
||||
return this.body;
|
||||
}
|
||||
|
||||
public HTMLHeader setHeader(HTMLHeader header) {
|
||||
this.header = header;
|
||||
|
||||
this.header.parent = this;
|
||||
|
||||
return this.header;
|
||||
}
|
||||
|
||||
// @Override
|
||||
// public String toString() {
|
||||
// return otag()
|
||||
// + "\n\t"
|
||||
// + header.toString() + "\n\t"
|
||||
// + body.toString() + "\n"
|
||||
// + ctag();
|
||||
// }
|
||||
|
||||
public static HTML instantiate(String text, Map<String, String> attributes) {
|
||||
HTML html = new HTML();
|
||||
|
||||
html.setAttributes(attributes);
|
||||
|
||||
return html;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.misc.HTMLClass;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.*;
|
||||
|
||||
public abstract class HTMLElement {
|
||||
|
||||
@Getter
|
||||
protected HTMLElement parent;
|
||||
|
||||
@Getter
|
||||
protected List<HTMLElement> children;
|
||||
|
||||
@Getter
|
||||
protected String tagName;
|
||||
|
||||
@Getter @Setter
|
||||
protected Optional<String> id;
|
||||
|
||||
@Getter @Setter
|
||||
protected Optional<HTMLClass> htmlClass;
|
||||
|
||||
@Getter @Setter
|
||||
protected Map<String, String> attributes;
|
||||
|
||||
protected HTMLElement(@Nullable HTMLElement parent) {
|
||||
this.parent = parent;
|
||||
|
||||
this.attributes = new HashMap<>();
|
||||
|
||||
this.id = Optional.empty();
|
||||
this.htmlClass = Optional.empty();
|
||||
|
||||
this.children = new ArrayList<>();
|
||||
}
|
||||
|
||||
protected HTMLElement() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
|
||||
// @Override
|
||||
// public abstract String toString();
|
||||
|
||||
public HTMLElement append(HTMLElement element) {
|
||||
element.parent = this;
|
||||
this.children.add(element);
|
||||
return element;
|
||||
}
|
||||
|
||||
public final String toString() {
|
||||
if(this.getClass().isAnnotationPresent(NoContent.class))
|
||||
return otag();
|
||||
|
||||
|
||||
StringBuilder sb = new StringBuilder(otag()).append("\n\t");
|
||||
|
||||
for(HTMLElement child : this.children)
|
||||
sb.append(child).append("\n\t");
|
||||
|
||||
return sb.append(ctag()).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reload ID from attributes map
|
||||
*/
|
||||
public void reloadId() {
|
||||
if(this.attributes.containsKey("id")) {
|
||||
this.id = Optional.of(this.attributes.get("id"));
|
||||
this.attributes.remove("id");
|
||||
}
|
||||
}
|
||||
|
||||
protected String getIdString() {
|
||||
return this.id.map(string -> "id='" + string +"' ").orElse("");
|
||||
}
|
||||
|
||||
protected String getClassString() {
|
||||
return this.htmlClass.map(htmlClass -> "class='" + htmlClass.getClassName() + "' ").orElse("");
|
||||
}
|
||||
|
||||
protected String getAttributesString() {
|
||||
StringBuilder sb = new StringBuilder(" ");
|
||||
for(String string : this.attributes.keySet())
|
||||
sb.append(string)
|
||||
.append("='")
|
||||
.append(this.attributes.get(string))
|
||||
.append("' ");
|
||||
|
||||
if(!sb.toString().equals(" "))
|
||||
return sb.substring(0, sb.length()-1);
|
||||
else
|
||||
return "";
|
||||
}
|
||||
|
||||
protected String otag() {
|
||||
return "<" + this.tagName + getIdString() + getClassString() + getAttributesString() + ">";
|
||||
}
|
||||
|
||||
protected String otag(String _attributes) {
|
||||
return "<" + this.tagName + getIdString() + getClassString() + " " + _attributes + getAttributesString() + ">";
|
||||
}
|
||||
|
||||
protected String ctag() {
|
||||
return "</" + this.tagName + ">";
|
||||
}
|
||||
|
||||
protected String cutTag(String string, String _attributes) {
|
||||
return string.replaceFirst("<" + tagName + " " + _attributes + ">", "").replaceFirst("</" + tagName + ">", "").trim();
|
||||
}
|
||||
|
||||
protected String cutTag(String string) {
|
||||
return cutTag(string, "");
|
||||
}
|
||||
|
||||
// protected static Class<? extends HTMLElement> getNext(Parser parser, String string) {
|
||||
// String sub = string.substring(string.indexOf("<" + 1)).split(" ")[0];
|
||||
//
|
||||
// return parser.getByTagname(sub);
|
||||
// }
|
||||
//
|
||||
|
||||
/**
|
||||
* only use if child objects can exist
|
||||
* @return parsed child objects
|
||||
*/
|
||||
|
||||
// TODO: 1. handle comments 2. somehow handle non-tag >s & <s
|
||||
|
||||
public static HTMLElement instantiate(Class<? extends HTMLElement> elementClass, String text, Map<String, String> attributes) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException {
|
||||
return (HTMLElement) elementClass.getMethod("instantiate", String.class, Map.class).invoke(null, text, attributes);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package org.openautonomousconnection.htmlparser.html;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
/**
 * Marker annotation for types, available at runtime through reflection.
 */
@Target({ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface NoContent {
}
|
||||
@@ -0,0 +1,22 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public abstract class BodyElement extends HTMLElement {
|
||||
@Getter @Setter
|
||||
protected String text;
|
||||
|
||||
protected BodyElement(@Nullable HTMLElement parent) {
|
||||
super(parent);
|
||||
}
|
||||
|
||||
protected BodyElement() {
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLBody extends HTMLElement {
|
||||
|
||||
public static final String TAG = "body";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
|
||||
public HTMLBody(List<HTMLElement> elements) {
|
||||
this.children = elements;
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLBody(HTMLElement... elements) {
|
||||
this.children = new ArrayList<>(Arrays.stream(elements).toList());
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLBody instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLBody body = new HTMLBody();
|
||||
|
||||
body.setAttributes(attributes);
|
||||
|
||||
body.reloadId();
|
||||
|
||||
return body;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.buttons;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
|
||||
public abstract class ButtonElement extends BodyElement {
|
||||
public String getScript() {
|
||||
return this.attributes.get("onclick");
|
||||
}
|
||||
|
||||
public void setScript(String script) {
|
||||
this.attributes.replace("onclick", script);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.buttons;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLButton extends ButtonElement {
|
||||
|
||||
public static final String TAG = "button";
|
||||
|
||||
public static final boolean CLOSEABLE = false;
|
||||
|
||||
public HTMLButton(String text) {
|
||||
this.text = text;
|
||||
this.tagName = TAG;
|
||||
}
|
||||
public HTMLButton(String text, String script) {
|
||||
this.text = text;
|
||||
this.setScript(script);
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLButton instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLButton form = new HTMLButton(text);
|
||||
|
||||
form.setAttributes(attributes);
|
||||
|
||||
form.reloadId();
|
||||
|
||||
return form;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.form;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
|
||||
public abstract class FormElement extends BodyElement {
|
||||
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.form;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLForm extends HTMLElement {
|
||||
|
||||
public static final String TAG = "form";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLForm() {
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLForm(String action) {
|
||||
this.attributes.put("action", action);
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLForm instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLForm form = new HTMLForm();
|
||||
|
||||
form.setAttributes(attributes);
|
||||
|
||||
form.reloadId();
|
||||
|
||||
return form;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.form;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.NoContent;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@NoContent
|
||||
public class HTMLInput extends FormElement {
|
||||
|
||||
public static final String TAG = "input";
|
||||
|
||||
public static final boolean CLOSEABLE = false;
|
||||
|
||||
public HTMLInput() {
|
||||
|
||||
}
|
||||
|
||||
public HTMLInput(String type, String name) {
|
||||
this.setType(type);
|
||||
this.setName(name);
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return this.attributes.get("title");
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return this.attributes.get("title");
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.attributes.replace("type", type);
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.attributes.replace("name", name);
|
||||
}
|
||||
|
||||
|
||||
public static HTMLInput instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLInput input = new HTMLInput(attributes.get("type"), attributes.get("name"));
|
||||
|
||||
input.setText(text);
|
||||
|
||||
attributes.remove("type");
|
||||
attributes.remove("name");
|
||||
|
||||
input.setAttributes(attributes);
|
||||
|
||||
input.reloadId();
|
||||
|
||||
return input;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.form;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLLabel extends FormElement {
|
||||
|
||||
public static final String TAG = "label";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLLabel(String text) {
|
||||
this.text = text;
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public String get_for() {
|
||||
return this.attributes.get("for");
|
||||
}
|
||||
|
||||
public void set_for(String _for) {
|
||||
this.attributes.replace("for", _for);
|
||||
}
|
||||
|
||||
public static HTMLLabel instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLLabel label = new HTMLLabel(text);
|
||||
|
||||
label.setAttributes(attributes);
|
||||
|
||||
label.reloadId();
|
||||
|
||||
return label;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.link;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLArea extends LinkElement {
|
||||
|
||||
public static final String TAG = "area";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public String getShape() {
|
||||
return this.attributes.get("shape");
|
||||
}
|
||||
|
||||
public String getCoords() {
|
||||
return this.attributes.get("coords");
|
||||
}
|
||||
|
||||
public void setShape(String shape) {
|
||||
this.attributes.replace("shape", shape);
|
||||
}
|
||||
|
||||
public void setCoords(String coords) {
|
||||
this.attributes.replace("coords", coords);
|
||||
}
|
||||
|
||||
public HTMLArea() {
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLArea(String src, String shape, String coords) {
|
||||
this.setSource(src);
|
||||
this.setShape(shape);
|
||||
this.setCoords(coords);
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLArea instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLArea area = new HTMLArea();
|
||||
|
||||
area.setText(text);
|
||||
|
||||
area.setAttributes(attributes);
|
||||
|
||||
area.reloadId();
|
||||
|
||||
return area;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.link;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLHyperlink extends LinkElement {
|
||||
|
||||
public static final String TAG = "a";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLHyperlink(String text) {
|
||||
this.text = text;
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLHyperlink(String text, String src) {
|
||||
this.text = text;
|
||||
this.setSource(src);
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLHyperlink instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLHyperlink hyperlink = new HTMLHyperlink(text);
|
||||
|
||||
hyperlink.setAttributes(attributes);
|
||||
|
||||
hyperlink.reloadId();
|
||||
|
||||
return hyperlink;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.link;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLImage extends LinkElement {
|
||||
|
||||
public static final String TAG = "img";
|
||||
|
||||
public static final boolean CLOSEABLE = false;
|
||||
|
||||
public HTMLImage() {
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLImage(String src) {
|
||||
this.setSource(src);
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLImage instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLImage image = new HTMLImage();
|
||||
|
||||
image.setText(text);
|
||||
|
||||
image.setAttributes(attributes);
|
||||
|
||||
image.reloadId();
|
||||
|
||||
return image;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.link;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
|
||||
public abstract class LinkElement extends BodyElement {
|
||||
protected LinkElement() {
|
||||
|
||||
}
|
||||
|
||||
public String getSource() {
|
||||
return this.attributes.get("src");
|
||||
}
|
||||
|
||||
public void setSource(String source) {
|
||||
this.attributes.replace("src", source);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.misc;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.NoContent;
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@NoContent
|
||||
public class HTMLBreak extends BodyElement {
|
||||
|
||||
public static final String TAG = "br";
|
||||
|
||||
public static final boolean CLOSEABLE = false;
|
||||
|
||||
public HTMLBreak() {
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
|
||||
public static HTMLBreak instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLBreak hbreak = new HTMLBreak();
|
||||
|
||||
hbreak.setText(text);
|
||||
|
||||
hbreak.setAttributes(attributes);
|
||||
|
||||
hbreak.reloadId();
|
||||
|
||||
return hbreak;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
package org.openautonomousconnection.htmlparser.html.body.misc;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@Getter @Setter
|
||||
|
||||
public class HTMLComment extends HTMLElement {
|
||||
public static String TAG = "--";
|
||||
|
||||
private String text;
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLComment(String text) {
|
||||
this.text = text;
|
||||
this.tagName = "";
|
||||
}
|
||||
|
||||
public static HTMLComment instantiate(String text, Map<String, String> attributes) {
|
||||
return new HTMLComment(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getIdString() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getClassString() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getAttributesString() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String otag() {
|
||||
return "<!--";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String ctag() {
|
||||
return "-->";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.misc;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLDiv extends HTMLElement {
|
||||
|
||||
public static final String TAG = "div";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLDiv() {
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLDiv instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLDiv div = new HTMLDiv();
|
||||
|
||||
div.setAttributes(attributes);
|
||||
|
||||
div.reloadId();
|
||||
|
||||
return div;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.misc;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLScript extends BodyElement {
|
||||
|
||||
public static final String TAG = "script";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLScript(String text) {
|
||||
this.text = text;
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public static HTMLScript instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLScript script = new HTMLScript(text);
|
||||
|
||||
script.setAttributes(attributes);
|
||||
|
||||
script.reloadId();
|
||||
|
||||
return script;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
package org.openautonomousconnection.htmlparser.html.body.texts;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLAbbreviation extends BodyElement {
|
||||
|
||||
public static final String TAG = "abbr";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLAbbreviation(String text) {
|
||||
this.text = text;
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLAbbreviation(String text, String title) {
|
||||
this.text = text;
|
||||
this.setTitle(title);
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return this.attributes.get("title");
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.attributes.replace("title", title);
|
||||
}
|
||||
|
||||
public static HTMLAbbreviation instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLAbbreviation abbreviation = new HTMLAbbreviation(text);
|
||||
|
||||
abbreviation.setAttributes(attributes);
|
||||
|
||||
abbreviation.reloadId();
|
||||
|
||||
return abbreviation;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.texts.heading;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLHeading extends BodyElement {
|
||||
|
||||
@Getter
|
||||
protected HeadingType type;
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLHeading(String text, HeadingType type) {
|
||||
this.text = text;
|
||||
this.type = type;
|
||||
|
||||
this.tagName = this.type.getTag();
|
||||
}
|
||||
|
||||
public void setType(HeadingType type) {
|
||||
this.type = type;
|
||||
this.tagName = this.type.getTag();
|
||||
}
|
||||
|
||||
public static HTMLHeading instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLHeading heading = new HTMLHeading(text, HeadingType.H1);
|
||||
|
||||
heading.setAttributes(attributes);
|
||||
|
||||
heading.reloadId();
|
||||
|
||||
return heading;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.body.texts.heading;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
 * Heading levels together with their tag names.
 */
public enum HeadingType {

    H1("h1"),
    H2("h2"),
    H3("h3"),
    H4("h4"),
    H5("h5"),
    H6("h6");

    // final: the tag of an enum constant must not change after construction
    private final String tag;

    HeadingType(String tag) {
        this.tag = tag;
    }

    /**
     * @return tag name of this heading level, e.g. {@code "h1"}
     */
    public String getTag() {
        return this.tag;
    }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package org.openautonomousconnection.htmlparser.html.body.texts.text;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLText extends BodyElement {
|
||||
|
||||
@Getter
|
||||
private TextType type;
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
protected HTMLText(String text, TextType type) {
|
||||
this.text = text;
|
||||
this.type = type;
|
||||
|
||||
this.tagName = this.type.getTag();
|
||||
}
|
||||
|
||||
public void setType(TextType type) {
|
||||
this.type = type;
|
||||
this.tagName = this.type.getTag();
|
||||
}
|
||||
|
||||
public static HTMLText instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLText htext = new HTMLText(text, TextType.PARAGRAPH);
|
||||
|
||||
htext.setAttributes(attributes);
|
||||
|
||||
htext.reloadId();
|
||||
|
||||
return htext;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package org.openautonomousconnection.htmlparser.html.body.texts.text;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
 * Kinds of text elements together with their tag names.
 */
public enum TextType {
    PARAGRAPH("p"),
    BOLD("b"),
    STRONG("strong"),
    ITALIC("i"),
    EMPHASIZED("em"),
    MARKED("mark"),
    SMALL("small"),
    DELETED("del"),
    INSERTED("ins"),
    SUBSCRIPT("sub"),
    SUPERSCRIPT("sup"),
    UNDERLINED("u"),
    SPAN("span");

    // final: the tag of an enum constant must not change after construction
    private final String tag;

    TextType(String tag) {
        this.tag = tag;
    }

    /**
     * @return tag name of this text kind, e.g. {@code "p"}
     */
    public String getTag() {
        return this.tag;
    }
}
|
||||
@@ -0,0 +1,49 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.header;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class HTMLHeader extends HTMLElement {
|
||||
|
||||
public static final String TAG = "head";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
@Getter
|
||||
private HTMLTitle title = null;
|
||||
|
||||
public HTMLHeader(List<HeaderElement> elements) {
|
||||
|
||||
for(HeaderElement element : elements)
|
||||
if(element instanceof HTMLTitle title)
|
||||
this.title = title;
|
||||
else
|
||||
this.append(element);
|
||||
|
||||
//this.elements = elements;
|
||||
|
||||
if(title == null)
|
||||
title = new HTMLTitle();
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLHeader(HeaderElement... element) {
|
||||
this(new ArrayList<>(Arrays.stream(element).toList()));
|
||||
}
|
||||
|
||||
public static HTMLHeader instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLHeader header = new HTMLHeader();
|
||||
|
||||
header.setAttributes(attributes);
|
||||
|
||||
header.reloadId();
|
||||
|
||||
return header;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.header;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.Parser;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class HTMLTitle extends HeaderElement {
|
||||
|
||||
public static final String TAG = "title";
|
||||
|
||||
public static final boolean CLOSEABLE = true;
|
||||
|
||||
public HTMLTitle(String text) {
|
||||
this.text = text;
|
||||
|
||||
this.tagName = TAG;
|
||||
}
|
||||
|
||||
public HTMLTitle() {
|
||||
this(Parser.DEFAULT_TITLE);
|
||||
}
|
||||
|
||||
public static HTMLTitle instantiate(String text, Map<String, String> attributes) {
|
||||
HTMLTitle title = new HTMLTitle(text);
|
||||
|
||||
title.setAttributes(attributes);
|
||||
|
||||
title.reloadId();
|
||||
|
||||
return title;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.header;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public abstract class HeaderElement extends BodyElement {
|
||||
@Getter @Setter
|
||||
protected String text;
|
||||
|
||||
protected HeaderElement(@Nullable HTMLElement parent) {
|
||||
super(parent);
|
||||
}
|
||||
|
||||
protected HeaderElement() {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
// Author: maple
|
||||
// date: 9/20/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.html.misc;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.html.HTML;
|
||||
import org.openautonomousconnection.htmlparser.html.body.BodyElement;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class HTMLClass {
|
||||
|
||||
@Getter @Setter
|
||||
protected String className;
|
||||
|
||||
public List<BodyElement> elements;
|
||||
|
||||
public HTMLClass(String className, HTML document) {
|
||||
this.className = className;
|
||||
|
||||
this.elements = new ArrayList<>();
|
||||
|
||||
document.classes.add(this);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.interpreter;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.Parser;
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class ElementBuilder {
|
||||
private Class<? extends HTMLElement> clazz;
|
||||
|
||||
@Getter @Setter
|
||||
private Map<String, String> attributes;
|
||||
|
||||
@Getter @Setter
|
||||
private String text, tagName;
|
||||
|
||||
/**
|
||||
* build a html Element
|
||||
* @param parser needed to retrieve element class (tagnames are relative)
|
||||
* @param tagName name of the tag
|
||||
*/
|
||||
public ElementBuilder(Parser parser, String tagName) {
|
||||
this.clazz = parser.getByTagname(tagName);
|
||||
|
||||
this.attributes = new HashMap<>();
|
||||
|
||||
this.tagName = tagName;
|
||||
|
||||
}
|
||||
|
||||
public HTMLElement build() {
|
||||
try {
|
||||
System.out.println(clazz.getSimpleName());
|
||||
|
||||
return HTMLElement.instantiate(clazz, text, attributes);
|
||||
} catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void appendText(String text) {
|
||||
if(this.text != null)
|
||||
this.text = this.text + text;
|
||||
else
|
||||
this.text = text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ElementBuilder{" +
|
||||
"clazz=" + clazz.getSimpleName() +
|
||||
", attributes=" + attributes +
|
||||
", text='" + text + '\'' +
|
||||
", tagName='" + tagName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,447 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.interpreter;
|
||||
|
||||
import org.openautonomousconnection.StringUtils_Remove_Please;
|
||||
import org.openautonomousconnection.htmlparser.Parser;
|
||||
import org.openautonomousconnection.htmlparser.TagManager;
|
||||
import org.openautonomousconnection.htmlparser.html.HTML;
|
||||
import org.openautonomousconnection.htmlparser.html.HTMLElement;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.html.exception.ExpectStringException;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.html.exception.UnexpectedTokenException;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
|
||||
import lombok.Getter;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Map;
|
||||
import java.util.Stack;
|
||||
|
||||
public class HTMLInterpreter implements Interpreter {
|
||||
@Getter
|
||||
private HTMLState currentState = HTMLState.TAG;
|
||||
|
||||
// Used to go up a layer after comment is opened
|
||||
private HTMLState inbetweenState = HTMLState.COMMENT;
|
||||
|
||||
@Getter
|
||||
private Parser parser;
|
||||
private TagManager tagManager;
|
||||
private Stack<ElementBuilder> elementBuilders;
|
||||
private StringBuilder currentAttribute, currentValue, currentText, currentClosingTag;
|
||||
|
||||
public int currentLine = 1;
|
||||
|
||||
private HTMLElement currentElement;
|
||||
|
||||
private ScriptInterpreter scriptInterpreter;
|
||||
|
||||
/**
 * Creates an interpreter for the given parser.
 *
 * @param parser            parser supplying the tag manager; must not be {@code null}
 * @param scriptInterpreter interpreter that receives tokens while in the SCRIPT state
 */
public HTMLInterpreter(Parser parser, ScriptInterpreter scriptInterpreter) {
    // collaborators
    this.parser = parser;
    this.scriptInterpreter = scriptInterpreter;
    this.tagManager = parser.getTagManager();

    // working buffers, all start empty
    this.currentAttribute = new StringBuilder();
    this.currentValue = new StringBuilder();
    this.currentText = new StringBuilder();
    this.currentClosingTag = new StringBuilder();

    // stack of elements that are currently open
    this.elementBuilders = new Stack<>();
}
|
||||
|
||||
@Override
|
||||
public void nextState(String token) {
|
||||
boolean newLine = token.endsWith("\n");
|
||||
|
||||
if(token.isBlank()) {
|
||||
if (newLine)
|
||||
this.currentLine++;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
this.currentState = switch (this.currentState) {
|
||||
case TAG -> tag(token.strip());
|
||||
case CLOSE_TAG -> close_tag(token.strip());
|
||||
case TEXT -> text(token);
|
||||
case DOCTYPE -> doctype(token.strip());
|
||||
case ATTRIBUTE -> attribute(token.strip());
|
||||
case ATTRIBUTE_EQUALS -> attribute_equals(token.strip());
|
||||
case COMMENT -> comment(token);
|
||||
case VALUE -> value(token);
|
||||
case SCRIPT -> script(token);
|
||||
default -> this.currentState;
|
||||
};
|
||||
|
||||
if(newLine)
|
||||
this.currentLine++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean finished() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public HTML getResult() {
|
||||
return (HTML) this.currentElement;
|
||||
}
|
||||
|
||||
// Only public at the moment because of JavaScriptInterpreter
|
||||
public static String stripTag(String token) {
|
||||
return token.replace("<","").replace(">","");
|
||||
}
|
||||
|
||||
/**
|
||||
* Open a script
|
||||
* @param token script
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState script(String token) {
|
||||
this.scriptInterpreter.currentLine = this.currentLine;
|
||||
|
||||
this.scriptInterpreter.nextState(token);
|
||||
|
||||
// TODO: Change for release. This is debug code
|
||||
if(this.scriptInterpreter.finished()) {
|
||||
// the ScriptInterpreter already has its own ElementBuilder
|
||||
this.elementBuilders.pop();
|
||||
|
||||
this.elementBuilders.push(this.scriptInterpreter.getElementBuilder());
|
||||
|
||||
return this.close_tag(token);
|
||||
}
|
||||
else
|
||||
return HTMLState.SCRIPT;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Open a tag
|
||||
* @param token tag
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState tag(String token) {
|
||||
String tagName = stripTag(token);
|
||||
|
||||
boolean hasText = this.tagManager.hasText(tagName);
|
||||
|
||||
if(tagName.equalsIgnoreCase("!DOCTYPE"))
|
||||
return HTMLState.DOCTYPE;
|
||||
|
||||
else if(tagName.stripLeading().startsWith("!--"))
|
||||
return returnCommentState();
|
||||
|
||||
this.elementBuilders.push(new ElementBuilder(this.parser, tagName));
|
||||
|
||||
|
||||
String[] split = new String[] {token};
|
||||
|
||||
if(token.contains(">"))
|
||||
split = StringUtils_Remove_Please.splitSeq(new String[]{
|
||||
token.substring(0, token.indexOf('>'))
|
||||
}, ">");
|
||||
|
||||
|
||||
// TODO: Change for release. This is debug code
|
||||
if(this.elementBuilders.peek().getTagName().equals("script"))
|
||||
return split.length == 1 ? HTMLState.SCRIPT : script(token.substring(token.indexOf(">")+1));
|
||||
|
||||
|
||||
if(!token.contains(">"))
|
||||
return HTMLState.ATTRIBUTE;
|
||||
|
||||
if(split.length == 1)
|
||||
return hasText ? HTMLState.TEXT : HTMLState.TAG;
|
||||
else
|
||||
return attribute(token.substring(token.indexOf('>')+1));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Close a tag
|
||||
* @param token closing tag
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState close_tag(String token) {
|
||||
System.out.println(Arrays.toString(this.elementBuilders.toArray()));
|
||||
this.currentClosingTag.append(token.toLowerCase().strip());
|
||||
|
||||
String ct = this.currentClosingTag.toString();
|
||||
|
||||
String tagName = this.elementBuilders.peek().getTagName();
|
||||
|
||||
// one instruction tags don't have a clo
|
||||
if(!this.tagManager.hasText(tagName)) {
|
||||
this.elementBuilders.pop();
|
||||
|
||||
return HTMLState.TEXT;
|
||||
}
|
||||
|
||||
// Comments are special
|
||||
String should = tagName.equals("--") ? tagName + '>' : "</" + tagName + ">";
|
||||
|
||||
System.out.println("should: " + should + " token: " + token);
|
||||
|
||||
if(should.equals(ct)) {
|
||||
|
||||
if(this.currentElement != null)
|
||||
this.currentElement = this.currentElement.append(this.elementBuilders.pop().build());
|
||||
else
|
||||
this.currentElement = this.elementBuilders.pop().build();
|
||||
|
||||
if(!(this.currentElement instanceof HTML))
|
||||
this.currentElement = this.currentElement.getParent();
|
||||
|
||||
this.currentClosingTag = new StringBuilder();
|
||||
return HTMLState.TEXT;
|
||||
|
||||
}
|
||||
|
||||
// </should> not reached yet
|
||||
else if(should.startsWith(ct))
|
||||
return HTMLState.TEXT;
|
||||
|
||||
// token not the same as </should>
|
||||
else
|
||||
throw new UnexpectedTokenException(token, this.currentLine, this.currentState);
|
||||
|
||||
}
|
||||
|
||||
private HTMLState text(String token) {
|
||||
String strip = token.stripLeading();
|
||||
|
||||
// handle string begin
|
||||
if(this.currentText.isEmpty()) {
|
||||
if(strip.startsWith("<"))
|
||||
return tag(strip);
|
||||
|
||||
this.currentText.append(token);
|
||||
|
||||
return HTMLState.TEXT;
|
||||
}
|
||||
|
||||
// handle string end or nested elements
|
||||
else if(token.startsWith("<")) {
|
||||
this.elementBuilders.peek().setText(this.currentText.toString());
|
||||
|
||||
// always reset
|
||||
this.currentText = new StringBuilder();
|
||||
|
||||
if(token.startsWith("</"))
|
||||
return close_tag(token.stripTrailing());
|
||||
else
|
||||
return tag(token.stripTrailing());
|
||||
}
|
||||
|
||||
// continue as text
|
||||
|
||||
else {
|
||||
this.currentText.append(token);
|
||||
return HTMLState.TEXT;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Declare an attribute
|
||||
* @param token attribute type
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState attribute(String token) {
|
||||
if(token.startsWith(">") || token.endsWith(">"))
|
||||
return text(token);
|
||||
|
||||
else if(token.contains("=")) {
|
||||
// Recursition if declaration and equals are same token
|
||||
this.currentAttribute = new StringBuilder(token.substring(0, token.indexOf('=')));
|
||||
|
||||
return attribute_equals(token.substring(token.indexOf('=')));
|
||||
}
|
||||
else {
|
||||
this.currentAttribute = new StringBuilder(token);
|
||||
return HTMLState.ATTRIBUTE_EQUALS;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle equals operator between attribute declaration and definition (can only be '='; will throw otherwise)
|
||||
* @param token equals operator
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState attribute_equals(String token) {
|
||||
boolean dq = token.contains("\""), sq = token.contains("'");
|
||||
if(dq || sq) {
|
||||
char quot = dq ? '"' : '\'';
|
||||
// Recursion if declaration and equals are same token
|
||||
|
||||
return value(token.substring(token.indexOf(quot)-1), quot);
|
||||
}
|
||||
else if(token.equals("=")){
|
||||
return HTMLState.VALUE;
|
||||
}
|
||||
else {
|
||||
throw new UnexpectedTokenException(token, this.currentLine, this.currentState);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Define an attribute
|
||||
* @param token attribute value
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState value(String token) {
|
||||
return value(token, ' ');
|
||||
}
|
||||
|
||||
/**
|
||||
* Define a string attribute
|
||||
* @param token attribute value
|
||||
* @param quot quotation sign
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState value(String token, char quot) {
|
||||
|
||||
// expected string, got other
|
||||
if(!token.startsWith("'") && token.startsWith("\""))
|
||||
throw new ExpectStringException(token, this.currentLine, this.currentState);
|
||||
|
||||
this.currentValue = new StringBuilder();
|
||||
|
||||
quot = quot != ' ' ? quot : token.charAt(0);
|
||||
|
||||
// split by quote character
|
||||
String[] split = token.split(String.valueOf(quot));
|
||||
|
||||
for(int i = 0; i < split.length; i++)
|
||||
|
||||
// handle escaped quote character
|
||||
if(split[i].endsWith("\\")) {
|
||||
this.currentValue.append(split[i]).append(quot);
|
||||
split[i] = "";
|
||||
}
|
||||
|
||||
// delete first quotation character
|
||||
if(!this.currentValue.isEmpty())
|
||||
this.currentValue.deleteCharAt(0);
|
||||
|
||||
StringBuilder rebuilt = new StringBuilder();
|
||||
|
||||
// TODO possible error source
|
||||
|
||||
for(String s : split)
|
||||
|
||||
if(!s.isEmpty())
|
||||
rebuilt.append(s);
|
||||
|
||||
if(!rebuilt.isEmpty() && this.tagManager.hasText(stripTag(token)))
|
||||
return text(token);
|
||||
|
||||
return HTMLState.ATTRIBUTE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Comment on code
|
||||
* @param token comment
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState comment(String token) {
|
||||
if(this.currentText.isEmpty())
|
||||
this.currentText = new StringBuilder();
|
||||
|
||||
// append comment
|
||||
if(!token.contains("-->")) {
|
||||
this.currentText.append(token);
|
||||
|
||||
return HTMLState.COMMENT;
|
||||
}
|
||||
|
||||
// end comment
|
||||
|
||||
ElementBuilder elementBuilder = new ElementBuilder(this.parser, "--");
|
||||
elementBuilder.setText(this.currentText.toString());
|
||||
|
||||
// always reset
|
||||
this.currentText = new StringBuilder();
|
||||
|
||||
this.elementBuilders.push(elementBuilder);
|
||||
|
||||
if(token.split("-->").length == 1)
|
||||
return commentResetInbetween();
|
||||
|
||||
|
||||
this.currentState = commentResetInbetween();
|
||||
|
||||
return close_tag(
|
||||
token.substring(token.indexOf("-->"))
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Define the doctype
|
||||
* @param token document type
|
||||
* @return next state
|
||||
*/
|
||||
private HTMLState doctype(String token) {
|
||||
String tag = stripTag(token);
|
||||
if(!tag.equalsIgnoreCase("HTML")) {
|
||||
/*
|
||||
Not implemented. Might do so in the future, might not.
|
||||
*/
|
||||
}
|
||||
|
||||
if(token.endsWith(">"))
|
||||
return HTMLState.TEXT;
|
||||
else
|
||||
return HTMLState.DOCTYPE;
|
||||
}
|
||||
|
||||
// Helper methods
|
||||
|
||||
/**
|
||||
* Reset inbetween state
|
||||
* @return previous inbetween state
|
||||
*/
|
||||
private HTMLState commentResetInbetween() {
|
||||
HTMLState temp = this.inbetweenState;
|
||||
|
||||
this.inbetweenState = HTMLState.COMMENT;
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Never forget to set the inbetween state!
|
||||
* @return HTMLState.COMMENT
|
||||
*/
|
||||
private HTMLState returnCommentState() {
|
||||
this.inbetweenState = this.currentState;
|
||||
return HTMLState.COMMENT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Continue down without returning own State
|
||||
* @param token next token
|
||||
* @return this.currentState
|
||||
*/
|
||||
private HTMLState nextTokenDontReturn(String token) {
|
||||
this.nextState(token);
|
||||
|
||||
return this.currentState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Continue down without returning own State, and close the current tag
|
||||
* @param token next token
|
||||
* @return this.currentState
|
||||
*/
|
||||
private HTMLState closeTagDontReturn(String token) {
|
||||
this.close_tag(token);
|
||||
|
||||
return this.currentState;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
package org.openautonomousconnection.htmlparser.interpreter;
|
||||
|
||||
/**
 * Token-driven interpreter.
 */
public interface Interpreter {

    /**
     * Processes the next token.
     *
     * @param token token to process
     */
    void nextState(String token);

    /**
     * @return whether the interpreter considers its input complete
     */
    boolean finished();
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package org.openautonomousconnection.htmlparser.interpreter.html.exception;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
|
||||
|
||||
public class ExpectStringException extends HTMLException {
|
||||
public ExpectStringException(String value, int currentLine, HTMLState currentState) {
|
||||
super("Expected string, got: " + value, currentLine, currentState);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package org.openautonomousconnection.htmlparser.interpreter.html.exception;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
|
||||
|
||||
public class HTMLException extends RuntimeException {
|
||||
public HTMLException(String message, int currentLine, HTMLState currentState) {
|
||||
super(message+ "\nat line: " + currentLine + "\nwith state: " + currentState.toString());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package org.openautonomousconnection.htmlparser.interpreter.html.exception;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState;
|
||||
|
||||
public class UnexpectedTokenException extends HTMLException {
|
||||
public UnexpectedTokenException(String token, int currentLine, HTMLState currentState) {
|
||||
super("Unexpected token: " + token, currentLine, currentState);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
package org.openautonomousconnection.htmlparser.interpreter.html.state;
|
||||
|
||||
/**
 * Sub-states of attribute handling.
 */
public enum HTMLAttributeState {
    /** Attribute declaration. */
    DECLARATION,

    /** Equals sign of an attribute. */
    EQUALS
}
|
||||
@@ -0,0 +1,16 @@
|
||||
// Author: maple
|
||||
// date: 9/24/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.interpreter.html.state;
|
||||
|
||||
/**
 * States of the HTML interpreter. A value of this type is also reported in the
 * message of the interpreter's exceptions.
 * NOTE(review): the transitions between these states are defined by the HTML
 * interpreter, not by this enum; consult it for the exact semantics.
 */
public enum HTMLState {
    TAG,
    CLOSE_TAG,
    ATTRIBUTE,
    ATTRIBUTE_EQUALS,
    VALUE,
    TEXT,
    SCRIPT,
    COMMENT,
    DOCTYPE
}
|
||||
@@ -0,0 +1,144 @@
|
||||
// Author: maple
|
||||
// date: 9/28/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.interpreter.script;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.openautonomousconnection.StringUtils_Remove_Please;
|
||||
import org.openautonomousconnection.htmlparser.Parser;
|
||||
import org.openautonomousconnection.htmlparser.TagManager;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.ElementBuilder;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.Interpreter;
|
||||
|
||||
public abstract class ScriptInterpreter implements Interpreter {
|
||||
@Getter
|
||||
protected Parser parser;
|
||||
|
||||
@Getter
|
||||
protected ElementBuilder elementBuilder;
|
||||
|
||||
protected TagManager tagManager;
|
||||
|
||||
StringBuilder currentText = null;
|
||||
|
||||
public int currentLine;
|
||||
|
||||
public ScriptInterpreter(Parser parser) {
|
||||
this.parser = parser;
|
||||
this.tagManager = parser.getTagManager();
|
||||
}
|
||||
|
||||
// We stole this spaghetti-abomination from chatgpt. Don't change it, it works (or do if you know better)
|
||||
public String parseScript(String html, int[] indexHolder) {
|
||||
if(this.currentText == null)
|
||||
this.currentText = new StringBuilder();
|
||||
|
||||
|
||||
int i = indexHolder[0];
|
||||
StringBuilder script = new StringBuilder();
|
||||
|
||||
boolean inString = false;
|
||||
boolean inTriple = false;
|
||||
char stringChar = 0; // ' or "
|
||||
int tripleCount = 0;
|
||||
|
||||
while (i < html.length()) {
|
||||
char c = html.charAt(i);
|
||||
|
||||
if(c == '\n')
|
||||
this.currentLine++;
|
||||
|
||||
if (!inString) {
|
||||
if (c == '\'' || c == '"') {
|
||||
|
||||
int ahead = countSameQuotes(html, i, c);
|
||||
if (ahead >= 3) {
|
||||
inString = true;
|
||||
inTriple = true;
|
||||
stringChar = c;
|
||||
i += 3;
|
||||
script.append(stringChar).append(stringChar).append(stringChar);
|
||||
continue;
|
||||
} else {
|
||||
inString = true;
|
||||
inTriple = false;
|
||||
stringChar = c;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (c == '\\') {
|
||||
script.append(c);
|
||||
i++;
|
||||
if (i < html.length())
|
||||
script.append(html.charAt(i));
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inTriple) {
|
||||
int ahead = countSameQuotes(html, i, stringChar);
|
||||
if (ahead >= 3) {
|
||||
script.append(stringChar).append(stringChar).append(stringChar);
|
||||
i += 3;
|
||||
inString = false;
|
||||
inTriple = false;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (c == stringChar) {
|
||||
inString = false;
|
||||
inTriple = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!inString) {
|
||||
int index = html.indexOf('>');
|
||||
|
||||
if(index == -1)
|
||||
continue;
|
||||
|
||||
String closingTag = html.substring(i, index);
|
||||
|
||||
// if(this.tagManager.isTagSpaced(closingTag)) {
|
||||
if(StringUtils_Remove_Please.equalsIgnoreWhiteSpaces("</script", closingTag)) {
|
||||
indexHolder[0] = i + closingTag.length();
|
||||
|
||||
this.currentText = null;
|
||||
|
||||
return script.toString();
|
||||
}
|
||||
// else
|
||||
// System.out.println("NE: " + closingTag);
|
||||
// if (html.startsWith("</script>", i)) {
|
||||
// indexHolder[0] = i + "</script>".length();
|
||||
// return script.toString();
|
||||
// }
|
||||
}
|
||||
|
||||
script.append(c);
|
||||
i++;
|
||||
}
|
||||
|
||||
indexHolder[0] = i;
|
||||
|
||||
this.currentText.append(script);
|
||||
|
||||
return this.currentText.toString();
|
||||
}
|
||||
|
||||
private int countSameQuotes(String s, int index, char quote) {
|
||||
int count = 0;
|
||||
int i = index;
|
||||
while (i < s.length() && s.charAt(i) == quote) {
|
||||
count++;
|
||||
i++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean finished() {
|
||||
return this.currentText == null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
// Author: maple
|
||||
// date: 9/28/25
|
||||
|
||||
package org.openautonomousconnection.htmlparser.interpreter.script.javascript;
|
||||
|
||||
import lombok.Getter;
|
||||
import org.openautonomousconnection.StringUtils_Remove_Please;
|
||||
import org.openautonomousconnection.htmlparser.Parser;
|
||||
import org.openautonomousconnection.htmlparser.TagManager;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.HTMLInterpreter;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class JavaScriptInterpreter extends ScriptInterpreter {
|
||||
// TODO: replace with actual interpreter
|
||||
|
||||
@Getter
|
||||
private Parser parser;
|
||||
private TagManager tagManager;
|
||||
|
||||
public JavaScriptInterpreter(Parser parser) {
|
||||
super(parser);
|
||||
this.parser = parser;
|
||||
this.tagManager = parser.getTagManager();
|
||||
}
|
||||
|
||||
private boolean scriptFinished = false;
|
||||
|
||||
private StringBuilder text = new StringBuilder();
|
||||
|
||||
public String getText() {
|
||||
return this.text.toString();
|
||||
}
|
||||
|
||||
boolean inSQ, inDQ;
|
||||
|
||||
@Override
|
||||
public void nextState(String token) {
|
||||
String[] sorted = StringUtils_Remove_Please.containsManySorted(token, "\"", "'");
|
||||
|
||||
this.text.append(token);
|
||||
|
||||
if(sorted.length > 0)
|
||||
if(!sorted[0].isEmpty()) {
|
||||
|
||||
|
||||
|
||||
// for(String s : token.split())
|
||||
|
||||
|
||||
// int indexQuoteChar = token.indexOf(quoteChar);
|
||||
|
||||
|
||||
// xor since this toggles the string case
|
||||
inSQ = sorted[0].equals("'") ^ inSQ;
|
||||
inDQ = sorted[0].equals("\"") ^ inDQ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
String[] split = token.split(sorted[0]);
|
||||
|
||||
// if(!inSQ && !inDQ)
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean finished() {
|
||||
return this.scriptFinished;
|
||||
}
|
||||
|
||||
// private String[][] getTextWithStrings(String token) {
|
||||
// char previous = 0;
|
||||
//
|
||||
// int lastStringIndex = 0;
|
||||
//
|
||||
// List<String> strings = new ArrayList<>(), tokens = new ArrayList<>();
|
||||
// for(char c : token.toCharArray()) {
|
||||
// if(this.isStringEncapsulator(c, previous)) {
|
||||
// if(this.inQuotes()) {
|
||||
// String string = token.substring(lastStringIndex, token.indexOf(c)-1);
|
||||
//
|
||||
// strings.add(string);
|
||||
//
|
||||
// token = string;
|
||||
// }
|
||||
// else
|
||||
//
|
||||
//
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// }
|
||||
|
||||
private boolean inQuotes() {
|
||||
return this.inDQ || this.inSQ;
|
||||
}
|
||||
|
||||
private boolean isStringEncapsulator(char c, char previous) {
|
||||
boolean escaped = previous == '\\';
|
||||
if(c == '\'' && !escaped && !this.inDQ) {
|
||||
this.inSQ = !this.inSQ;
|
||||
return true;
|
||||
}
|
||||
|
||||
else if (!escaped && !this.inSQ) {
|
||||
this.inDQ = !this.inDQ;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package org.openautonomousconnection.htmlparser.interpreter.script.pyscript;
|
||||
|
||||
import org.openautonomousconnection.htmlparser.Parser;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.ElementBuilder;
|
||||
import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
|
||||
|
||||
public class PyScriptInterpreter extends ScriptInterpreter {
|
||||
|
||||
public PyScriptInterpreter(Parser parser) {
|
||||
super(parser);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nextState(String token) {
|
||||
if(this.elementBuilder == null)
|
||||
this.elementBuilder = new ElementBuilder(this.parser, "script");
|
||||
String r = this.parseScript(token, new int[] {0});
|
||||
|
||||
// System.out.println(r);
|
||||
|
||||
// if(r == null)
|
||||
// throw new UnexpectedTokenException("token", this.currentLine, HTMLState.SCRIPT);
|
||||
// if(this.finished())
|
||||
// this.currentElement = new HTMLScript(r);
|
||||
|
||||
if(this.finished()) {
|
||||
this.elementBuilder.setText(r);
|
||||
System.out.println(r);
|
||||
}
|
||||
|
||||
}
|
||||
//
|
||||
// @Override
|
||||
// public boolean finished() {
|
||||
// return this.currentElement != null;
|
||||
// }
|
||||
}
|
||||
Reference in New Issue
Block a user