diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5ff6309 --- /dev/null +++ b/.gitignore @@ -0,0 +1,38 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### IntelliJ IDEA ### +.idea/modules.xml +.idea/jarRepositories.xml +.idea/compiler.xml +.idea/libraries/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..fca5cc1 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,13 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..6fe652a --- /dev/null +++ b/pom.xml @@ -0,0 +1,61 @@ + + + 4.0.0 + + de.maplesoft.geil + OACHtmlParser + 1.0-SNAPSHOT + + + 22 + 22 + UTF-8 + + + + + gitea + https://repo.unlegitdqrk.dev/api/packages/UnlegitDqrk/maven + + + + + + gitea + https://repo.unlegitdqrk.dev/api/packages/UnlegitDqrk/maven + + + + gitea + https://repo.unlegitdqrk.dev/api/packages/UnlegitDqrk/maven + + + + + + org.projectlombok + lombok + 1.18.34 + provided + + + org.jetbrains + annotations + 24.0.0 + compile + 
+ + dev.unlegitdqrk + unlegitlibrary + 1.6.2 + + + org.python + jython-standalone + 2.7.4 + + + + \ No newline at end of file diff --git a/src/main/java/org/openautonomousconnection/StringUtils_Remove_Please.java b/src/main/java/org/openautonomousconnection/StringUtils_Remove_Please.java new file mode 100644 index 0000000..3ba996b --- /dev/null +++ b/src/main/java/org/openautonomousconnection/StringUtils_Remove_Please.java @@ -0,0 +1,136 @@ +package org.openautonomousconnection; + +import org.jetbrains.annotations.NotNull; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Will be removed once UnlegitLibrary.StringUtils contains these methods + */ + +@Deprecated(since = "1.0") +public class StringUtils_Remove_Please { + + public static boolean equalsIgnoreWhiteSpaces(String s1, String s2) { + + return s1.replaceAll("\\s", "") + .equalsIgnoreCase(s2.replaceAll("\\s", "")); + } + + public static String[] splitSeq(String[] tokens, String seq) { + List _tokens = new ArrayList<>(); + + for(int i = 0; i < tokens.length; i++) { + String s = tokens[i]; + + if(!s.contains(seq)) { + _tokens.add(s); + continue; + } + + String[] split = s.split(seq); + + for(int j = 0; j < split.length-1; j++) + _tokens.add(split[j] + seq); + + if(s.endsWith(seq)) + _tokens.add(split[split.length-1] + seq); + else + _tokens.add(split[split.length-1]); + + + } + + String lastToken = _tokens.getLast(); + + if(!lastToken.isEmpty()) + _tokens.set(_tokens.size()-1, lastToken.substring(0, lastToken.length()-1)); + + return _tokens.toArray(new String[0]); + } + + public static int countSeq(String string, String seq) { + int amount = -1; + + for(String s : string.split(seq)) + amount++; + + return amount; + } + + public static List> getEncapsulatedTexts(String text, String... 
capsules) { + List> lists = new ArrayList<>(); + + lists.add(new ArrayList<>()); + lists.add(new ArrayList<>()); + + while (!text.isEmpty()) + { + String capsule = containsManySorted(text, capsules)[0]; + + if(capsule.isEmpty()) { + lists.getFirst().add(text); + break; + } + + + String out = text.substring(0, text.indexOf(capsule)); + + text = text.substring(text.indexOf(capsule) + capsule.length()); + + String in = text.substring(0, text.indexOf(capsule)); + + text = text.substring(text.indexOf(capsule) + capsule.length()); + + lists.get(0).add(out); + lists.get(1).add(in); + + } + + return lists; + } + + public static String[] containsMany(String string, String... strings) { + String[] result = new String[strings.length]; + + for(int i = 0; i < strings.length; i++) + if(string.contains(strings[i])) + result[i] = strings[i]; + + return result; + } + + public static String[] containsManySorted(String string, String... strings) { + String[] result = new String[strings.length]; + + StringPositionSorted[] records = new StringPositionSorted[strings.length]; + + for(int i = 0; i < strings.length; i++) + if(string.contains(strings[i])) + records[i] = new StringPositionSorted(strings[i], string.indexOf(strings[i])); + else + records[i] = new StringPositionSorted("", -1); + + Arrays.sort(records); + + int off = 0; + + for(int i = 0; i + off < strings.length; i++) { + while (records[i + off].position == -1 && + i + off + 1 < strings.length) + off++; + result[i] = records[i + off].string(); + } + + return result; + } + + private record StringPositionSorted(String string, int position) implements Comparable { + @Override + public int compareTo(@NotNull StringPositionSorted o) { + return this.position - o.position; + } + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/DocumentBuilder.java b/src/main/java/org/openautonomousconnection/htmlparser/DocumentBuilder.java new file mode 100644 index 0000000..a7a469f --- /dev/null +++ 
b/src/main/java/org/openautonomousconnection/htmlparser/DocumentBuilder.java @@ -0,0 +1,157 @@ +// Author: maple +// date: 9/24/25 + +package org.openautonomousconnection.htmlparser; + +import dev.unlegitdqrk.unlegitlibrary.string.StringUtils; +import org.openautonomousconnection.htmlparser.html.body.misc.HTMLComment; +import lombok.Getter; +import lombok.Setter; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class DocumentBuilder { + @Getter @Setter + protected String content; + + @Getter + protected List comments; + + @Getter + protected List attributes, texts, tags; + + public DocumentBuilder(String content) { + this.content = content; //content.replace("\n", ""); + this.comments = new ArrayList<>(); + this.attributes = new ArrayList<>(); + this.texts = new ArrayList<>(); + } + + /** + * Extracts all comments and strings into lists + */ + public void extract() { + this.extractComments(); + this.extractStringsAndAttributes(); + this.extractTexts(); + + } + + /** + * inserts the extracts back into the content string + */ + public void insert() { + this.insertTexts(); + this.insertStringsAndAttributes(); + this.insertComments(); + } + + protected void extractComments() { + Pattern pattern = Pattern.compile("", Pattern.DOTALL); + + Matcher matcher = pattern.matcher(content); + + + int index = 0; + + while (matcher.find()) { + this.content = this.content.replace("", ""); + + this.comments.add(new HTMLComment(matcher.group(1))); + + index++; + } + } + + protected void insertComments() { + if(this.comments.isEmpty()) + return; + + int i = 0; + for(; i < this.comments.size(); i++) + this.content = this.content.replace("", this.comments.get(i).toString()); + + for(; i > 0; i--) + this.comments.removeFirst(); + } + + + + protected void extractStringsAndAttributes() { + Pattern pattern = Pattern.compile("\"(.*?)\"|'(.*?)'", Pattern.DOTALL); + + Matcher matcher = 
pattern.matcher(this.content); + + + int index = 0; + + while (matcher.find()) { + + if(matcher.group(1) != null) { + this.content = this.content.replace("\"" + matcher.group(1) + "\"", "\"S" + index + "\""); + + this.attributes.add(matcher.group(1)); + } + + else { + this.content = this.content.replace("'" + matcher.group(2) + "'", "'S" + index + "'"); + + this.attributes.add(matcher.group(2)); + } + + index++; + } + } + + protected void insertStringsAndAttributes() { + if(this.attributes.isEmpty()) + return; + int i = 0; + for(; i < this.attributes.size(); i++) { + this.content = this.content.replace("\"S" + i + "\"", "\"" + attributes.get(i) + "\""); + this.content = this.content.replace("'S" + i + "'", "'" + attributes.get(i) + "'"); + } + + for(; i > 0; i--) + this.attributes.removeFirst(); + } + + protected void extractTexts() { + Pattern pattern = Pattern.compile(">([^<]+)(?=<)", Pattern.DOTALL); + + Matcher matcher = pattern.matcher(content); + + + int index = 0; + + while (matcher.find()) { + if(StringUtils.isEmptyString(matcher.group(1))) + continue; + + this.content = this.content.replace(">" + matcher.group(1) + "<", ">T" + index + "<"); + + this.texts.add(matcher.group(1)); + + index++; + } + } + + protected void insertTexts() { + if(this.texts.isEmpty()) + return; + + int i = 0; + for(; i < this.texts.size(); i++) + this.content = this.content.replace(">T" + i + "<", ">" + this.texts.get(i) + "<"); + + for(; i > 0; i--) + this.texts.removeFirst(); + } + + protected void extractTags() { + + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/ParseResult.java b/src/main/java/org/openautonomousconnection/htmlparser/ParseResult.java new file mode 100644 index 0000000..e73b81b --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/ParseResult.java @@ -0,0 +1,18 @@ +// Author: maple +// date: 9/24/25 + +package org.openautonomousconnection.htmlparser; + +import org.jetbrains.annotations.NotNull; + +public record 
ParseResult(String tagname, String assumption) implements Comparable { + + public int compareSelf() { + return this.tagname.compareToIgnoreCase(this.assumption); + } + + @Override + public int compareTo(@NotNull ParseResult o) { + return this.compareSelf() - o.compareSelf(); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/Parser.java b/src/main/java/org/openautonomousconnection/htmlparser/Parser.java new file mode 100644 index 0000000..d615ec7 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/Parser.java @@ -0,0 +1,105 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser; + +import org.openautonomousconnection.htmlparser.html.CustomHTMLElement; +import org.openautonomousconnection.htmlparser.html.HTML; +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import org.openautonomousconnection.htmlparser.interpreter.HTMLInterpreter; +import lombok.Getter; +import org.openautonomousconnection.htmlparser.interpreter.script.pyscript.PyScriptInterpreter; + +import java.util.Arrays; +import java.util.Objects; + +import static org.openautonomousconnection.StringUtils_Remove_Please.splitSeq; + + +public class Parser { + + public static String DEFAULT_TITLE = "untitled"; + + @Getter + private final TagManager tagManager; + + @Getter + private HTML html; + + private final String[] tokens; + + public Parser(String content, TagManager tagManager) { + + this.html = new HTML(); + this.tagManager = tagManager; + + String[] split = splitSeq(new String[]{content}, ">"); + + // TODO: you can do this using regex \\s in one line instead of 3 + + String[] split_spaces = splitSeq(split, " "); + String[] split_tabs = splitSeq(split_spaces, "\t"); + + this.tokens = splitSeq(split_tabs, "\n"); + +// for(String s : tokens) +// System.out.print(s); +// System.out.println(); + +// List> l = StringUtils_Remove_Please.getEncapsulatedTexts(""" +// +// part UNO"part dos'stillpartdos'" 'PART TRES 
YAYAYYA' and gone bye. +// +// """, "\"", "'"); +// +// for(List list : l) +// for(String s : list) +// System.out.println("s: " + s); + + System.out.println(); + System.out.println(this.parse()); + + } + + public HTML parse() { + HTMLInterpreter interpreter = new HTMLInterpreter(this, new PyScriptInterpreter(this)); + + for(String s : this.tokens) + interpreter.nextState(s); + + + return interpreter.getResult(); + } + + public Class getByTagname(String tagName) { + tagName = tagName.toLowerCase(); + + Class res = this.tagManager.tags.get(tagName); + + return Objects.requireNonNullElse(res, CustomHTMLElement.class); + } + + public static void main(String[] args) { + + Parser parser = new Parser(""" + + + +

a paragraph in color! test

+
+ + + + + + + + + + """, new TagManager()); + } + + +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/TagManager.java b/src/main/java/org/openautonomousconnection/htmlparser/TagManager.java new file mode 100644 index 0000000..6fab1df --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/TagManager.java @@ -0,0 +1,117 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import org.openautonomousconnection.htmlparser.html.NoContent; +import org.openautonomousconnection.htmlparser.html.body.HTMLBody; +import org.openautonomousconnection.htmlparser.html.body.buttons.HTMLButton; +import org.openautonomousconnection.htmlparser.html.body.form.HTMLForm; +import org.openautonomousconnection.htmlparser.html.body.form.HTMLInput; +import org.openautonomousconnection.htmlparser.html.body.form.HTMLLabel; +import org.openautonomousconnection.htmlparser.html.body.link.HTMLArea; +import org.openautonomousconnection.htmlparser.html.body.link.HTMLHyperlink; +import org.openautonomousconnection.htmlparser.html.body.link.HTMLImage; +import org.openautonomousconnection.htmlparser.html.body.misc.HTMLBreak; +import org.openautonomousconnection.htmlparser.html.body.misc.HTMLComment; +import org.openautonomousconnection.htmlparser.html.body.misc.HTMLDiv; +import org.openautonomousconnection.htmlparser.html.body.misc.HTMLScript; +import org.openautonomousconnection.htmlparser.html.body.texts.HTMLAbbreviation; +import org.openautonomousconnection.htmlparser.html.body.texts.heading.HTMLHeading; +import org.openautonomousconnection.htmlparser.html.body.texts.heading.HeadingType; +import org.openautonomousconnection.htmlparser.html.body.texts.text.HTMLText; +import org.openautonomousconnection.htmlparser.html.body.texts.text.TextType; +import org.openautonomousconnection.htmlparser.html.header.HTMLHeader; +import 
org.openautonomousconnection.htmlparser.html.header.HTMLTitle; + +import java.util.*; + +public class TagManager { + public Map> tags; + + public void putTag(Class tag) { + try { + this.tags.put((String) tag.getDeclaredField("TAG").get(tag), tag); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public boolean isTag(String tagName) { + return this.tags.containsKey(tagName); + } + + public boolean isTagSpaced(String tagName) { + return this.isTag(tagName.replaceAll("\\s", "")); + } + +// public boolean hasClosingTag(String tagName) { +// try { +// return (boolean) this.tags.get(tagName).getField("CLOSEABLE").get(null); +// } catch (NoSuchFieldException | IllegalAccessException e) { +// throw new RuntimeException(e); +// } +// } + + public boolean hasText(String tagName) { + if(!this.isTag(tagName)) + return false; + + return !this.tags.get(tagName).isAnnotationPresent(NoContent.class); + } + + public TreeSet couldBe(String string) { + TreeSet result = new TreeSet<>(); + + for(String tagName : tags.keySet()) + if(tagName.contains(string)) + result.add(new ParseResult(string, tagName)); + + return result; + } + + public TagManager() { + // Map default tags + + this.tags = new HashMap<>(); + + // buttons + this.putTag(HTMLButton.class); + + // forms + this.putTag(HTMLForm.class); + this.putTag(HTMLInput.class); + this.putTag(HTMLLabel.class); + + // links + this.putTag(HTMLArea.class); + this.putTag(HTMLHyperlink.class); + this.putTag(HTMLImage.class); + + // misc + this.putTag(HTMLBreak.class); + this.putTag(HTMLDiv.class); + this.putTag(HTMLScript.class); + this.putTag(HTMLComment.class); + + // headings + for(HeadingType type : HeadingType.values()) + this.tags.put(type.getTag(), HTMLHeading.class); + + // texts + for(TextType type : TextType.values()) + this.tags.put(type.getTag(), HTMLText.class); + + this.putTag(HTMLAbbreviation.class); + + // headers + this.putTag(HTMLHeader.class); + 
this.putTag(HTMLTitle.class); + + // main elements + this.putTag(HTMLBody.class); + this.putTag(HTMLHeader.class); + this.putTag(HTMLImage.class); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/exception/NullTagException.java b/src/main/java/org/openautonomousconnection/htmlparser/exception/NullTagException.java new file mode 100644 index 0000000..89f8b5a --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/exception/NullTagException.java @@ -0,0 +1,10 @@ +// Author: maple +// date: 9/24/25 + +package org.openautonomousconnection.htmlparser.exception; + +public class NullTagException extends NullPointerException { + public NullTagException() { + super("Tag can't be null!"); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/CustomHTMLElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/CustomHTMLElement.java new file mode 100644 index 0000000..c834cdd --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/CustomHTMLElement.java @@ -0,0 +1,35 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html; + +import lombok.Getter; +import lombok.Setter; + +import java.util.Map; +import java.util.Optional; + +public class CustomHTMLElement extends HTMLElement{ + + public static final boolean CLOSEABLE = true; + + @Getter @Setter + private String text; + + public CustomHTMLElement(String tag, String text, Map attributes) { + this.tagName = tag; + this.text = text; + this.attributes = attributes; + + this.id = Optional.of(attributes.get("id")); + } + +// @Override +// public String toString() { +// return otag() + this.text + ctag(); +// } + + public static CustomHTMLElement instantiate(String text, Map attributes) { + return new CustomHTMLElement("UNKNOWN_ELEMENT", text, attributes); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/HTML.java 
b/src/main/java/org/openautonomousconnection/htmlparser/html/HTML.java new file mode 100644 index 0000000..26aa779 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/HTML.java @@ -0,0 +1,90 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html; + +import org.openautonomousconnection.htmlparser.html.body.HTMLBody; +import org.openautonomousconnection.htmlparser.html.header.HTMLHeader; +import org.openautonomousconnection.htmlparser.html.misc.HTMLClass; +import lombok.Getter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Parent element for all HTML content + */ +public class HTML extends HTMLElement{ + public static final String TAG = "html"; + + public static final boolean CLOSEABLE = true; + + public final List classes; + + public HTML(HTMLHeader header, HTMLBody body) { + super(null); + this.header = header; + this.body = body; + + this.tagName = TAG; + + this.classes = new ArrayList<>(); + } + + public HTML() { + this(null, null); + } + + @Getter + private HTMLHeader header; + + @Getter + private HTMLBody body; + + @Override + public HTMLElement append(HTMLElement element) { + if(element instanceof HTMLHeader) + this.header = (HTMLHeader) element; + else if(element instanceof HTMLBody) + this.body = (HTMLBody) element; + else + super.append(element); + + return element; + + } + + public HTMLBody setBody(HTMLBody body) { + this.body = body; + + this.body.parent = this; + + return this.body; + } + + public HTMLHeader setHeader(HTMLHeader header) { + this.header = header; + + this.header.parent = this; + + return this.header; + } + +// @Override +// public String toString() { +// return otag() +// + "\n\t" +// + header.toString() + "\n\t" +// + body.toString() + "\n" +// + ctag(); +// } + + public static HTML instantiate(String text, Map attributes) { + HTML html = new HTML(); + + html.setAttributes(attributes); + + return html; + } +} diff --git 
a/src/main/java/org/openautonomousconnection/htmlparser/html/HTMLElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/HTMLElement.java new file mode 100644 index 0000000..511c2d7 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/HTMLElement.java @@ -0,0 +1,142 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html; + +import org.openautonomousconnection.htmlparser.html.misc.HTMLClass; +import lombok.Getter; +import lombok.Setter; +import org.jetbrains.annotations.Nullable; + +import java.lang.reflect.InvocationTargetException; +import java.util.*; + +public abstract class HTMLElement { + + @Getter + protected HTMLElement parent; + + @Getter + protected List children; + + @Getter + protected String tagName; + + @Getter @Setter + protected Optional id; + + @Getter @Setter + protected Optional htmlClass; + + @Getter @Setter + protected Map attributes; + + protected HTMLElement(@Nullable HTMLElement parent) { + this.parent = parent; + + this.attributes = new HashMap<>(); + + this.id = Optional.empty(); + this.htmlClass = Optional.empty(); + + this.children = new ArrayList<>(); + } + + protected HTMLElement() { + this(null); + } + + +// @Override +// public abstract String toString(); + + public HTMLElement append(HTMLElement element) { + element.parent = this; + this.children.add(element); + return element; + } + + public final String toString() { + if(this.getClass().isAnnotationPresent(NoContent.class)) + return otag(); + + + StringBuilder sb = new StringBuilder(otag()).append("\n\t"); + + for(HTMLElement child : this.children) + sb.append(child).append("\n\t"); + + return sb.append(ctag()).toString(); + } + + /** + * Reload ID from attributes map + */ + public void reloadId() { + if(this.attributes.containsKey("id")) { + this.id = Optional.of(this.attributes.get("id")); + this.attributes.remove("id"); + } + } + + protected String getIdString() { + return this.id.map(string -> 
"id='" + string +"' ").orElse(""); + } + + protected String getClassString() { + return this.htmlClass.map(htmlClass -> "class='" + htmlClass.getClassName() + "' ").orElse(""); + } + + protected String getAttributesString() { + StringBuilder sb = new StringBuilder(" "); + for(String string : this.attributes.keySet()) + sb.append(string) + .append("='") + .append(this.attributes.get(string)) + .append("' "); + + if(!sb.toString().equals(" ")) + return sb.substring(0, sb.length()-1); + else + return ""; + } + + protected String otag() { + return "<" + this.tagName + getIdString() + getClassString() + getAttributesString() + ">"; + } + + protected String otag(String _attributes) { + return "<" + this.tagName + getIdString() + getClassString() + " " + _attributes + getAttributesString() + ">"; + } + + protected String ctag() { + return ""; + } + + protected String cutTag(String string, String _attributes) { + return string.replaceFirst("<" + tagName + " " + _attributes + ">", "").replaceFirst("", "").trim(); + } + + protected String cutTag(String string) { + return cutTag(string, ""); + } + +// protected static Class getNext(Parser parser, String string) { +// String sub = string.substring(string.indexOf("<" + 1)).split(" ")[0]; +// +// return parser.getByTagname(sub); +// } +// + + /** + * only use if child objects can exist + * @return parsed child objects + */ + + // TODO: 1. handle comments 2. 
somehow handle non-tag >s & <s + + public static HTMLElement instantiate(Class elementClass, String text, Map attributes) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { + return (HTMLElement) elementClass.getMethod("instantiate", String.class, Map.class).invoke(null, text, attributes); + + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/NoContent.java b/src/main/java/org/openautonomousconnection/htmlparser/html/NoContent.java new file mode 100644 index 0000000..1437d5a --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/NoContent.java @@ -0,0 +1,11 @@ +package org.openautonomousconnection.htmlparser.html; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface NoContent { +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/BodyElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/BodyElement.java new file mode 100644 index 0000000..0e43814 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/BodyElement.java @@ -0,0 +1,22 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import lombok.Getter; +import lombok.Setter; +import org.jetbrains.annotations.Nullable; + +public abstract class BodyElement extends HTMLElement { + @Getter @Setter + protected String text; + + protected BodyElement(@Nullable HTMLElement parent) { + super(parent); + } + + protected BodyElement() { + + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/HTMLBody.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/HTMLBody.java new file mode 100644 index 
0000000..6c7c85e --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/HTMLBody.java @@ -0,0 +1,42 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class HTMLBody extends HTMLElement { + + public static final String TAG = "body"; + + public static final boolean CLOSEABLE = true; + + + public HTMLBody(List elements) { + this.children = elements; + + this.tagName = TAG; + } + + public HTMLBody(HTMLElement... elements) { + this.children = new ArrayList<>(Arrays.stream(elements).toList()); + + this.tagName = TAG; + } + + public static HTMLBody instantiate(String text, Map attributes) { + HTMLBody body = new HTMLBody(); + + body.setAttributes(attributes); + + body.reloadId(); + + return body; + } + +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/buttons/ButtonElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/buttons/ButtonElement.java new file mode 100644 index 0000000..8a5ab57 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/buttons/ButtonElement.java @@ -0,0 +1,16 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.buttons; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; + +public abstract class ButtonElement extends BodyElement { + public String getScript() { + return this.attributes.get("onclick"); + } + + public void setScript(String script) { + this.attributes.replace("onclick", script); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/buttons/HTMLButton.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/buttons/HTMLButton.java new file mode 100644 index 0000000..029b5d9 --- /dev/null 
+++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/buttons/HTMLButton.java @@ -0,0 +1,34 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.buttons; + +import java.util.Map; + +public class HTMLButton extends ButtonElement { + + public static final String TAG = "button"; + + public static final boolean CLOSEABLE = false; + + public HTMLButton(String text) { + this.text = text; + this.tagName = TAG; + } + public HTMLButton(String text, String script) { + this.text = text; + this.setScript(script); + + this.tagName = TAG; + } + + public static HTMLButton instantiate(String text, Map attributes) { + HTMLButton form = new HTMLButton(text); + + form.setAttributes(attributes); + + form.reloadId(); + + return form; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/FormElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/FormElement.java new file mode 100644 index 0000000..88bf7bd --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/FormElement.java @@ -0,0 +1,10 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.form; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; + +public abstract class FormElement extends BodyElement { + +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLForm.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLForm.java new file mode 100644 index 0000000..50b84fd --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLForm.java @@ -0,0 +1,34 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.form; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; + +import java.util.Map; + +public class HTMLForm extends HTMLElement { + + public static final 
String TAG = "form"; + + public static final boolean CLOSEABLE = true; + + public HTMLForm() { + this.tagName = TAG; + } + + public HTMLForm(String action) { + this.attributes.put("action", action); + this.tagName = TAG; + } + + public static HTMLForm instantiate(String text, Map attributes) { + HTMLForm form = new HTMLForm(); + + form.setAttributes(attributes); + + form.reloadId(); + + return form; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLInput.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLInput.java new file mode 100644 index 0000000..0529777 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLInput.java @@ -0,0 +1,59 @@ +// Author: maple +// date: 9/24/25 + +package org.openautonomousconnection.htmlparser.html.body.form; + +import org.openautonomousconnection.htmlparser.html.NoContent; + +import java.util.Map; + +@NoContent +public class HTMLInput extends FormElement { + + public static final String TAG = "input"; + + public static final boolean CLOSEABLE = false; + + public HTMLInput() { + + } + + public HTMLInput(String type, String name) { + this.setType(type); + this.setName(name); + + this.tagName = TAG; + } + + public String getType() { + return this.attributes.get("title"); + } + + public String getName() { + return this.attributes.get("title"); + } + + public void setType(String type) { + this.attributes.replace("type", type); + } + + public void setName(String name) { + this.attributes.replace("name", name); + } + + + public static HTMLInput instantiate(String text, Map attributes) { + HTMLInput input = new HTMLInput(attributes.get("type"), attributes.get("name")); + + input.setText(text); + + attributes.remove("type"); + attributes.remove("name"); + + input.setAttributes(attributes); + + input.reloadId(); + + return input; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLLabel.java 
b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLLabel.java new file mode 100644 index 0000000..d4c212b --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/form/HTMLLabel.java @@ -0,0 +1,37 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.form; + +import java.util.Map; + +public class HTMLLabel extends FormElement { + + public static final String TAG = "label"; + + public static final boolean CLOSEABLE = true; + + public HTMLLabel(String text) { + this.text = text; + + this.tagName = TAG; + } + + public String get_for() { + return this.attributes.get("for"); + } + + public void set_for(String _for) { + this.attributes.replace("for", _for); + } + + public static HTMLLabel instantiate(String text, Map attributes) { + HTMLLabel label = new HTMLLabel(text); + + label.setAttributes(attributes); + + label.reloadId(); + + return label; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLArea.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLArea.java new file mode 100644 index 0000000..6a1fb47 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLArea.java @@ -0,0 +1,53 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.link; + +import java.util.Map; + +public class HTMLArea extends LinkElement { + + public static final String TAG = "area"; + + public static final boolean CLOSEABLE = true; + + public String getShape() { + return this.attributes.get("shape"); + } + + public String getCoords() { + return this.attributes.get("coords"); + } + + public void setShape(String shape) { + this.attributes.replace("shape", shape); + } + + public void setCoords(String coords) { + this.attributes.replace("coords", coords); + } + + public HTMLArea() { + this.tagName = TAG; + } + + public HTMLArea(String src, String shape, 
String coords) { + this.setSource(src); + this.setShape(shape); + this.setCoords(coords); + + this.tagName = TAG; + } + + public static HTMLArea instantiate(String text, Map attributes) { + HTMLArea area = new HTMLArea(); + + area.setText(text); + + area.setAttributes(attributes); + + area.reloadId(); + + return area; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLHyperlink.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLHyperlink.java new file mode 100644 index 0000000..9e5f5db --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLHyperlink.java @@ -0,0 +1,36 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.link; + +import java.util.Map; + +public class HTMLHyperlink extends LinkElement { + + public static final String TAG = "a"; + + public static final boolean CLOSEABLE = true; + + public HTMLHyperlink(String text) { + this.text = text; + + this.tagName = TAG; + } + + public HTMLHyperlink(String text, String src) { + this.text = text; + this.setSource(src); + + this.tagName = TAG; + } + + public static HTMLHyperlink instantiate(String text, Map attributes) { + HTMLHyperlink hyperlink = new HTMLHyperlink(text); + + hyperlink.setAttributes(attributes); + + hyperlink.reloadId(); + + return hyperlink; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLImage.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLImage.java new file mode 100644 index 0000000..5e8c57b --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/HTMLImage.java @@ -0,0 +1,34 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.link; + +import java.util.Map; + +public class HTMLImage extends LinkElement { + + public static final String TAG = "img"; + + public static final boolean 
CLOSEABLE = false; + + public HTMLImage() { + this.tagName = TAG; + } + + public HTMLImage(String src) { + this.setSource(src); + this.tagName = TAG; + } + + public static HTMLImage instantiate(String text, Map attributes) { + HTMLImage image = new HTMLImage(); + + image.setText(text); + + image.setAttributes(attributes); + + image.reloadId(); + + return image; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/LinkElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/LinkElement.java new file mode 100644 index 0000000..e3d47a4 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/link/LinkElement.java @@ -0,0 +1,20 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.link; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; + +public abstract class LinkElement extends BodyElement { + protected LinkElement() { + + } + + public String getSource() { + return this.attributes.get("src"); + } + + public void setSource(String source) { + this.attributes.replace("src", source); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLBreak.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLBreak.java new file mode 100644 index 0000000..8cd9e8d --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLBreak.java @@ -0,0 +1,34 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.misc; + +import org.openautonomousconnection.htmlparser.html.NoContent; +import org.openautonomousconnection.htmlparser.html.body.BodyElement; + +import java.util.Map; + +@NoContent +public class HTMLBreak extends BodyElement { + + public static final String TAG = "br"; + + public static final boolean CLOSEABLE = false; + + public HTMLBreak() { + this.tagName = TAG; + } + + + public static 
HTMLBreak instantiate(String text, Map attributes) { + HTMLBreak hbreak = new HTMLBreak(); + + hbreak.setText(text); + + hbreak.setAttributes(attributes); + + hbreak.reloadId(); + + return hbreak; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLComment.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLComment.java new file mode 100644 index 0000000..7184338 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLComment.java @@ -0,0 +1,51 @@ +package org.openautonomousconnection.htmlparser.html.body.misc; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import lombok.Getter; +import lombok.Setter; + +import java.util.Map; + +@Getter @Setter + +public class HTMLComment extends HTMLElement { + public static String TAG = "--"; + + private String text; + + public static final boolean CLOSEABLE = true; + + public HTMLComment(String text) { + this.text = text; + this.tagName = ""; + } + + public static HTMLComment instantiate(String text, Map attributes) { + return new HTMLComment(text); + } + + @Override + protected String getIdString() { + return ""; + } + + @Override + protected String getClassString() { + return ""; + } + + @Override + protected String getAttributesString() { + return ""; + } + + @Override + protected String otag() { + return ""; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLDiv.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLDiv.java new file mode 100644 index 0000000..b212b9b --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLDiv.java @@ -0,0 +1,29 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.misc; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; + +import java.util.Map; + +public class HTMLDiv extends HTMLElement { + + public 
static final String TAG = "div"; + + public static final boolean CLOSEABLE = true; + + public HTMLDiv() { + this.tagName = TAG; + } + + public static HTMLDiv instantiate(String text, Map attributes) { + HTMLDiv div = new HTMLDiv(); + + div.setAttributes(attributes); + + div.reloadId(); + + return div; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLScript.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLScript.java new file mode 100644 index 0000000..6b27a11 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/misc/HTMLScript.java @@ -0,0 +1,30 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.misc; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; + +import java.util.Map; + +public class HTMLScript extends BodyElement { + + public static final String TAG = "script"; + + public static final boolean CLOSEABLE = true; + + public HTMLScript(String text) { + this.text = text; + this.tagName = TAG; + } + + public static HTMLScript instantiate(String text, Map attributes) { + HTMLScript script = new HTMLScript(text); + + script.setAttributes(attributes); + + script.reloadId(); + + return script; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/HTMLAbbreviation.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/HTMLAbbreviation.java new file mode 100644 index 0000000..51ba23c --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/HTMLAbbreviation.java @@ -0,0 +1,42 @@ +package org.openautonomousconnection.htmlparser.html.body.texts; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; + +import java.util.Map; + +public class HTMLAbbreviation extends BodyElement { + + public static final String TAG = "abbr"; + + public static final boolean CLOSEABLE = true; + + public 
HTMLAbbreviation(String text) { + this.text = text; + this.tagName = TAG; + } + + public HTMLAbbreviation(String text, String title) { + this.text = text; + this.setTitle(title); + + this.tagName = TAG; + } + + public String getTitle() { + return this.attributes.get("title"); + } + + public void setTitle(String title) { + this.attributes.replace("title", title); + } + + public static HTMLAbbreviation instantiate(String text, Map attributes) { + HTMLAbbreviation abbreviation = new HTMLAbbreviation(text); + + abbreviation.setAttributes(attributes); + + abbreviation.reloadId(); + + return abbreviation; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/heading/HTMLHeading.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/heading/HTMLHeading.java new file mode 100644 index 0000000..5c53eae --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/heading/HTMLHeading.java @@ -0,0 +1,39 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.texts.heading; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; +import lombok.Getter; + +import java.util.Map; + +public class HTMLHeading extends BodyElement { + + @Getter + protected HeadingType type; + + public static final boolean CLOSEABLE = true; + + public HTMLHeading(String text, HeadingType type) { + this.text = text; + this.type = type; + + this.tagName = this.type.getTag(); + } + + public void setType(HeadingType type) { + this.type = type; + this.tagName = this.type.getTag(); + } + + public static HTMLHeading instantiate(String text, Map attributes) { + HTMLHeading heading = new HTMLHeading(text, HeadingType.H1); + + heading.setAttributes(attributes); + + heading.reloadId(); + + return heading; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/heading/HeadingType.java 
b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/heading/HeadingType.java new file mode 100644 index 0000000..67502e5 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/heading/HeadingType.java @@ -0,0 +1,23 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.body.texts.heading; + +import lombok.Getter; + +public enum HeadingType { + + H1("h1"), + H2("h2"), + H3("h3"), + H4("h4"), + H5("h5"), + H6("h6"); + + @Getter + private String tag; + + HeadingType(String tag) { + this.tag = tag; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/text/HTMLText.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/text/HTMLText.java new file mode 100644 index 0000000..98f58c6 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/text/HTMLText.java @@ -0,0 +1,36 @@ +package org.openautonomousconnection.htmlparser.html.body.texts.text; + +import org.openautonomousconnection.htmlparser.html.body.BodyElement; +import lombok.Getter; + +import java.util.Map; + +public class HTMLText extends BodyElement { + + @Getter + private TextType type; + + public static final boolean CLOSEABLE = true; + + protected HTMLText(String text, TextType type) { + this.text = text; + this.type = type; + + this.tagName = this.type.getTag(); + } + + public void setType(TextType type) { + this.type = type; + this.tagName = this.type.getTag(); + } + + public static HTMLText instantiate(String text, Map attributes) { + HTMLText htext = new HTMLText(text, TextType.PARAGRAPH); + + htext.setAttributes(attributes); + + htext.reloadId(); + + return htext; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/text/TextType.java b/src/main/java/org/openautonomousconnection/htmlparser/html/body/texts/text/TextType.java new file mode 100644 index 0000000..b44d9bf --- /dev/null +++ 
/**
 * The text flavours an {@code HTMLText} element can take, each bound to the tag
 * name it is written with.
 */
public enum TextType {
    PARAGRAPH("p"),
    BOLD("b"),
    STRONG("strong"),
    ITALIC("i"),
    EMPHASIZED("em"),
    MARKED("mark"),
    SMALL("small"),
    DELETED("del"),
    INSERTED("ins"),
    SUBSCRIPT("sub"),
    SUPERSCRIPT("sup"),
    UNDERLINED("u"),
    SPAN("span");

    // final: enum constants are shared singletons, their state must not change.
    private final String tag;

    TextType(String tag) {
        this.tag = tag;
    }

    /** @return the tag name of this text flavour, e.g. {@code "p"} */
    public String getTag() {
        return this.tag;
    }
}
element) { + this(new ArrayList<>(Arrays.stream(element).toList())); + } + + public static HTMLHeader instantiate(String text, Map attributes) { + HTMLHeader header = new HTMLHeader(); + + header.setAttributes(attributes); + + header.reloadId(); + + return header; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/header/HTMLTitle.java b/src/main/java/org/openautonomousconnection/htmlparser/html/header/HTMLTitle.java new file mode 100644 index 0000000..aa5d5bc --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/header/HTMLTitle.java @@ -0,0 +1,35 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.header; + +import org.openautonomousconnection.htmlparser.Parser; + +import java.util.Map; + +public class HTMLTitle extends HeaderElement { + + public static final String TAG = "title"; + + public static final boolean CLOSEABLE = true; + + public HTMLTitle(String text) { + this.text = text; + + this.tagName = TAG; + } + + public HTMLTitle() { + this(Parser.DEFAULT_TITLE); + } + + public static HTMLTitle instantiate(String text, Map attributes) { + HTMLTitle title = new HTMLTitle(text); + + title.setAttributes(attributes); + + title.reloadId(); + + return title; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/header/HeaderElement.java b/src/main/java/org/openautonomousconnection/htmlparser/html/header/HeaderElement.java new file mode 100644 index 0000000..2c9f804 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/header/HeaderElement.java @@ -0,0 +1,24 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.header; + +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import org.openautonomousconnection.htmlparser.html.body.BodyElement; +import lombok.Getter; +import lombok.Setter; +import org.jetbrains.annotations.Nullable; + +public abstract class HeaderElement 
extends BodyElement { + @Getter @Setter + protected String text; + + protected HeaderElement(@Nullable HTMLElement parent) { + super(parent); + } + + protected HeaderElement() { + + } + +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/html/misc/HTMLClass.java b/src/main/java/org/openautonomousconnection/htmlparser/html/misc/HTMLClass.java new file mode 100644 index 0000000..e575675 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/html/misc/HTMLClass.java @@ -0,0 +1,30 @@ +// Author: maple +// date: 9/20/25 + +package org.openautonomousconnection.htmlparser.html.misc; + +import org.openautonomousconnection.htmlparser.html.HTML; +import org.openautonomousconnection.htmlparser.html.body.BodyElement; +import lombok.Getter; +import lombok.Setter; + +import java.util.ArrayList; +import java.util.List; + +public class HTMLClass { + + @Getter @Setter + protected String className; + + public List elements; + + public HTMLClass(String className, HTML document) { + this.className = className; + + this.elements = new ArrayList<>(); + + document.classes.add(this); + } + + +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/ElementBuilder.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/ElementBuilder.java new file mode 100644 index 0000000..fbe5228 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/ElementBuilder.java @@ -0,0 +1,64 @@ +// Author: maple +// date: 9/24/25 + +package org.openautonomousconnection.htmlparser.interpreter; + +import org.openautonomousconnection.htmlparser.Parser; +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import lombok.Getter; +import lombok.Setter; + +import java.lang.reflect.InvocationTargetException; +import java.util.HashMap; +import java.util.Map; + +public class ElementBuilder { + private Class clazz; + + @Getter @Setter + private Map attributes; + + @Getter @Setter + private String text, 
tagName; + + /** + * build a html Element + * @param parser needed to retrieve element class (tagnames are relative) + * @param tagName name of the tag + */ + public ElementBuilder(Parser parser, String tagName) { + this.clazz = parser.getByTagname(tagName); + + this.attributes = new HashMap<>(); + + this.tagName = tagName; + + } + + public HTMLElement build() { + try { + System.out.println(clazz.getSimpleName()); + + return HTMLElement.instantiate(clazz, text, attributes); + } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + public void appendText(String text) { + if(this.text != null) + this.text = this.text + text; + else + this.text = text; + } + + @Override + public String toString() { + return "ElementBuilder{" + + "clazz=" + clazz.getSimpleName() + + ", attributes=" + attributes + + ", text='" + text + '\'' + + ", tagName='" + tagName + '\'' + + '}'; + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/HTMLInterpreter.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/HTMLInterpreter.java new file mode 100644 index 0000000..62b35e3 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/HTMLInterpreter.java @@ -0,0 +1,447 @@ +// Author: maple +// date: 9/24/25 + +package org.openautonomousconnection.htmlparser.interpreter; + +import org.openautonomousconnection.StringUtils_Remove_Please; +import org.openautonomousconnection.htmlparser.Parser; +import org.openautonomousconnection.htmlparser.TagManager; +import org.openautonomousconnection.htmlparser.html.HTML; +import org.openautonomousconnection.htmlparser.html.HTMLElement; +import org.openautonomousconnection.htmlparser.interpreter.html.exception.ExpectStringException; +import org.openautonomousconnection.htmlparser.interpreter.html.exception.UnexpectedTokenException; +import 
org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState; +import lombok.Getter; +import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter; + +import java.util.Arrays; +import java.util.Map; +import java.util.Stack; + +public class HTMLInterpreter implements Interpreter { + @Getter + private HTMLState currentState = HTMLState.TAG; + + // Used to go up a layer after comment is opened + private HTMLState inbetweenState = HTMLState.COMMENT; + + @Getter + private Parser parser; + private TagManager tagManager; + private Stack elementBuilders; + private StringBuilder currentAttribute, currentValue, currentText, currentClosingTag; + + public int currentLine = 1; + + private HTMLElement currentElement; + + private ScriptInterpreter scriptInterpreter; + + public HTMLInterpreter(Parser parser, ScriptInterpreter scriptInterpreter) { + this.parser = parser; + this.tagManager = parser.getTagManager(); + this.scriptInterpreter = scriptInterpreter; + + this.currentText = new StringBuilder(); + this.currentClosingTag = new StringBuilder(); + this.currentAttribute = new StringBuilder(); + this.currentValue = new StringBuilder(); + + this.elementBuilders = new Stack<>(); + } + + @Override + public void nextState(String token) { + boolean newLine = token.endsWith("\n"); + + if(token.isBlank()) { + if (newLine) + this.currentLine++; + + return; + } + + this.currentState = switch (this.currentState) { + case TAG -> tag(token.strip()); + case CLOSE_TAG -> close_tag(token.strip()); + case TEXT -> text(token); + case DOCTYPE -> doctype(token.strip()); + case ATTRIBUTE -> attribute(token.strip()); + case ATTRIBUTE_EQUALS -> attribute_equals(token.strip()); + case COMMENT -> comment(token); + case VALUE -> value(token); + case SCRIPT -> script(token); + default -> this.currentState; + }; + + if(newLine) + this.currentLine++; + } + + @Override + public boolean finished() { + return false; + } + + public HTML getResult() { + return (HTML) 
this.currentElement; + } + + // Only public at the moment because of JavaScriptInterpreter + public static String stripTag(String token) { + return token.replace("<","").replace(">",""); + } + + /** + * Open a script + * @param token script + * @return next state + */ + private HTMLState script(String token) { + this.scriptInterpreter.currentLine = this.currentLine; + + this.scriptInterpreter.nextState(token); + + // TODO: Change for release. This is debug code + if(this.scriptInterpreter.finished()) { + // the ScriptInterpreter already has its own ElementBuilder + this.elementBuilders.pop(); + + this.elementBuilders.push(this.scriptInterpreter.getElementBuilder()); + + return this.close_tag(token); + } + else + return HTMLState.SCRIPT; + + } + + /** + * Open a tag + * @param token tag + * @return next state + */ + private HTMLState tag(String token) { + String tagName = stripTag(token); + + boolean hasText = this.tagManager.hasText(tagName); + + if(tagName.equalsIgnoreCase("!DOCTYPE")) + return HTMLState.DOCTYPE; + + else if(tagName.stripLeading().startsWith("!--")) + return returnCommentState(); + + this.elementBuilders.push(new ElementBuilder(this.parser, tagName)); + + + String[] split = new String[] {token}; + + if(token.contains(">")) + split = StringUtils_Remove_Please.splitSeq(new String[]{ + token.substring(0, token.indexOf('>')) + }, ">"); + + +// TODO: Change for release. This is debug code + if(this.elementBuilders.peek().getTagName().equals("script")) + return split.length == 1 ? HTMLState.SCRIPT : script(token.substring(token.indexOf(">")+1)); + + + if(!token.contains(">")) + return HTMLState.ATTRIBUTE; + + if(split.length == 1) + return hasText ? 
/**
 * Close a tag.
 *
 * <p>The lower-cased, stripped token is appended to the closing-tag buffer, so a
 * closing tag that arrives in several tokens is matched once it is complete.
 * On a match the top builder is popped and built, the result is appended to the
 * current element (or becomes the current element if there is none), and the
 * cursor moves to the parent unless the current element is the {@code HTML} root.
 *
 * @param token closing tag
 * @return next state
 * @throws UnexpectedTokenException if the buffered text is not a prefix of the
 *                                  expected closing tag
 */
private HTMLState close_tag(String token) {
    // Debug output: dumps the builder stack on every call.
    System.out.println(Arrays.toString(this.elementBuilders.toArray()));
    this.currentClosingTag.append(token.toLowerCase().strip());

    String ct = this.currentClosingTag.toString();

    String tagName = this.elementBuilders.peek().getTagName();

    // Tags for which the tag manager reports no text have no closing tag: the
    // builder is dropped and parsing continues with text.
    // NOTE(review): the popped builder is not built here and the closing-tag
    // buffer is not reset on this path - confirm that this is intended.
    if(!this.tagManager.hasText(tagName)) {
        this.elementBuilders.pop();

        return HTMLState.TEXT;
    }

    // Comments are special
    // NOTE(review): the non-comment alternative is an empty string as visible
    // here; part of this expression may have been lost in extraction (presumably
    // the closing tag built from tagName) - verify against the repository.
    String should = tagName.equals("--") ? tagName + '>' : "";

    // Debug output.
    System.out.println("should: " + should + " token: " + token);

    if(should.equals(ct)) {

        if(this.currentElement != null)
            this.currentElement = this.currentElement.append(this.elementBuilders.pop().build());
        else
            this.currentElement = this.elementBuilders.pop().build();

        // Step back up one level, except at the document root.
        if(!(this.currentElement instanceof HTML))
            this.currentElement = this.currentElement.getParent();

        this.currentClosingTag = new StringBuilder();
        return HTMLState.TEXT;

    }

    // Closing tag not complete yet: keep the buffer and wait for more tokens.
    else if(should.startsWith(ct))
        return HTMLState.TEXT;

    // The buffered text does not match the expected closing tag.
    else
        throw new UnexpectedTokenException(token, this.currentLine, this.currentState);

}
and equals are same token + this.currentAttribute = new StringBuilder(token.substring(0, token.indexOf('='))); + + return attribute_equals(token.substring(token.indexOf('='))); + } + else { + this.currentAttribute = new StringBuilder(token); + return HTMLState.ATTRIBUTE_EQUALS; + + } + + } + + /** + * Handle equals operator between attribute declaration and definition (can only be '='; will throw otherwise) + * @param token equals operator + * @return next state + */ + private HTMLState attribute_equals(String token) { + boolean dq = token.contains("\""), sq = token.contains("'"); + if(dq || sq) { + char quot = dq ? '"' : '\''; + // Recursion if declaration and equals are same token + + return value(token.substring(token.indexOf(quot)-1), quot); + } + else if(token.equals("=")){ + return HTMLState.VALUE; + } + else { + throw new UnexpectedTokenException(token, this.currentLine, this.currentState); + } + } + + /** + * Define an attribute + * @param token attribute value + * @return next state + */ + private HTMLState value(String token) { + return value(token, ' '); + } + + /** + * Define a string attribute + * @param token attribute value + * @param quot quotation sign + * @return next state + */ + private HTMLState value(String token, char quot) { + + // expected string, got other + if(!token.startsWith("'") && token.startsWith("\"")) + throw new ExpectStringException(token, this.currentLine, this.currentState); + + this.currentValue = new StringBuilder(); + + quot = quot != ' ' ? 
quot : token.charAt(0); + + // split by quote character + String[] split = token.split(String.valueOf(quot)); + + for(int i = 0; i < split.length; i++) + + // handle escaped quote character + if(split[i].endsWith("\\")) { + this.currentValue.append(split[i]).append(quot); + split[i] = ""; + } + + // delete first quotation character + if(!this.currentValue.isEmpty()) + this.currentValue.deleteCharAt(0); + + StringBuilder rebuilt = new StringBuilder(); + + // TODO possible error source + + for(String s : split) + + if(!s.isEmpty()) + rebuilt.append(s); + + if(!rebuilt.isEmpty() && this.tagManager.hasText(stripTag(token))) + return text(token); + + return HTMLState.ATTRIBUTE; + } + + /** + * Comment on code + * @param token comment + * @return next state + */ + private HTMLState comment(String token) { + if(this.currentText.isEmpty()) + this.currentText = new StringBuilder(); + + // append comment + if(!token.contains("-->")) { + this.currentText.append(token); + + return HTMLState.COMMENT; + } + + // end comment + + ElementBuilder elementBuilder = new ElementBuilder(this.parser, "--"); + elementBuilder.setText(this.currentText.toString()); + + // always reset + this.currentText = new StringBuilder(); + + this.elementBuilders.push(elementBuilder); + + if(token.split("-->").length == 1) + return commentResetInbetween(); + + + this.currentState = commentResetInbetween(); + + return close_tag( + token.substring(token.indexOf("-->")) + ); + + } + + /** + * Define the doctype + * @param token document type + * @return next state + */ + private HTMLState doctype(String token) { + String tag = stripTag(token); + if(!tag.equalsIgnoreCase("HTML")) { + /* + Not implemented. Might do so in the future, might not. 
/**
 * A token-driven interpreter: it is fed one token at a time and can be asked
 * whether it is done.
 */
public interface Interpreter {
    /**
     * Processes the next token.
     *
     * @param token token to process
     */
    void nextState(String token);

    /**
     * @return whether the interpreter considers its input finished
     */
    boolean finished();

}
org.openautonomousconnection.htmlparser.interpreter.html.exception; + +import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState; + +public class ExpectStringException extends HTMLException { + public ExpectStringException(String value, int currentLine, HTMLState currentState) { + super("Expected string, got: " + value, currentLine, currentState); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/exception/HTMLException.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/exception/HTMLException.java new file mode 100644 index 0000000..449d2c6 --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/exception/HTMLException.java @@ -0,0 +1,9 @@ +package org.openautonomousconnection.htmlparser.interpreter.html.exception; + +import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState; + +public class HTMLException extends RuntimeException { + public HTMLException(String message, int currentLine, HTMLState currentState) { + super(message+ "\nat line: " + currentLine + "\nwith state: " + currentState.toString()); + } +} diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/exception/UnexpectedTokenException.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/exception/UnexpectedTokenException.java new file mode 100644 index 0000000..f9ebf3d --- /dev/null +++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/exception/UnexpectedTokenException.java @@ -0,0 +1,9 @@ +package org.openautonomousconnection.htmlparser.interpreter.html.exception; + +import org.openautonomousconnection.htmlparser.interpreter.html.state.HTMLState; + +public class UnexpectedTokenException extends HTMLException { + public UnexpectedTokenException(String token, int currentLine, HTMLState currentState) { + super("Unexpected token: " + token, currentLine, currentState); + } +} diff 
--git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/state/HTMLAttributeState.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/state/HTMLAttributeState.java
new file mode 100644
index 0000000..d3827b4
--- /dev/null
+++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/state/HTMLAttributeState.java
@@ -0,0 +1,6 @@
+package org.openautonomousconnection.htmlparser.interpreter.html.state;
+
+public enum HTMLAttributeState {
+    DECLARATION,
+    EQUALS
+}
diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/state/HTMLState.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/state/HTMLState.java
new file mode 100644
index 0000000..583038f
--- /dev/null
+++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/html/state/HTMLState.java
@@ -0,0 +1,16 @@
+// Author: maple
+// date: 9/24/25
+
+package org.openautonomousconnection.htmlparser.interpreter.html.state;
+
+public enum HTMLState {
+    TAG,
+    CLOSE_TAG,
+    ATTRIBUTE,
+    ATTRIBUTE_EQUALS,
+    VALUE,
+    TEXT,
+    SCRIPT,
+    COMMENT,
+    DOCTYPE
+}
diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/ScriptInterpreter.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/ScriptInterpreter.java
new file mode 100644
index 0000000..9cc97d0
--- /dev/null
+++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/ScriptInterpreter.java
@@ -0,0 +1,163 @@
+// Author: maple
+// date: 9/28/25
+
+package org.openautonomousconnection.htmlparser.interpreter.script;
+
+import lombok.Getter;
+import org.openautonomousconnection.StringUtils_Remove_Please;
+import org.openautonomousconnection.htmlparser.Parser;
+import org.openautonomousconnection.htmlparser.TagManager;
+import org.openautonomousconnection.htmlparser.interpreter.ElementBuilder;
+import org.openautonomousconnection.htmlparser.interpreter.Interpreter;
+
+/**
+ * Base class of the script interpreters. Collects raw script text and keeps track of
+ * single-quoted, double-quoted and triple-quoted strings while doing so.
+ */
+public abstract class ScriptInterpreter implements Interpreter {
+    @Getter
+    protected Parser parser;
+
+    @Getter
+    protected ElementBuilder elementBuilder;
+
+    protected TagManager tagManager;
+
+    // Text gathered by parseScript so far; stays null until parseScript runs once.
+    StringBuilder currentText = null;
+
+    // Incremented by parseScript for every line break it consumes.
+    public int currentLine;
+
+    /**
+     * @param parser parser this interpreter works for; also supplies the tag manager
+     */
+    public ScriptInterpreter(Parser parser) {
+        this.parser = parser;
+        this.tagManager = parser.getTagManager();
+    }
+
+    /**
+     * Consumes {@code html} from {@code indexHolder[0]} to its end and appends the consumed
+     * characters to the text gathered by earlier calls. Quotes are tracked so that a
+     * backslash escape or a triple quote inside a string is copied as one unit.
+     *
+     * @param html        text to read
+     * @param indexHolder one-element holder: read as the start index, overwritten with the
+     *                    index at which reading stopped
+     * @return everything gathered so far, including the text of earlier calls
+     */
+    public String parseScript(String html, int[] indexHolder) {
+        if (this.currentText == null)
+            this.currentText = new StringBuilder();
+
+        int i = indexHolder[0];
+        StringBuilder script = new StringBuilder();
+
+        boolean inString = false;
+        boolean inTriple = false;
+        char stringChar = 0; // ' or "
+
+        while (i < html.length()) {
+            char c = html.charAt(i);
+
+            if (c == '\n')
+                this.currentLine++;
+
+            if (!inString) {
+                if (c == '\'' || c == '"') {
+                    inString = true;
+                    stringChar = c;
+                    inTriple = countSameQuotes(html, i, c) >= 3;
+
+                    if (inTriple) {
+                        // Opening triple quote: copy all three characters at once.
+                        script.append(stringChar).append(stringChar).append(stringChar);
+                        i += 3;
+                        continue;
+                    }
+                }
+            } else {
+                if (c == '\\') {
+                    // Copy the backslash and the escaped character without interpreting them.
+                    script.append(c);
+                    i++;
+                    // Guarded so that a trailing backslash cannot push i past html.length().
+                    if (i < html.length()) {
+                        char escapedChar = html.charAt(i);
+                        // An escaped line break still counts for currentLine.
+                        if (escapedChar == '\n')
+                            this.currentLine++;
+                        script.append(escapedChar);
+                        i++;
+                    }
+                    continue;
+                }
+
+                if (inTriple) {
+                    if (countSameQuotes(html, i, stringChar) >= 3) {
+                        // Closing triple quote.
+                        script.append(stringChar).append(stringChar).append(stringChar);
+                        i += 3;
+                        inString = false;
+                        inTriple = false;
+                        continue;
+                    }
+                } else if (c == stringChar) {
+                    inString = false;
+                }
+            }
+
+            if (!inString) {
+                // Search from the current position: a '>' in front of i would make
+                // substring(i, index) throw.
+                int index = html.indexOf('>', i);
+
+                // Without a '>' ahead there is nothing to probe; the character is consumed
+                // below. (A bare "continue" at this point never advanced i and hung the loop.)
+                if (index != -1) {
+                    String closingTag = html.substring(i, index);
+
+                    // NOTE(review): closing-tag detection is not wired up yet, the early
+                    // return below is still commented out - confirm the intended check.
+                    // if(this.tagManager.isTagSpaced(closingTag)) {
+                    if (StringUtils_Remove_Please.equalsIgnoreWhiteSpaces("", i)) {
+//                        indexHolder[0] = i + "".length();
+//                        return script.toString();
+                    }
+                }
+            }
+
+            script.append(c);
+            i++;
+        }
+
+        indexHolder[0] = i;
+
+        this.currentText.append(script);
+
+        return this.currentText.toString();
+    }
+
+    /**
+     * Counts how many times {@code quote} repeats in {@code s} starting at {@code index}.
+     */
+    private int countSameQuotes(String s, int index, char quote) {
+        int count = 0;
+        int i = index;
+        while (i < s.length() && s.charAt(i) == quote) {
+            count++;
+            i++;
+        }
+        return count;
+    }
+
+    /**
+     * @return {@code true} only while no text has been gathered, i.e. before the first
+     *         {@link #parseScript(String, int[])} call
+     */
+    @Override
+    public boolean finished() {
+        return this.currentText == null;
+    }
+}
diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/javascript/JavaScriptInterpreter.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/javascript/JavaScriptInterpreter.java
new file mode 100644
index 0000000..45e6c4d
--- /dev/null
+++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/javascript/JavaScriptInterpreter.java
@@ -0,0 +1,129 @@
+// Author: maple
+// date: 9/28/25
+
+package org.openautonomousconnection.htmlparser.interpreter.script.javascript;
+
+import lombok.Getter;
+import org.openautonomousconnection.StringUtils_Remove_Please;
+import org.openautonomousconnection.htmlparser.Parser;
+import org.openautonomousconnection.htmlparser.TagManager;
+import org.openautonomousconnection.htmlparser.interpreter.HTMLInterpreter;
+import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Placeholder JavaScript interpreter: records every token and toggles quote flags.
+ */
+public class JavaScriptInterpreter extends ScriptInterpreter {
+    // TODO: replace with actual interpreter
+
+    // parser and tagManager are inherited from ScriptInterpreter; re-declaring them here
+    // only hid the superclass fields behind a second copy.
+
+    /**
+     * @param parser parser handed on to {@link ScriptInterpreter}
+     */
+    public JavaScriptInterpreter(Parser parser) {
+        super(parser);
+    }
+
+    private boolean scriptFinished = false;
+
+    private StringBuilder text = new StringBuilder();
+
+    /**
+     * @return all tokens passed to {@link #nextState(String)} so far, concatenated
+     */
+    public String getText() {
+        return this.text.toString();
+    }
+
+    // Quote flags: inSQ is toggled for single quotes, inDQ for double quotes.
+    boolean inSQ, inDQ;
+
+    /**
+     * Records the token and toggles the quote flag named by the first entry that
+     * {@code containsManySorted} reports for it.
+     *
+     * @param token next token
+     */
+    @Override
+    public void nextState(String token) {
+        String[] sorted = StringUtils_Remove_Please.containsManySorted(token, "\"", "'");
+
+        this.text.append(token);
+
+        // No usable first entry means no flag to toggle. The unused token.split(sorted[0])
+        // that used to run unguarded is gone: it threw on an empty array.
+        if (sorted.length == 0 || sorted[0].isEmpty())
+            return;
+
+        // xor since this toggles the string case
+        inSQ = sorted[0].equals("'") ^ inSQ;
+        inDQ = sorted[0].equals("\"") ^ inDQ;
+    }
+
+    /**
+     * @return the {@code scriptFinished} flag
+     */
+    @Override
+    public boolean finished() {
+        return this.scriptFinished;
+    }
+
+//    private String[][] getTextWithStrings(String token) {
+//        char previous = 0;
+//
+//        int lastStringIndex = 0;
+//
+//        List strings = new ArrayList<>(), tokens = new ArrayList<>();
+//        for(char c : token.toCharArray()) {
+//            if(this.isStringEncapsulator(c, previous)) {
+//                if(this.inQuotes()) {
+//                    String string = token.substring(lastStringIndex, token.indexOf(c)-1);
+//
+//                    strings.add(string);
+//
+//                    token = string;
+//                }
+//                else
+//
+//
+//            }
+//
+//        }
+//    }
+
+    private boolean inQuotes() {
+        return this.inDQ || this.inSQ;
+    }
+
+    /**
+     * Toggles the matching quote flag when {@code c} opens or closes a string.
+     *
+     * @param c        character to inspect
+     * @param previous character in front of {@code c}; a backslash marks {@code c} as escaped
+     * @return whether a flag was toggled
+     */
+    private boolean isStringEncapsulator(char c, char previous) {
+        boolean escaped = previous == '\\';
+        if (escaped)
+            return false;
+
+        if (c == '\'' && !this.inDQ) {
+            this.inSQ = !this.inSQ;
+            return true;
+        }
+
+        // Only a double quote may toggle inDQ; without the character check every
+        // unescaped character outside a single-quoted string flipped the flag.
+        if (c == '"' && !this.inSQ) {
+            this.inDQ = !this.inDQ;
+            return true;
+        }
+
+        return false;
+    }
+}
diff --git a/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/pyscript/PyScriptInterpreter.java b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/pyscript/PyScriptInterpreter.java
new file mode 100644
index 0000000..f244260
--- /dev/null
+++ b/src/main/java/org/openautonomousconnection/htmlparser/interpreter/script/pyscript/PyScriptInterpreter.java
@@ -0,0 +1,48 @@
+package org.openautonomousconnection.htmlparser.interpreter.script.pyscript;
+
+import org.openautonomousconnection.htmlparser.Parser;
+import org.openautonomousconnection.htmlparser.interpreter.ElementBuilder;
+import org.openautonomousconnection.htmlparser.interpreter.script.ScriptInterpreter;
+
+/**
+ * Script interpreter that feeds every token through {@code parseScript}.
+ */
+public class PyScriptInterpreter extends ScriptInterpreter {
+
+    public PyScriptInterpreter(Parser parser) {
+        super(parser);
+    }
+
+    /**
+     * Creates the "script" element builder on first use and parses the token from index 0.
+     *
+     * @param token next token
+     */
+    @Override
+    public void nextState(String token) {
+        if(this.elementBuilder == null)
+            this.elementBuilder = new ElementBuilder(this.parser, "script");
+        String r = this.parseScript(token, new int[] {0});
+
+//        System.out.println(r);
+
+//        if(r == null)
+//            throw new UnexpectedTokenException("token", this.currentLine, HTMLState.SCRIPT);
+//        if(this.finished())
+//            this.currentElement = new HTMLScript(r);
+
+        // NOTE(review): the inherited finished() is true only while currentText is null and
+        // parseScript has just assigned it, so this branch looks unreachable - confirm what
+        // finished() is meant to report.
+        if(this.finished()) {
+            this.elementBuilder.setText(r);
+            System.out.println(r);
+        }
+
+    }
+//
+//    @Override
+//    public boolean finished() {
+//        return this.currentElement != null;
+//    }
+}